SDDSlib
Loading...
Searching...
No Matches
sddsmultihist.c
Go to the documentation of this file.
1/**
2 * @file sddsmultihist.c
3 * @brief SDDS-format multi-column histogramming program.
4 *
5 * This program reads SDDS-formatted input data and generates histograms
6 * for specified columns. It supports various options for customizing
7 * the histogram generation, including automatic bin sizing, boundary data,
8 * normalization, and more.
9 *
10 * ## Usage
11 * ```
12 * sddsmultihist [<inputfile>] [<outputfile>] [options]
13 * ```
14 *
15 * ## Options
16 * -pipe=[input][,output]
17 * -columns=<name>[,...]
18 * -abscissa=<name>[,...]
19 * -exclude=<name>[,...]
20 * -bins=<integer>
21 * -sizeOfBins=<value>
22 * -autobins=target=<number>[,minimum=<integer>][,maximum=<integer>]
23 * -boundaryData=<filename>,<column>
24 * -sides[=close|against]
25 * -expand=<fraction>
26 * -lowerLimit=<value>[,...]
27 * -upperLimit=<value>[,...]
28 * -separate
29 * -cdf=[only]
30 * -weightColumn=<name>
31 * -majorOrder=row|column
32 * -normalize={sum|peak|no}
33 *
34 * @copyright
35 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
36 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
37 *
38 * @license
39 * This file is distributed under the terms of the Software License Agreement
40 * found in the file LICENSE included with this distribution.
41 *
42 * @author M. Borland, L. Emery, R. Soliday, H. Shang
43 */
44
45#include "mdb.h"
46#include "SDDS.h"
47#include "scan.h"
48#include "SDDSutils.h"
49#include <ctype.h>
50
51/* Enumeration for option types */
52enum option_type {
53 SET_COLUMNS,
54 SET_PIPE,
55 SET_EXCLUDE,
56 SET_ABSCISSA,
57 SET_BINS,
58 SET_SIZEOFBINS,
59 SET_LOWERLIMIT,
60 SET_UPPERLIMIT,
61 SET_SIDES,
62 SET_SEPARATE,
63 SET_EXPAND,
64 SET_CDF,
65 SET_AUTOBINS,
66 SET_MAJOR_ORDER,
67 SET_BOUNDARYDATA,
68 SET_WEIGHT,
69 SET_NORMALIZE,
70 N_OPTIONS
71};
72
73char *option[N_OPTIONS] = {
74 "columns",
75 "pipe",
76 "exclude",
77 "abscissa",
78 "bins",
79 "sizeofbins",
80 "lowerlimit",
81 "upperlimit",
82 "sides",
83 "separate",
84 "expand",
85 "cdf",
86 "autobins",
87 "majorOrder",
88 "boundaryData",
89 "weightColumn",
90 "normalize",
91};
92
93static char *USAGE =
94 "Usage: sddsmultihist [<inputfile>] [<outputfile>] [options]\n"
95 "\n"
96 "Options:\n"
97 " -pipe=[input][,output] The standard SDDS Toolkit pipe option.\n"
98 " -columns=<name>[,...] Specifies the names of columns from the input to be histogrammed.\n"
99 " Names may contain wildcards.\n"
100 " -abscissa=<name>[,...] Specifies the names of the abscissas in the output file.\n"
101 " When using column names as abscissa names,\n"
102 " the -abscissa option is not required (use -separate).\n"
103 " At least one abscissa name must be supplied if -separate is not used.\n"
104 " -exclude=<name>[,...] (Optional) Specifies column names to exclude from histogramming.\n"
105 " -bins=<integer> Sets the number of bins for the histogram.\n"
106 " -sizeOfBins=<value> Sets the size of each bin for the histogram.\n"
107 " -autobins=target=<number>[,minimum=<integer>][,maximum=<integer>]\n"
108 " Automatically determines the number of bins based on the target number of samples per bin.\n"
109 " Optionally specify minimum and maximum number of bins.\n"
110 " -boundaryData=<filename>,<column> Specifies irregular bin boundaries from a file.\n"
111 " Incompatible with -separate and -abscissa.\n"
112 " -sides[=close|against] Adds zero-height bins at the ends of the histogram.\n"
113 " 'close' centers the first and last bins.\n"
114 " 'against' aligns the first and last bins with the data range.\n"
115 " -expand=<fraction> Expands the range of the histogram by the given fraction.\n"
116 " -lowerLimit=<value>[,...] Sets lower limits for the histograms.\n"
117 " -upperLimit=<value>[,...] Sets upper limits for the histograms.\n"
118 " -separate Creates separate abscissas for each histogram in the output file.\n"
119 " -cdf=[only] Includes the Cumulative Distribution Function (CDF) in the output.\n"
120 " 'only' includes only the CDF, excluding the histogram.\n"
121 " -weightColumn=<name> Specifies a column to weight the histogram.\n"
122 " -majorOrder=row|column Sets the output file's data order to row-major or column-major.\n"
123 " -normalize={sum|peak|no} Normalizes the histogram.\n"
124 " 'sum' normalizes so that the sum of all bins equals 1.\n"
125 " 'peak' normalizes so that the peak bin equals 1.\n"
126 " 'no' applies no normalization.\n"
127 "\n"
128 "Program by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
129
130#define DO_SIDES 1
131#define DO_CLOSE_SIDES 2
132#define DO_AGAINST_SIDES 3
133
134#define NORMALIZE_PEAK 0
135#define NORMALIZE_SUM 1
136#define NORMALIZE_NO 2
137#define N_NORMALIZE_OPTIONS 3
138char *normalize_option[N_NORMALIZE_OPTIONS] = {
139 "peak", "sum", "no"};
140
141void SetUpOutput(SDDS_DATASET *SDDSout, SDDS_DATASET *SDDSin, long **abscissaIndex, long **cdfIndex,
142 long **histogramIndex, char *output, char **columnName, long columnNames,
143 char **abscissaName, long abscissaNames, char *boundaryColumn, char *boundaryColumnUnits,
144 short columnMajorOrder, short normMode);
145double *ReadBoundaryData(char *file, char *column, int64_t *n, char **units);
146void MakeBoundaryHistogram(double *histogram, double *cdf, double *boundaryValue, int64_t nBoundaryValues,
147 double *data, double *weight, int64_t nData);
148void NormalizeHistogram(double *hist, int64_t bins, short mode);
149
150static short cdfOnly, freOnly;
151
152int main(int argc, char **argv) {
153 int iArg;
154 char *boundaryFile, *boundaryColumn, *boundaryColumnUnits;
155 double *boundaryValue;
156 int64_t nBoundaryValues;
157 char **abscissaName, **columnName, **excludeName;
158 long columnNames, excludeNames, abscissaNames, column, offset;
159 long givenLowerLimits, givenUpperLimits;
160 char *input, *output;
161 long readCode, binsGiven;
162 int64_t i, rows, bins, writeBins;
163 long lowerLimitGiven, upperLimitGiven, doSides, doSeparate;
164 double autoBinsTarget;
165 long autoBinsMinimum, autoBinsMaximum;
166 char *weightColumn;
167 double *weightData;
168 short normMode = NORMALIZE_NO;
169 unsigned long pipeFlags;
170 SCANNED_ARG *scanned;
171 SDDS_DATASET SDDSin, SDDSout;
172 double binSize, *lowerLimit = NULL, *upperLimit = NULL, *givenLowerLimit, *givenUpperLimit, *dx = NULL, range, middle;
173 double **inputData, *abscissa, *histogram, *minValue, *maxValue, transferLimit, *cdf, sum;
174 long *abscissaIndex, *histogramIndex, *cdfIndex;
175 double expandRange, maxRange;
176 char *CDFONLY;
177 unsigned long dummyFlags, majorOrderFlag;
178 short columnMajorOrder = -1;
179
181 argc = scanargs(&scanned, argc, argv);
182 if (argc < 3 || argc > (3 + N_OPTIONS)) {
183 fprintf(stderr, "%s", USAGE);
184 exit(EXIT_FAILURE);
185 }
186
187 minValue = maxValue = NULL;
188 output = input = NULL;
189 pipeFlags = 0;
190 abscissaName = NULL;
191 boundaryFile = boundaryColumn = boundaryColumnUnits = NULL;
192 boundaryValue = NULL;
193 offset = 0;
194 columnName = excludeName = NULL;
195 columnNames = excludeNames = abscissaNames = 0;
196 givenLowerLimits = givenUpperLimits = 0;
197 givenLowerLimit = givenUpperLimit = NULL;
198 bins = binsGiven = binSize = doSides = doSeparate = 0;
199 lowerLimitGiven = upperLimitGiven = 0;
200 expandRange = 0;
201 cdfOnly = 0;
202 freOnly = 1;
203 autoBinsTarget = 0;
204 autoBinsMinimum = autoBinsMaximum = 0;
205 weightColumn = NULL;
206 weightData = NULL;
207
208 for (iArg = 1; iArg < argc; iArg++) {
209 if (scanned[iArg].arg_type == OPTION) {
210 /* process options here */
211 switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
212 case SET_MAJOR_ORDER:
213 majorOrderFlag = 0;
214 scanned[iArg].n_items--;
215 if (scanned[iArg].n_items > 0 && (!scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0, "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER, "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
216 SDDS_Bomb("invalid -majorOrder syntax/values");
217 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
218 columnMajorOrder = 1;
219 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
220 columnMajorOrder = 0;
221 break;
222 case SET_PIPE:
223 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
224 SDDS_Bomb("invalid -pipe syntax");
225 break;
226 case SET_COLUMNS:
227 if (columnName)
228 SDDS_Bomb("only one -columns option may be given");
229 if (scanned[iArg].n_items < 2)
230 SDDS_Bomb("invalid -columns syntax");
231 if (!(columnName = SDDS_Realloc(columnName, sizeof(*columnName) * (columnNames + scanned[iArg].n_items - 1))))
232 SDDS_Bomb("memory allocation failure");
233 for (i = 1; i < scanned[iArg].n_items; i++)
234 columnName[columnNames + i - 1] = scanned[iArg].list[i];
235 columnNames += scanned[iArg].n_items - 1;
236 break;
237 case SET_ABSCISSA:
238 if (abscissaName)
239 SDDS_Bomb("only one -abscissa option may be given");
240 if (scanned[iArg].n_items >= 2) {
241 if (!(abscissaName = SDDS_Realloc(abscissaName, sizeof(*abscissaName) * (abscissaNames + scanned[iArg].n_items - 1))))
242 SDDS_Bomb("memory allocation failure");
243 for (i = 1; i < scanned[iArg].n_items; i++)
244 abscissaName[abscissaNames + i - 1] = scanned[iArg].list[i];
245 abscissaNames += scanned[iArg].n_items - 1;
246 }
247 break;
248 case SET_BINS:
249 if (binsGiven)
250 SDDS_Bomb("-bins specified more than once");
251 binsGiven = 1;
252 if (sscanf(scanned[iArg].list[1], "%" SCNd64, &bins) != 1 || bins <= 0)
253 SDDS_Bomb("invalid value for bins---give a positive value");
254 break;
255 case SET_SIZEOFBINS:
256 if (sscanf(scanned[iArg].list[1], "%le", &binSize) != 1 || binSize <= 0)
257 SDDS_Bomb("invalid value for bin size---give a positive value");
258 break;
259 case SET_AUTOBINS:
260 if (scanned[iArg].n_items < 2)
261 SDDS_Bomb("incorrect -autoBins syntax");
262 scanned[iArg].n_items -= 1;
263 autoBinsTarget = autoBinsMinimum = autoBinsMaximum = 0;
264 if (!scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
265 "target", SDDS_DOUBLE, &autoBinsTarget, 1, 0,
266 "minimum", SDDS_LONG, &autoBinsMinimum, 1, 0,
267 "maximum", SDDS_LONG, &autoBinsMaximum, 1, 0, NULL) ||
268 autoBinsTarget <= 0 || autoBinsMinimum < 0 || autoBinsMaximum < 0)
269 SDDS_Bomb("incorrect -autoBins syntax or values");
270 break;
271 case SET_EXCLUDE:
272 if (excludeName)
273 SDDS_Bomb("only one -exclude option may be given");
274 if (scanned[iArg].n_items < 2)
275 SDDS_Bomb("invalid -exclude syntax");
276 if (!(excludeName = SDDS_Realloc(excludeName, sizeof(*excludeName) * (excludeNames + scanned[iArg].n_items - 1))))
277 SDDS_Bomb("memory allocation failure");
278 for (i = 1; i < scanned[iArg].n_items; i++)
279 excludeName[excludeNames + i - 1] = scanned[iArg].list[i];
280 excludeNames += scanned[iArg].n_items - 1;
281 break;
282 case SET_LOWERLIMIT:
283 if (lowerLimitGiven)
284 SDDS_Bomb("-lowerLimit specified more than once");
285 lowerLimitGiven = 1;
286 if (!(givenLowerLimit = SDDS_Realloc(givenLowerLimit, sizeof(*givenLowerLimit) * (givenLowerLimits + scanned[iArg].n_items - 1))))
287 SDDS_Bomb("SET_LOWERLIMIT: memory allocation failure");
288 for (i = 1; i < scanned[iArg].n_items; i++) {
289 if (sscanf(scanned[iArg].list[i], "%lf", &transferLimit) != 1)
290 SDDS_Bomb("invalid value for -lowerLimit");
291 givenLowerLimit[givenLowerLimits + i - 1] = transferLimit;
292 }
293 givenLowerLimits += scanned[iArg].n_items - 1;
294 break;
295 case SET_UPPERLIMIT:
296 if (upperLimitGiven)
297 SDDS_Bomb("-upperLimit specified more than once");
298 upperLimitGiven = 1;
299 if (!(givenUpperLimit = SDDS_Realloc(givenUpperLimit, sizeof(*givenUpperLimit) * (givenUpperLimits + scanned[iArg].n_items - 1))))
300 SDDS_Bomb("SET_UPPERLIMIT: memory allocation failure");
301 for (i = 1; i < scanned[iArg].n_items; i++) {
302 if (sscanf(scanned[iArg].list[i], "%lf", &transferLimit) != 1)
303 SDDS_Bomb("invalid value for -upperLimit");
304 givenUpperLimit[givenUpperLimits + i - 1] = transferLimit;
305 }
306 givenUpperLimits += scanned[iArg].n_items - 1;
307 break;
308 case SET_SIDES:
309 doSides = DO_SIDES;
310 if (scanned[iArg].n_items == 2) {
311 static char *sideOpt[2] = {"close", "against"};
312 switch (match_string(scanned[iArg].list[1], sideOpt, 2, 0)) {
313 case 0:
314 doSides = DO_CLOSE_SIDES;
315 break;
316 case 1:
317 doSides = DO_AGAINST_SIDES;
318 break;
319 default:
320 SDDS_Bomb("invalid value for -sides");
321 break;
322 }
323 }
324 break;
325 case SET_SEPARATE:
326 doSeparate = 1;
327 break;
328 case SET_EXPAND:
329 if (scanned[iArg].n_items != 2 ||
330 sscanf(scanned[iArg].list[1], "%lf", &expandRange) != 1 || expandRange <= 0)
331 SDDS_Bomb("invalid -expand syntax");
332 break;
333 case SET_CDF:
334 if (scanned[iArg].n_items == 1)
335 cdfOnly = 0;
336 else {
337 if (scanned[iArg].n_items > 2)
338 SDDS_Bomb("invalid -cdf syntax");
339 CDFONLY = scanned[iArg].list[1];
340 if (strcmp(CDFONLY, "only") != 0)
341 SDDS_Bomb("invalid -cdf value, it should be -cdf or -cdf=only");
342 cdfOnly = 1;
343 }
344 freOnly = 0;
345 break;
346 case SET_BOUNDARYDATA:
347 if (scanned[iArg].n_items != 3 ||
348 !(boundaryFile = scanned[iArg].list[1]) ||
349 !strlen(boundaryFile) ||
350 !(boundaryColumn = scanned[iArg].list[2]) ||
351 !strlen(boundaryColumn))
352 SDDS_Bomb("invalid -boundaryData syntax or values");
353 break;
354 case SET_WEIGHT:
355 if (scanned[iArg].n_items != 2 ||
356 !(weightColumn = scanned[iArg].list[1]) ||
357 !strlen(weightColumn))
358 SDDS_Bomb("invalid -weightColumn syntax or values");
359 break;
360 case SET_NORMALIZE:
361 if (scanned[iArg].n_items == 1)
362 normMode = NORMALIZE_SUM;
363 else if (scanned[iArg].n_items != 2 ||
364 (normMode = match_string(scanned[iArg].list[1], normalize_option, N_NORMALIZE_OPTIONS, 0)) < 0)
365 SDDS_Bomb("invalid -normalize syntax");
366 break;
367 default:
368 fprintf(stderr, "Error: unknown or ambiguous option: %s\n", scanned[iArg].list[0]);
369 fprintf(stderr, "%s", USAGE);
370 exit(EXIT_FAILURE);
371 break;
372 }
373 } else {
374 if (!input)
375 input = scanned[iArg].list[0];
376 else if (!output)
377 output = scanned[iArg].list[0];
378 else
379 SDDS_Bomb("too many filenames seen");
380 }
381 }
382
383 if (boundaryColumn && (abscissaNames > 0 || doSeparate))
384 SDDS_Bomb("-boundaryData option is incompatible with -abscissa and -separate options");
385
386 if (columnNames <= 0)
387 SDDS_Bomb("Supply the names of columns to histogram with -columns");
388
389 processFilenames("sddsmultihist", &input, &output, pipeFlags, 0, NULL);
390
391 if (!SDDS_InitializeInput(&SDDSin, input)) {
392 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
393 exit(EXIT_FAILURE);
394 }
395
396 if ((columnNames = expandColumnPairNames(&SDDSin, &columnName, NULL, columnNames, excludeName, excludeNames, FIND_NUMERIC_TYPE, 0)) <= 0) {
397 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
398 SDDS_Bomb("No quantities selected to histogram.");
399 }
400
401 if (doSeparate) {
402 if (abscissaNames <= 0) {
403 if (!(abscissaName = SDDS_Realloc(abscissaName, sizeof(*abscissaName) * (abscissaNames + columnNames))))
404 SDDS_Bomb("memory allocation failure");
405 abscissaNames = 0;
406 for (i = 0; i < columnNames; i++) {
407 abscissaName[abscissaNames + i] = columnName[i];
408 }
409 abscissaNames += columnNames;
410 }
411 if (columnNames > 1) {
412 if (abscissaNames > 0) {
413 if (columnNames != abscissaNames)
414 SDDS_Bomb("the number of abscissa names must match the number of columns");
415 }
416 if (givenLowerLimits)
417 if (columnNames != givenLowerLimits)
418 SDDS_Bomb("the number of lower limits must match the number of columns");
419 if (givenUpperLimits)
420 if (columnNames != givenUpperLimits)
421 SDDS_Bomb("the number of upper limits must match the number of columns");
422 } else {
423 /* Handle single column with multiple option names */
424 if (abscissaNames > 0) {
425 if (columnNames != abscissaNames)
426 abscissaNames = 1;
427 }
428 if (givenLowerLimits)
429 if (columnNames != givenLowerLimits)
430 givenLowerLimits = 1;
431 if (givenUpperLimits)
432 if (columnNames != givenUpperLimits)
433 givenUpperLimits = 1;
434 }
435 } else if (boundaryFile) {
436 if (!(boundaryValue = ReadBoundaryData(boundaryFile, boundaryColumn, &nBoundaryValues, &boundaryColumnUnits))) {
437 SDDS_Bomb("Problem reading boundary data");
438 }
439 } else {
440 if (abscissaNames <= 0)
441 SDDS_Bomb("Supply the name of the abscissa with -abscissaName");
442 abscissaNames = 1;
443 }
444
445 SetUpOutput(&SDDSout, &SDDSin, &abscissaIndex, &cdfIndex, &histogramIndex, output, columnName, columnNames, abscissaName, abscissaNames, boundaryColumn, boundaryColumnUnits, columnMajorOrder, normMode);
446
447 if (!(inputData = (double **)malloc(columnNames * sizeof(*inputData))) ||
448 !(minValue = (double *)malloc(columnNames * sizeof(*minValue))) ||
449 !(maxValue = (double *)malloc(columnNames * sizeof(*maxValue))))
450 SDDS_Bomb("memory allocation failure");
451
452 if (((binSize ? 1 : 0) + (binsGiven ? 1 : 0) + (autoBinsTarget ? 1 : 0)) > 1)
453 SDDS_Bomb("Specify only one of -binSize, -bins, or -autoBins");
454 if (!binSize && !binsGiven && !autoBinsTarget) {
455 binsGiven = 1;
456 bins = 20;
457 }
458
459 abscissa = histogram = cdf = NULL;
460
461 while ((readCode = SDDS_ReadPage(&SDDSin)) > 0) {
462 if ((rows = SDDS_CountRowsOfInterest(&SDDSin)) < 0)
463 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
464
465 if (rows) {
466 if (weightColumn && !(weightData = SDDS_GetColumnInDoubles(&SDDSin, weightColumn)))
467 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
468
469 for (column = 0; column < columnNames; column++) {
470 if (!(inputData[column] = SDDS_GetColumnInDoubles(&SDDSin, columnName[column])))
471 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
472 }
473
474 if (!boundaryColumn) {
475 if (!(lowerLimit = (double *)malloc(sizeof(*lowerLimit) * columnNames)))
476 SDDS_Bomb("memory allocation failure");
477 if (!(upperLimit = (double *)malloc(sizeof(*upperLimit) * columnNames)))
478 SDDS_Bomb("memory allocation failure");
479 if (!(dx = (double *)malloc(sizeof(*dx) * columnNames)))
480 SDDS_Bomb("memory allocation failure");
481
482 if (doSeparate) {
483 for (column = 0; column < columnNames; column++) {
484 find_min_max(&minValue[column], &maxValue[column], inputData[column], rows);
485 lowerLimit[column] = givenLowerLimits ? givenLowerLimit[column] : minValue[column];
486 upperLimit[column] = givenUpperLimits ? givenUpperLimit[column] : maxValue[column];
487 }
488 } else {
489 for (column = 0; column < columnNames; column++)
490 find_min_max(&minValue[column], &maxValue[column], inputData[column], rows);
491 lowerLimit[0] = givenLowerLimits ? givenLowerLimit[0] : min_in_array(minValue, columnNames);
492 upperLimit[0] = givenUpperLimits ? givenUpperLimit[0] : max_in_array(maxValue, columnNames);
493 for (column = 1; column < columnNames; column++) {
494 lowerLimit[column] = lowerLimit[0];
495 upperLimit[column] = upperLimit[0];
496 }
497 }
498
499 range = (1 + expandRange) * (upperLimit[0] - lowerLimit[0]);
500 if (autoBinsTarget) {
501 bins = (int64_t)(rows / autoBinsTarget);
502 if (autoBinsMinimum) {
503 if (bins < autoBinsMinimum)
504 bins = autoBinsMinimum;
505 } else if (bins < 5) {
506 bins = 5;
507 }
508 if (autoBinsMaximum) {
509 if (bins > autoBinsMaximum)
510 bins = autoBinsMaximum;
511 } else if (bins > rows)
512 bins = rows;
513 }
514
515 if (binSize) {
516 maxRange = range;
517 for (column = 0; column < columnNames; column++) {
518 range = (1 + expandRange) * (upperLimit[column] - lowerLimit[column]);
519 range = ((range / binSize) + 1) * binSize;
520 if (range > maxRange)
521 maxRange = range;
522 middle = (lowerLimit[column] + upperLimit[column]) / 2;
523 lowerLimit[column] = middle - range / 2;
524 upperLimit[column] = middle + range / 2;
525 }
526 range = maxRange;
527 if (!doSeparate) {
528 lowerLimit[0] = min_in_array(lowerLimit, columnNames);
529 upperLimit[0] = max_in_array(upperLimit, columnNames);
530 dx[0] = binSize;
531 }
532 bins = maxRange / binSize + 0.5;
533 if (bins < 1 && !doSides)
534 bins = 2;
535 if (doSeparate)
536 for (column = 0; column < columnNames; column++) {
537 range = upperLimit[column] - lowerLimit[column];
538 upperLimit[column] += (maxRange - range) / 2;
539 lowerLimit[column] -= (maxRange - range) / 2;
540 dx[column] = binSize;
541 }
542 } else {
543 if (doSeparate) {
544 for (column = 0; column < columnNames; column++) {
545 range = (1 + expandRange) * (upperLimit[column] - lowerLimit[column]);
546 middle = (upperLimit[column] + lowerLimit[column]) / 2;
547 upperLimit[column] = middle + range / 2;
548 lowerLimit[column] = middle - range / 2;
549 if (upperLimit[column] == lowerLimit[column]) {
550 if (fabs(upperLimit[column]) < sqrt(DBL_MIN)) {
551 upperLimit[column] = sqrt(DBL_MIN);
552 lowerLimit[column] = -sqrt(DBL_MIN);
553 } else {
554 lowerLimit[column] = upperLimit[column] * (1 - 10000 * DBL_EPSILON);
555 upperLimit[column] = upperLimit[column] * (1 + 10000 * DBL_EPSILON);
556 }
557 }
558 dx[column] = (upperLimit[column] - lowerLimit[column]) / bins;
559 }
560 } else {
561 range = (1 + expandRange) * (upperLimit[0] - lowerLimit[0]);
562 middle = (upperLimit[0] + lowerLimit[0]) / 2;
563 upperLimit[0] = middle + range / 2;
564 lowerLimit[0] = middle - range / 2;
565 if (upperLimit[0] == lowerLimit[0]) {
566 if (fabs(upperLimit[0]) < sqrt(DBL_MIN)) {
567 upperLimit[0] = sqrt(DBL_MIN);
568 lowerLimit[0] = -sqrt(DBL_MIN);
569 } else {
570 lowerLimit[0] = upperLimit[0] * (1 - 10000 * DBL_EPSILON);
571 upperLimit[0] = upperLimit[0] * (1 + 10000 * DBL_EPSILON);
572 }
573 }
574 dx[0] = (upperLimit[0] - lowerLimit[0]) / bins;
575 }
576 }
577 if (!binsGiven || !abscissa) {
578 if (!(abscissa = SDDS_Realloc(abscissa, sizeof(*abscissa) * (bins + 2))) ||
579 !(cdf = SDDS_Realloc(cdf, sizeof(*cdf) * (bins + 2))) ||
580 !(histogram = SDDS_Realloc(histogram, sizeof(*histogram) * (bins + 2))))
581 SDDS_Bomb("memory allocation failure");
582 }
583 writeBins = bins + (doSides ? 2 : 0);
584 offset = doSides ? 0 : 1;
585 } else {
586 bins = writeBins = nBoundaryValues;
587 abscissa = NULL;
588 if (!(cdf = SDDS_Realloc(cdf, sizeof(*cdf) * bins)) ||
589 !(histogram = SDDS_Realloc(histogram, sizeof(*histogram) * bins)))
590 SDDS_Bomb("memory allocation failure");
591 }
592
593 if (!SDDS_StartPage(&SDDSout, writeBins) || !SDDS_CopyParameters(&SDDSout, &SDDSin))
594 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
595
596 if (boundaryColumn) {
597 for (column = 0; column < columnNames; column++) {
598 MakeBoundaryHistogram(histogram, cdf, boundaryValue, nBoundaryValues, inputData[column], weightData, rows);
599 NormalizeHistogram(histogram, nBoundaryValues, normMode);
600 if (!cdfOnly && !SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, histogram, writeBins, histogramIndex[column]))
601 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
602 if (!freOnly && !SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, cdf, writeBins, cdfIndex[column]))
603 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
604 free(inputData[column]);
605 }
606 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_NAME, boundaryValue, writeBins, boundaryColumn))
607 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
608 } else if (!doSeparate) {
609 for (i = -1; i < bins + 1; i++)
610 abscissa[i + 1] = (i + 0.5) * dx[0] + lowerLimit[0];
611 switch (doSides) {
612 case DO_CLOSE_SIDES:
613 abscissa[0] = abscissa[1] - dx[0] / 2;
614 abscissa[bins + 1] = abscissa[bins] + dx[0] / 2;
615 break;
616 case DO_AGAINST_SIDES:
617 abscissa[0] = abscissa[1];
618 abscissa[bins + 1] = abscissa[bins];
619 break;
620 }
621 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, abscissa + offset, writeBins, abscissaIndex[0]))
622 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
623 for (column = 0; column < columnNames; column++) {
624 histogram[0] = histogram[bins + 1] = 0;
625 if (!weightColumn)
626 make_histogram(histogram + 1, bins, lowerLimit[0], upperLimit[0], inputData[column], rows, 1);
627 else
628 make_histogram_weighted(histogram + 1, bins, lowerLimit[0], upperLimit[0], inputData[column], rows, 1, weightData);
629 NormalizeHistogram(histogram, bins, normMode);
630 sum = 0;
631 for (i = 0; i <= bins + 1; i++)
632 sum += histogram[i];
633 for (i = 0; i <= bins + 1; i++) {
634 if (!i)
635 cdf[i] = histogram[i] / sum;
636 else
637 cdf[i] = cdf[i - 1] + histogram[i] / sum;
638 }
639 if (!cdfOnly) {
640 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, histogram + offset, writeBins, histogramIndex[column]))
641 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
642 }
643 if (!freOnly) {
644 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, cdf + offset, writeBins, cdfIndex[column]))
645 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
646 }
647 free(inputData[column]);
648 }
649 } else {
650 for (column = 0; column < columnNames; column++) {
651 for (i = -1; i < bins + 1; i++)
652 abscissa[i + 1] = (i + 0.5) * dx[column] + lowerLimit[column];
653 switch (doSides) {
654 case DO_CLOSE_SIDES:
655 abscissa[0] = abscissa[1] - dx[column] / 2;
656 abscissa[bins + 1] = abscissa[bins] + dx[column] / 2;
657 break;
658 case DO_AGAINST_SIDES:
659 abscissa[0] = abscissa[1];
660 abscissa[bins + 1] = abscissa[bins];
661 break;
662 }
663 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, abscissa + offset, writeBins, abscissaIndex[column]))
664 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
665 histogram[0] = histogram[bins + 1] = 0;
666 if (!weightColumn)
667 make_histogram(histogram + 1, bins, lowerLimit[column], upperLimit[column], inputData[column], rows, 1);
668 else
669 make_histogram_weighted(histogram + 1, bins, lowerLimit[column], upperLimit[column], inputData[column], rows, 1, weightData);
670 NormalizeHistogram(histogram, bins + 2, normMode);
671 sum = 0;
672 for (i = 0; i <= bins + 1; i++)
673 sum += histogram[i];
674 for (i = 0; i <= bins + 1; i++) {
675 if (!i)
676 cdf[i] = histogram[i] / sum;
677 else
678 cdf[i] = cdf[i - 1] + histogram[i] / sum;
679 }
680 if (!cdfOnly) {
681 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, histogram + offset, writeBins, histogramIndex[column]))
682 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
683 }
684 if (!freOnly) {
685 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, cdf + offset, writeBins, cdfIndex[column]))
686 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
687 }
688 free(inputData[column]);
689 }
690 }
691 } else {
692 if (!SDDS_StartPage(&SDDSout, rows ? bins : 0))
693 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
694 }
695 if (weightData) {
696 free(weightData);
697 weightData = NULL;
698 }
699 if (!SDDS_WritePage(&SDDSout))
700 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
701 }
702
703 if (!SDDS_Terminate(&SDDSin)) {
704 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
705 exit(EXIT_FAILURE);
706 }
707 if (!SDDS_Terminate(&SDDSout)) {
708 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
709 exit(EXIT_FAILURE);
710 }
711
712 free(histogram);
713 free(cdf);
714 free(abscissa);
715 if (minValue)
716 free(minValue);
717 if (maxValue)
718 free(maxValue);
719 if (lowerLimit)
720 free(lowerLimit);
721 if (upperLimit)
722 free(upperLimit);
723 if (dx)
724 free(dx);
725
726 return EXIT_SUCCESS;
727}
728
729void SetUpOutput(SDDS_DATASET *SDDSout, SDDS_DATASET *SDDSin, long **abscissaIndex, long **cdfIndex,
730 long **histogramIndex, char *output, char **columnName, long columnNames,
731 char **abscissaName, long abscissaNames, char *boundaryColumn, char *boundaryUnits,
732 short columnMajorOrder, short normMode) {
733 long column;
734 char s[SDDS_MAXLINE];
735 int32_t outputType = SDDS_DOUBLE;
736 char *blankString = "";
737
738 if (!SDDS_InitializeOutput(SDDSout, SDDS_BINARY, 0, NULL, "sddsmultihist output", output))
739 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
740
741 if (columnMajorOrder != -1)
742 SDDSout->layout.data_mode.column_major = columnMajorOrder;
743 else
744 SDDSout->layout.data_mode.column_major = SDDSin->layout.data_mode.column_major;
745
746 if (!(*cdfIndex = (long *)malloc(sizeof(**cdfIndex) * columnNames)))
747 SDDS_Bomb("memory allocation failure");
748 if (!cdfOnly) {
749 if (!(*histogramIndex = (long *)malloc(sizeof(**histogramIndex) * columnNames)))
750 SDDS_Bomb("memory allocation failure");
751 }
752 if (!boundaryColumn) {
753 if (!(*abscissaIndex = (long *)malloc(sizeof(**abscissaIndex) * columnNames)))
754 SDDS_Bomb("memory allocation failure");
755
756 for (column = 0; column < abscissaNames; column++) {
757 if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, columnName[column], abscissaName[column]) ||
758 !SDDS_ChangeColumnInformation(SDDSout, "type", &outputType, SDDS_BY_NAME, abscissaName[column]) ||
759 ((*abscissaIndex)[column] = SDDS_GetColumnIndex(SDDSout, abscissaName[column])) < 0)
760 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
761 if (strcmp(columnName[column], abscissaName[column]) != 0 &&
762 (!SDDS_ChangeColumnInformation(SDDSout, "description", blankString, SDDS_BY_NAME, abscissaName[column]) ||
763 !SDDS_ChangeColumnInformation(SDDSout, "symbol", blankString, SDDS_BY_NAME, abscissaName[column])))
764 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
765 }
766 } else {
767 if (!SDDS_DefineSimpleColumn(SDDSout, boundaryColumn, boundaryUnits, SDDS_DOUBLE))
768 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
769 }
770
771 for (column = 0; column < columnNames; column++) {
772 if (!freOnly) {
773 sprintf(s, "%sCdf", columnName[column]);
774 if (((*cdfIndex)[column] = SDDS_DefineColumn(SDDSout, s, NULL, NULL, NULL, NULL, SDDS_DOUBLE, 0)) < 0)
775 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
776 }
777 if (!cdfOnly) {
778 switch (normMode) {
779 case NORMALIZE_PEAK:
780 sprintf(s, "%sRelativeFrequency", columnName[column]);
781 break;
782 case NORMALIZE_SUM:
783 sprintf(s, "%sFractionalFrequency", columnName[column]);
784 break;
785 default:
786 sprintf(s, "%sFrequency", columnName[column]);
787 break;
788 }
789 if (((*histogramIndex)[column] = SDDS_DefineColumn(SDDSout, s, NULL, NULL, NULL, NULL, SDDS_DOUBLE, 0)) < 0)
790 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
791 }
792 }
793 if (!SDDS_TransferAllParameterDefinitions(SDDSout, SDDSin, SDDS_TRANSFER_KEEPOLD) ||
794 !SDDS_WriteLayout(SDDSout))
795 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
796}
797
798double *ReadBoundaryData(char *file, char *column, int64_t *n, char **units) {
799 SDDS_DATASET SDDSin;
800 double *data;
801 int64_t j;
802
803 if (!SDDS_InitializeInput(&SDDSin, file)) {
804 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
805 }
806
807 if (SDDS_GetColumnInformation(&SDDSin, "units", units, SDDS_GET_BY_NAME, column) != SDDS_STRING)
808 return NULL;
809
810 if (SDDS_ReadPage(&SDDSin) <= 0) {
811 SDDS_SetError("No pages in boundary data file");
812 return NULL;
813 }
814
815 *n = SDDS_RowCount(&SDDSin);
816 if (*n <= 0)
817 return NULL;
818 if (!(data = SDDS_GetColumnInDoubles(&SDDSin, column)))
819 return NULL;
820 for (j = 1; j < (*n); j++) {
821 if (data[j] <= data[j - 1]) {
822 memmove(data + j, data + j + 1, sizeof(*data) * ((*n) - 1 - j));
823 *n -= 1;
824 j -= 1;
825 }
826 }
827 return data;
828}
829
830void MakeBoundaryHistogram(double *histogram, double *cdf, double *boundaryValue, int64_t nBoundaryValues,
831 double *data, double *weight, int64_t nData) {
832 int64_t i, j;
833 for (i = 0; i < nBoundaryValues; i++)
834 histogram[i] = cdf[i] = 0;
835 for (i = 0; i < nData; i++) {
836 j = binaryArraySearch(boundaryValue, sizeof(double), nBoundaryValues, &data[i], double_cmpasc, 1) + 1;
837 if (j < nBoundaryValues && j >= 0)
838 histogram[j] += weight ? fabs(weight[i]) : 1;
839 }
840 cdf[0] = histogram[0];
841 for (j = 1; j < nBoundaryValues; j++) {
842 cdf[j] = cdf[j - 1] + histogram[j];
843 }
844 if (cdf[nBoundaryValues - 1] > 0)
845 for (j = 0; j < nBoundaryValues; j++)
846 cdf[j] /= cdf[nBoundaryValues - 1];
847}
848
849void NormalizeHistogram(double *hist, int64_t bins, short mode) {
850 int64_t i;
851 double value = 0;
852 switch (mode) {
853 case NORMALIZE_SUM:
854 for (i = 0; i < bins; i++)
855 value += hist[i];
856 break;
857 case NORMALIZE_PEAK:
858 for (i = 0; i < bins; i++)
859 if (hist[i] > value)
860 value = hist[i];
861 break;
862 default:
863 return;
864 break;
865 }
866 if (value)
867 for (i = 0; i < bins; i++)
868 hist[i] /= value;
869}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
Definition SDDS_info.c:364
int32_t SDDS_GetColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Retrieves information about a specified column in the SDDS dataset.
Definition SDDS_info.c:41
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
long binaryArraySearch(void *array, size_t elemSize, long members, void *key, int(*compare)(const void *c1, const void *c2), long bracket)
Searches for a key in a sorted array of data values using binary search.
Definition binsert.c:156
int find_min_max(double *min, double *max, double *list, int64_t n)
Finds the minimum and maximum values in a list of doubles.
Definition findMinMax.c:33
double max_in_array(double *array, long n)
Finds the maximum value in an array of doubles.
Definition findMinMax.c:318
double min_in_array(double *array, long n)
Finds the minimum value in an array of doubles.
Definition findMinMax.c:336
long make_histogram_weighted(double *hist, long n_bins, double lo, double hi, double *data, long n_pts, long new_start, double *weight)
Compiles a weighted histogram from data points.
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
int double_cmpasc(const void *a, const void *b)
Compare two doubles in ascending order.