SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddshist.c
Go to the documentation of this file.
1/**
2 * @file sddshist.c
3 * @brief Command-line tool for generating histograms from SDDS-formatted data.
4 *
5 * @details
6 * This program is a command-line tool designed to generate histograms from data contained in SDDS (Self Describing Data Set) files.
7 * It supports various configurations like normalization, filtering, multi-threading for performance, and the ability to define
8 * histograms based on specific regions in the data.
9 *
10 * @section Usage
11 * ```
12 * sddshist [<inputfile>] [<outputfile>]
13 * [-pipe=[input][,output]]
14 * -dataColumn=<column-name>
15 * [{
16 * -bins=<number> |
17 * -sizeOfBins=<value> |
18 * -regions=filename=<filename>,position=<columnName>,name=<columnName>
19 * }]
20 * [-lowerLimit=<value>]
21 * [-upperLimit=<value>]
22 * [-expand=<factor>]
23 * [-filter=<column-name>,<lower-limit>,<upper-limit>]
24 * [-weightColumn=<column-name>]
25 * [-sides[=<points>]]
26 * [-normalize[={sum|area|peak}]]
27 * [-cdf[=only]]
28 * [-threads=<number>]
29 * [-statistics]
30 * [-verbose]
31 * [-majorOrder=row|column]
32 * ```
33 *
34 * @section Options
35 * | Required | Description |
36 * |----------|-----------------------------------------------------------------------------|
37 * | `-dataColumn=<column-name>` | Specifies the column to be histogrammed. |
38 *
39 * | Optional | Description |
40 * |----------|-----------------------------------------------------------------------------|
41 * | `-pipe` | Use pipe for input and/or output. |
42 * | `-bins` | Set the number of bins for the histogram. |
43 * | `-sizeOfBins` | Set the size of each bin. |
44 * | `-regions` | Define region-based histogramming with file and column information. |
45 * | `-lowerLimit` | Set the lower limit for the histogram. |
46 * | `-upperLimit` | Set the upper limit for the histogram. |
47 * | `-expand` | Expand the histogram range by a given factor. |
48 * | `-filter` | Filter data points based on values in a specified column. |
49 * | `-weightColumn` | Specify a column to weight the histogram. |
50 * | `-sides` | Extend the histogram to zero level. |
51 * | `-normalize` | Normalize the histogram (`sum`, `area`, or `peak`). |
52 * | `-cdf` | Include the Cumulative Distribution Function (CDF) in the output. |
53 * | `-threads` | Set the number of threads for processing. |
54 * | `-statistics` | Include statistical details in the output. |
55 * | `-verbose` | Print additional processing details. |
56 * | `-majorOrder` | Set data major order (row or column). |
57 *
58 * @subsection Incompatibilities
59 * - `-bins`, `-sizeOfBins`, and `-regions` are mutually exclusive.
60 * - `-normalize` and `-cdf=only` cannot be used together.
61 *
62 * @copyright
63 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
64 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
65 *
66 * @license
67 * This file is distributed under the terms of the Software License Agreement
68 * found in the file LICENSE included with this distribution.
69 *
70 * @authors
71 * M. Borland, C. Saunders, R. Soliday, H. Shang
72 */
73
74#include "mdb.h"
75#include "scan.h"
76#include "SDDS.h"
77
78/* Enumeration for option types */
79enum option_type {
80 SET_BINS,
81 SET_LOWERLIMIT,
82 SET_UPPERLIMIT,
83 SET_DATACOLUMN,
84 SET_FILTER,
85 SET_BINSIZE,
86 SET_WEIGHTCOLUMN,
87 SET_NORMALIZE,
88 SET_STATISTICS,
89 SET_SIDES,
90 SET_VERBOSE,
91 SET_PIPE,
92 SET_CDF,
93 SET_EXPAND,
94 SET_MAJOR_ORDER,
95 SET_REGION_FILE,
96 SET_THREADS,
97 N_OPTIONS
98};
99
100char *option[N_OPTIONS] = {
101 "bins", "lowerlimit", "upperlimit", "datacolumn",
102 "filter", "sizeofbins", "weightcolumn",
103 "normalize", "statistics", "sides", "verbose", "pipe", "cdf", "expand",
104 "majorOrder", "regions", "threads"
105};
106
107char *USAGE =
108 "Usage: sddshist [<inputfile>] [<outputfile>]\n"
109 " [-pipe=[input][,output]]\n"
110 " -dataColumn=<column-name>\n"
111 " [{\n"
112 " -bins=<number> |\n"
113 " -sizeOfBins=<value> |\n"
114 " -regions=filename=<filename>,position=<columnName>,name=<columnName>\n"
115 " }]\n"
116 " [-lowerLimit=<value>]\n"
117 " [-upperLimit=<value>]\n"
118 " [-expand=<factor>]\n"
119 " [-filter=<column-name>,<lower-limit>,<upper-limit>]\n"
120 " [-weightColumn=<column-name>]\n"
121 " [-sides[=<points>]]\n"
122 " [-normalize[={sum|area|peak}]]\n"
123 " [-cdf[=only]]\n"
124 " [-threads=<number>]\n"
125 " [-statistics]\n"
126 " [-verbose]\n"
127 " [-majorOrder=row|column]\n"
128 "Options:\n"
129 " -pipe=[input][,output] Use pipe for input and/or output.\n"
130 " -dataColumn=<column-name> Specify the column to histogram.\n"
131 " -bins=<number> Set the number of bins for the histogram.\n"
132 " -sizeOfBins=<value> Set the size of each bin.\n"
133 " -regions=filename=<filename>,position=<columnName>,name=<columnName>\n"
134 " Define region-based histogramming.\n"
135 " -lowerLimit=<value> Set the lower limit of the histogram.\n"
136 " -upperLimit=<value> Set the upper limit of the histogram.\n"
137 " -expand=<factor> Expand the range of the histogram by the given factor.\n"
138 " -filter=<column-name>,<lower>,<upper> Filter data points based on column values.\n"
139 " -weightColumn=<column-name> Weight the histogram with the specified column.\n"
140 " -sides[=<points>] Add sides to the histogram down to zero level.\n"
141 " -normalize[={sum|area|peak}] Normalize the histogram.\n"
142 " -cdf[=only] Include the CDF in the output. Use 'only' to exclude the histogram.\n"
143 " -threads=<number> Specify the number of threads to use.\n"
144 " -statistics Include statistical information in the output.\n"
145 " -verbose Enable informational printouts during processing.\n"
146 " -majorOrder=row|column Set the major order of data.\n\n"
147 "Program by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
148
149#define NORMALIZE_PEAK 0
150#define NORMALIZE_AREA 1
151#define NORMALIZE_SUM 2
152#define NORMALIZE_NO 3
153#define N_NORMALIZE_OPTIONS 4
154char *normalize_option[N_NORMALIZE_OPTIONS] = {
155 "peak", "area", "sum", "no"
156};
157
158static int64_t filter(double *x, double *y, double *filterData, int64_t npts, double lower_filter, double upper_filter);
159static long setupOutputFile(SDDS_DATASET *outTable, char *outputfile, SDDS_DATASET *inTable, char *inputfile,
160 char *dataColumn, char *weightColumn, char *filterColumn, double lowerFilter,
161 double upperFilter, SDDS_DATASET *regionTable, char *regionNameColumn, long doStats,
162 int64_t bins, double binSize, long normalizeMode, short columnMajorOrder);
163
164/* Column and parameter indices for output file */
165static long iIndep, iFreq, iBins, iBinSize, iLoFilter, iUpFilter, iMean, iRMS, iStDev, iPoints, iCdf;
166static short cdfOnly, freOnly;
167
168int64_t readRegionFile(SDDS_DATASET *SDDSin, char *filename, char *positionColumn, char *nameColumn, double **regionPosition, char ***regionName);
169void classifyByRegion(double *data, double *weight, int64_t points, double *histogram, double *regionPosition, int64_t bins);
170
171int main(int argc, char **argv) {
172 /* Flags to keep track of what is set in command line */
173 long binsGiven, lowerLimitGiven, upperLimitGiven;
174 SDDS_DATASET inTable, outTable;
175 double *data; /* Pointer to the array to histogram */
176 double *filterData; /* Pointer to the filter data */
177 double *weightData; /* Pointer to the weight data */
178 double *hist, *hist1; /* To store the histogram */
179 double *CDF, *CDF1; /* To store the CDF */
180 double sum; /* Total of histogram */
181 double *indep; /* Values of bin centers */
182 double lowerLimit, upperLimit; /* Lower and upper limits in histogram */
183 double givenLowerLimit, givenUpperLimit; /* Given lower and upper limits */
184 double range, binSize;
185 int64_t bins; /* Number of bins in the histogram */
186 long doStats; /* Include statistics in output file */
187 double mean, rms, standDev, mad;
188 char *filterColumn, *dataColumn, *weightColumn;
189 double lowerFilter = 0, upperFilter = 0; /* Filter range */
190 int64_t points; /* Number of data points after filtering */
191 SCANNED_ARG *scanned; /* Scanned argument structure */
192 char *inputfile, *outputfile; /* Input and output filenames */
193 double dx; /* Spacing of bins in histogram */
194 int64_t i; /* Loop variable */
195 long pointsBinned; /* Number of points in histogram */
196 long normalizeMode, doSides, verbose, readCode;
197 int64_t rows;
198 unsigned long pipeFlags, majorOrderFlag, regionFlags = 0;
199 char *cdf;
200 double expansionFactor = 0;
201 short columnMajorOrder = -1;
202 char *regionFilename = NULL, *regionPositionColumn = NULL, *regionNameColumn = NULL;
203 double *regionPosition = NULL;
204 int64_t nRegions = 0;
205 char **regionName = NULL;
206 SDDS_DATASET SDDSregion;
207 int threads = 1;
208
210 argc = scanargs(&scanned, argc, argv);
211 if (argc < 3) {
212 fprintf(stderr, "%s\n", USAGE);
213 exit(EXIT_FAILURE);
214 }
215
216 binsGiven = lowerLimitGiven = upperLimitGiven = 0;
217 binSize = doSides = 0;
218 inputfile = outputfile = NULL;
219 dataColumn = filterColumn = weightColumn = NULL;
220 doStats = verbose = 0;
221 normalizeMode = NORMALIZE_NO;
222 pipeFlags = 0;
223 dx = 0;
224 cdfOnly = 0;
225 freOnly = 1;
226
227 for (i = 1; i < argc; i++) {
228 if (scanned[i].arg_type == OPTION) {
229 switch (match_string(scanned[i].list[0], option, N_OPTIONS, 0)) {
230 case SET_MAJOR_ORDER:
231 majorOrderFlag = 0;
232 scanned[i].n_items--;
233 if (scanned[i].n_items > 0 && (!scanItemList(&majorOrderFlag, scanned[i].list + 1, &scanned[i].n_items, 0, "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER, "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
234 SDDS_Bomb("invalid -majorOrder syntax/values");
235 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
236 columnMajorOrder = 1;
237 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
238 columnMajorOrder = 0;
239 break;
240 case SET_BINS: /* Set number of bins */
241 if (binsGiven)
242 SDDS_Bomb("-bins specified more than once");
243 binsGiven = 1;
244 if (sscanf(scanned[i].list[1], "%" SCNd64, &bins) != 1 || bins <= 0)
245 SDDS_Bomb("invalid value for bins");
246 break;
247 case SET_LOWERLIMIT:
248 if (lowerLimitGiven)
249 SDDS_Bomb("-lowerLimit specified more than once");
250 lowerLimitGiven = 1;
251 if (sscanf(scanned[i].list[1], "%lf", &givenLowerLimit) != 1)
252 SDDS_Bomb("invalid value for lowerLimit");
253 break;
254 case SET_UPPERLIMIT:
255 if (upperLimitGiven)
256 SDDS_Bomb("-upperLimit specified more than once");
257 upperLimitGiven = 1;
258 if (sscanf(scanned[i].list[1], "%lf", &givenUpperLimit) != 1)
259 SDDS_Bomb("invalid value for upperLimit");
260 break;
261 case SET_EXPAND:
262 expansionFactor = 0;
263 if (sscanf(scanned[i].list[1], "%lf", &expansionFactor) != 1 || expansionFactor <= 0)
264 SDDS_Bomb("invalid value for expand");
265 break;
266 case SET_DATACOLUMN:
267 if (dataColumn)
268 SDDS_Bomb("-dataColumn specified more than once");
269 if (scanned[i].n_items != 2)
270 SDDS_Bomb("invalid -dataColumn syntax---supply name");
271 dataColumn = scanned[i].list[1];
272 break;
273 case SET_FILTER:
274 if (filterColumn)
275 SDDS_Bomb("multiple filter specifications not allowed");
276 if (scanned[i].n_items != 4 || sscanf(scanned[i].list[2], "%lf", &lowerFilter) != 1 ||
277 sscanf(scanned[i].list[3], "%lf", &upperFilter) != 1 || lowerFilter > upperFilter)
278 SDDS_Bomb("invalid -filter syntax/values");
279 filterColumn = scanned[i].list[1];
280 break;
281 case SET_WEIGHTCOLUMN:
282 if (weightColumn)
283 SDDS_Bomb("multiple weighting columns not allowed");
284 if (scanned[i].n_items != 2)
285 SDDS_Bomb("-weightColumn requires a column name");
286 weightColumn = scanned[i].list[1];
287 break;
288 case SET_NORMALIZE:
289 if (scanned[i].n_items == 1)
290 normalizeMode = NORMALIZE_SUM;
291 else if (scanned[i].n_items != 2 || (normalizeMode = match_string(scanned[i].list[1], normalize_option, N_NORMALIZE_OPTIONS, 0)) < 0)
292 SDDS_Bomb("invalid -normalize syntax");
293 break;
294 case SET_STATISTICS:
295 doStats = 1;
296 break;
297 case SET_SIDES:
298 if (scanned[i].n_items == 1)
299 doSides = 1;
300 else if (scanned[i].n_items > 2 || (sscanf(scanned[i].list[1], "%ld", &doSides) != 1 || doSides <= 0))
301 SDDS_Bomb("invalid -sides syntax");
302 break;
303 case SET_VERBOSE:
304 verbose = 1;
305 break;
306 case SET_BINSIZE:
307 if (sscanf(scanned[i].list[1], "%le", &binSize) != 1 || binSize <= 0)
308 SDDS_Bomb("invalid value for bin size");
309 break;
310 case SET_PIPE:
311 if (!processPipeOption(scanned[i].list + 1, scanned[i].n_items - 1, &pipeFlags))
312 SDDS_Bomb("invalid -pipe syntax");
313 break;
314 case SET_CDF:
315 if (scanned[i].n_items == 1)
316 cdfOnly = 0;
317 else {
318 if (scanned[i].n_items != 2)
319 SDDS_Bomb("invalid -cdf syntax");
320 cdf = scanned[i].list[1];
321 if (strcmp(cdf, "only") != 0)
322 SDDS_Bomb("invalid -cdf value, it should be -cdf or -cdf=only");
323 cdfOnly = 1;
324 }
325 freOnly = 0;
326 break;
327 case SET_REGION_FILE:
328 if (scanned[i].n_items != 4)
329 SDDS_Bomb("invalid -regionFile syntax");
330 regionFlags = 0;
331 scanned[i].n_items -= 1;
332 if (!scanItemList(&regionFlags, scanned[i].list + 1, &scanned[i].n_items, 0,
333 "filename", SDDS_STRING, &regionFilename, 1, 1,
334 "position", SDDS_STRING, &regionPositionColumn, 1, 2,
335 "name", SDDS_STRING, &regionNameColumn, 1, 4, NULL) ||
336 regionFlags != (1 + 2 + 4) || !regionFilename || !regionPositionColumn || !regionNameColumn)
337 SDDS_Bomb("invalid -regionFile syntax");
338 break;
339 case SET_THREADS:
340 if (scanned[i].n_items != 2 ||
341 !sscanf(scanned[i].list[1], "%d", &threads) || threads < 1)
342 SDDS_Bomb("invalid -threads syntax");
343 break;
344 default:
345 fprintf(stderr, "Error: option %s not recognized\n", scanned[i].list[0]);
346 exit(EXIT_FAILURE);
347 break;
348 }
349 } else {
350 /* Argument is filename */
351 if (!inputfile)
352 inputfile = scanned[i].list[0];
353 else if (!outputfile)
354 outputfile = scanned[i].list[0];
355 else
356 SDDS_Bomb("too many filenames seen");
357 }
358 }
359
360 processFilenames("sddshist", &inputfile, &outputfile, pipeFlags, 0, NULL);
361
362 if (binSize && binsGiven && regionFlags)
363 SDDS_Bomb("Provide only one of -bins, -sizeOfBins, or -regions");
364 if (!binsGiven)
365 bins = 20;
366 if (!dataColumn)
367 SDDS_Bomb("-dataColumn must be specified");
368
369 if (regionFlags) {
370 if (!(nRegions = readRegionFile(&SDDSregion, regionFilename, regionPositionColumn, regionNameColumn, &regionPosition, &regionName)))
371 SDDS_Bomb("Problem with region file. Check existence and type of columns");
372 doSides = 0;
373 bins = nRegions + 1;
374 }
375
376 hist = tmalloc(sizeof(*hist) * (bins + 2 * doSides));
377 CDF = CDF1 = tmalloc(sizeof(*hist) * (bins + 2 * doSides));
378 indep = tmalloc(sizeof(*indep) * (bins + 2 * doSides));
379 pointsBinned = 0;
380
381 if (!SDDS_InitializeInput(&inTable, inputfile) ||
382 SDDS_GetColumnIndex(&inTable, dataColumn) < 0 ||
383 (weightColumn && SDDS_GetColumnIndex(&inTable, weightColumn) < 0) ||
384 (filterColumn && SDDS_GetColumnIndex(&inTable, filterColumn) < 0))
385 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
386 if (!setupOutputFile(&outTable, outputfile, &inTable, inputfile, dataColumn, weightColumn, filterColumn, lowerFilter, upperFilter, &SDDSregion, regionNameColumn, doStats, bins, binSize, normalizeMode, columnMajorOrder))
387 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
388
389 data = weightData = filterData = NULL;
390 while ((readCode = SDDS_ReadPage(&inTable)) > 0) {
391 if ((rows = SDDS_CountRowsOfInterest(&inTable)) < 0)
392 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
393 if (rows && (!(data = SDDS_GetColumnInDoubles(&inTable, dataColumn)) ||
394 (weightColumn && !(weightData = SDDS_GetColumnInDoubles(&inTable, weightColumn))) ||
395 (filterColumn && !(filterData = SDDS_GetColumnInDoubles(&inTable, filterColumn)))))
396 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
397
398 if (rows && filterColumn)
399 points = filter(data, weightData, filterData, rows, lowerFilter, upperFilter);
400 else
401 points = rows;
402
403 pointsBinned = 0;
404 if (points) {
405 if (doStats) {
406 if (!weightColumn)
407 computeMomentsThreaded(&mean, &rms, &standDev, &mad, data, points, threads);
408 else
409 computeWeightedMomentsThreaded(&mean, &rms, &standDev, &mad, data, weightData, points, threads);
410 }
411
412 if (regionFlags) {
413 classifyByRegion(data, weightData, points, hist, regionPosition, bins);
414 hist1 = hist;
415 } else {
416 if (!lowerLimitGiven) {
417 lowerLimit = (points > 0) ? data[0] : 0;
418 for (i = 0; i < points; i++)
419 if (lowerLimit > data[i])
420 lowerLimit = data[i];
421 } else {
422 lowerLimit = givenLowerLimit;
423 }
424 if (!upperLimitGiven) {
425 upperLimit = (points > 0) ? data[0] : 0;
426 for (i = 0; i < points; i++)
427 if (upperLimit < data[i])
428 upperLimit = data[i];
429 } else {
430 upperLimit = givenUpperLimit;
431 }
432
433 range = upperLimit - lowerLimit;
434 if (!lowerLimitGiven)
435 lowerLimit -= range * 1e-7;
436 if (!upperLimitGiven)
437 upperLimit += range * 1e-7;
438 if (upperLimit == lowerLimit) {
439 if (binSize) {
440 upperLimit += binSize / 2;
441 lowerLimit -= binSize / 2;
442 } else if (fabs(upperLimit) < sqrt(DBL_MIN)) {
443 upperLimit = sqrt(DBL_MIN);
444 lowerLimit = -sqrt(DBL_MIN);
445 } else {
446 upperLimit += upperLimit * (1 + 2 * DBL_EPSILON);
447 lowerLimit -= upperLimit * (1 - 2 * DBL_EPSILON);
448 }
449 }
450 if (expansionFactor > 0) {
451 double center = (upperLimit + lowerLimit) / 2;
452 range = expansionFactor * (upperLimit - lowerLimit);
453 lowerLimit = center - range / 2;
454 upperLimit = center + range / 2;
455 }
456 dx = (upperLimit - lowerLimit) / bins;
457
458 if (binSize) {
459 double middle;
460 range = ((range / binSize) + 1) * binSize;
461 middle = (lowerLimit + upperLimit) / 2;
462 lowerLimit = middle - range / 2;
463 upperLimit = middle + range / 2;
464 dx = binSize;
465 bins = range / binSize + 0.5;
466 if (bins < 1 && !doSides)
467 bins = 2 * doSides;
468 indep = trealloc(indep, sizeof(*indep) * (bins + 2 * doSides));
469 hist = trealloc(hist, sizeof(*hist) * (bins + 2 * doSides));
470 CDF = trealloc(CDF, sizeof(*hist) * (bins + 2 * doSides));
471 }
472
473 for (i = -doSides; i < bins + doSides; i++)
474 indep[i + doSides] = (i + 0.5) * dx + lowerLimit;
475 hist1 = hist + doSides;
476 CDF1 = CDF + doSides;
477 if (doSides) {
478 hist[0] = hist[bins + doSides] = 0;
479 }
480
481 if (!weightColumn)
482 pointsBinned = make_histogram(hist1, bins, lowerLimit, upperLimit, data, points, 1);
483 else
484 pointsBinned = make_histogram_weighted(hist1, bins, lowerLimit, upperLimit, data, points, 1, weightData);
485 }
486
487 sum = 0;
488 for (i = 0; i < bins + doSides; i++) {
489 sum += hist1[i];
490 }
491 CDF1[0] = hist1[0] / sum;
492 for (i = 1; i < bins + doSides; i++) {
493 CDF1[i] = CDF1[i - 1] + hist1[i] / sum;
494 }
495
496 if (verbose)
497 fprintf(stderr, "%ld points of %" PRId64 " from page %ld histogrammed in %" PRId64 " bins\n", pointsBinned, rows, readCode, bins);
498 if (!cdfOnly) {
499 if (normalizeMode != NORMALIZE_NO) {
500 double norm = 0;
501 switch (normalizeMode) {
502 case NORMALIZE_PEAK:
503 norm = max_in_array(hist1, bins);
504 break;
505 case NORMALIZE_AREA:
506 case NORMALIZE_SUM:
507 for (i = 0; i < bins; i++)
508 norm += hist1[i];
509 if (normalizeMode == NORMALIZE_AREA)
510 norm *= dx;
511 break;
512 default:
513 SDDS_Bomb("invalid normalize mode--consult programmer.");
514 break;
515 }
516 if (norm)
517 for (i = 0; i < bins; i++)
518 hist1[i] /= norm;
519 }
520 }
521 }
522
523 if (regionFlags) {
524 if (!SDDS_StartPage(&outTable, bins) ||
525 !SDDS_CopyParameters(&outTable, &inTable) ||
526 !SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iBins, bins, iBinSize, dx, iPoints, pointsBinned, -1))
527 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
528 if (points) {
529 if (!SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, regionPosition, bins, iIndep) ||
530 !SDDS_SetColumn(&outTable, SDDS_SET_BY_NAME, regionName, bins, regionNameColumn))
531 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
532 if (!freOnly && !SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, CDF, bins, iCdf))
533 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
534 if (!cdfOnly && !SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, hist, bins, iFreq))
535 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
536 }
537 } else {
538 if (!SDDS_StartPage(&outTable, bins + 2 * doSides) ||
539 !SDDS_CopyParameters(&outTable, &inTable) ||
540 (points && (!SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, indep, bins + 2 * doSides, iIndep))) ||
541 !SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iBins, bins, iBinSize, dx, iPoints, pointsBinned, -1))
542 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
543 if (!freOnly) {
544 if (points && !SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, CDF, bins + 2 * doSides, iCdf))
545 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
546 }
547 if (!cdfOnly) {
548 if (points && !SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, hist, bins + 2 * doSides, iFreq))
549 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
550 }
551 }
552
553 if (filterColumn && points &&
554 !SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iLoFilter, lowerFilter, iUpFilter, upperFilter, -1))
555 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
556 if (doStats && points &&
557 !SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iMean, mean, iRMS, rms, iStDev, standDev, -1))
558 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
559
560 if (!SDDS_WritePage(&outTable))
561 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
562 if (data)
563 free(data);
564 if (weightData)
565 free(weightData);
566 if (filterData)
567 free(filterData);
568 data = weightData = filterData = NULL;
569 }
570
571 if (!SDDS_Terminate(&inTable)) {
572 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
573 exit(EXIT_FAILURE);
574 }
575 if (!SDDS_Terminate(&outTable)) {
576 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
577 exit(EXIT_FAILURE);
578 }
579 return EXIT_SUCCESS;
580}
581
582/* routine: filter()
583 * purpose: filter set of points {(x, y)} with a specified
584 * window in one of the variables. Works in place.
585 */
586static int64_t filter(double *x, double *y, double *filterData, int64_t npts, double lower_filter, double upper_filter) {
587 int64_t i, j;
588 static char *keep = NULL;
589 static int64_t maxPoints = 0;
590
591 if (maxPoints < npts)
592 keep = trealloc(keep, sizeof(*keep) * (maxPoints = npts));
593
594 for (i = 0; i < npts; i++) {
595 if (filterData[i] < lower_filter || filterData[i] > upper_filter)
596 keep[i] = 0;
597 else
598 keep[i] = 1;
599 }
600
601 for (i = j = 0; i < npts; i++)
602 if (keep[i]) {
603 if (i != j) {
604 if (x)
605 x[j] = x[i];
606 if (y)
607 y[j] = y[i];
608 filterData[j] = filterData[i];
609 }
610 j++;
611 }
612
613 return j;
614}
615
616static long setupOutputFile(SDDS_DATASET *outTable, char *outputfile, SDDS_DATASET *inTable, char *inputfile,
617 char *dataColumn, char *weightColumn, char *filterColumn, double lowerFilter,
618 double upperFilter, SDDS_DATASET *regionTable, char *regionNameColumn, long doStats,
619 int64_t bins, double binSize, long normalizeMode, short columnMajorOrder) {
620 char *symbol, *units, *dataUnits, *outputFormat;
621 int32_t outputType;
622 char s[1024];
623
624 if (!SDDS_InitializeOutput(outTable, SDDS_BINARY, 0, NULL, "sddshist output", outputfile))
625 return 0;
626 if (columnMajorOrder != -1)
627 outTable->layout.data_mode.column_major = columnMajorOrder;
628 else
629 outTable->layout.data_mode.column_major = inTable->layout.data_mode.column_major;
630 if (!SDDS_GetColumnInformation(inTable, "units", &dataUnits, SDDS_GET_BY_NAME, dataColumn))
631 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
632
633 /* Define output columns */
634 outputType = SDDS_DOUBLE;
635 outputFormat = NULL;
636
637 if (!SDDS_TransferColumnDefinition(outTable, inTable, dataColumn, NULL) ||
638 !SDDS_ChangeColumnInformation(outTable, "type", &outputType, SDDS_BY_NAME, dataColumn) ||
639 !SDDS_ChangeColumnInformation(outTable, "format_string", &outputFormat, SDDS_BY_NAME, dataColumn) ||
640 (iIndep = SDDS_GetColumnIndex(outTable, dataColumn)) < 0)
641 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
642 if (regionNameColumn && !SDDS_TransferColumnDefinition(outTable, regionTable, regionNameColumn, NULL))
643 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
644 if (!cdfOnly) {
645 switch (normalizeMode) {
646 case NORMALIZE_PEAK:
647 symbol = "RelativeFrequency";
648 units = NULL;
649 break;
650 case NORMALIZE_AREA:
651 symbol = "NormalizedFrequency";
652 if (dataUnits && !SDDS_StringIsBlank(dataUnits)) {
653 units = tmalloc(sizeof(*units) * (strlen(dataUnits) + 5));
654 if (strchr(dataUnits, ' '))
655 sprintf(units, "1/(%s)", dataUnits);
656 else
657 sprintf(units, "1/%s", dataUnits);
658 } else
659 units = NULL;
660 break;
661 case NORMALIZE_SUM:
662 symbol = "FractionalFrequency";
663 units = NULL;
664 break;
665 default:
666 if (weightColumn) {
667 char *weightUnits = NULL;
668 if (weightColumn && !SDDS_GetColumnInformation(inTable, "units", &weightUnits, SDDS_GET_BY_NAME, weightColumn))
669 return 0;
670 symbol = "WeightedNumberOfOccurrences";
671 units = weightUnits;
672 } else {
673 symbol = "NumberOfOccurrences";
674 units = NULL;
675 }
676 break;
677 }
678
679 if ((iFreq = SDDS_DefineColumn(outTable, "frequency", symbol, units, NULL, NULL, SDDS_DOUBLE, 0)) < 0)
680 return 0;
681 free(units);
682 units = NULL;
683 }
684 if (!freOnly) {
685 sprintf(s, "%sCdf", dataColumn);
686 if ((iCdf = SDDS_DefineColumn(outTable, s, NULL, NULL, NULL, NULL, SDDS_DOUBLE, 0)) < 0)
687 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
688 }
689
690 /* Define output parameters */
691 if (SDDS_DefineParameter(outTable, "sddshistInput", NULL, NULL, NULL, NULL, SDDS_STRING,
692 inputfile) < 0 ||
693 (weightColumn && SDDS_DefineParameter(outTable, "sddshistWeight", NULL, NULL, NULL, NULL,
694 SDDS_STRING, weightColumn) < 0) ||
695 (iBins = SDDS_DefineParameter(outTable, "sddshistBins", NULL, NULL, NULL, NULL, SDDS_LONG,
696 NULL)) < 0 ||
697 (iBinSize = SDDS_DefineParameter(outTable, "sddshistBinSize", NULL, NULL, NULL, NULL, SDDS_DOUBLE, NULL)) < 0 ||
698 (iPoints = SDDS_DefineParameter(outTable, "sddshistBinned", NULL, NULL, NULL, NULL, SDDS_LONG, NULL)) < 0)
699 return 0;
700 if (filterColumn) {
701 char *filterUnits;
702 if (!SDDS_GetColumnInformation(inTable, "units", &filterUnits, SDDS_GET_BY_NAME, filterColumn) ||
703 SDDS_DefineParameter(outTable, "sddshistFilter", NULL, NULL, NULL, NULL, SDDS_STRING,
704 filterColumn) < 0 ||
705 (iLoFilter = SDDS_DefineParameter(outTable, "sddshistLowerFilter", NULL, filterUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0 ||
706 (iUpFilter = SDDS_DefineParameter(outTable, "sddshistUpperFilter", NULL, filterUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0)
707 return 0;
708 if (filterUnits)
709 free(filterUnits);
710 }
711 if (doStats) {
712 char *buffer;
713 buffer = tmalloc(sizeof(*buffer) * (strlen(dataColumn) + 20));
714 sprintf(buffer, "%sMean", dataColumn);
715 if ((iMean = SDDS_DefineParameter(outTable, buffer, NULL, dataUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0)
716 return 0;
717 sprintf(buffer, "%sRms", dataColumn);
718 if ((iRMS = SDDS_DefineParameter(outTable, buffer, NULL, dataUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0)
719 return 0;
720 sprintf(buffer, "%sStDev", dataColumn);
721 if ((iStDev = SDDS_DefineParameter(outTable, buffer, NULL, dataUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0)
722 return 0;
723 free(buffer);
724 }
725 if (SDDS_DefineParameter(outTable, "sddshistNormMode", NULL, NULL, NULL, NULL, SDDS_STRING, normalize_option[normalizeMode]) < 0 ||
726 !SDDS_TransferAllParameterDefinitions(outTable, inTable, SDDS_TRANSFER_KEEPOLD) ||
727 !SDDS_WriteLayout(outTable))
728 return 0;
729 return 1;
730}
731
732int64_t readRegionFile(SDDS_DATASET *SDDSin, char *filename, char *positionColumn, char *nameColumn, double **regionPosition, char ***regionName) {
733 int64_t i, rows = 0;
734 if (!SDDS_InitializeInput(SDDSin, filename) ||
735 SDDS_ReadPage(SDDSin) != 1 || (rows = SDDS_RowCount(SDDSin)) < 1 ||
736 !(*regionPosition = SDDS_GetColumnInDoubles(SDDSin, positionColumn)) ||
737 !(*regionName = (char **)SDDS_GetColumn(SDDSin, nameColumn)))
738 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
739 for (i = 1; i < rows; i++)
740 if ((*regionPosition)[i] <= (*regionPosition)[i - 1]) {
741 fprintf(stderr, "sddshist: Error in region position data: row %" PRId64 " is %21.15e while row %" PRId64 " is %21.15e\n",
742 i - 1, (*regionPosition)[i - 1], i, (*regionPosition)[i]);
743 exit(EXIT_FAILURE);
744 }
745 *regionPosition = SDDS_Realloc(*regionPosition, sizeof(**regionPosition) * (rows + 1));
746 (*regionPosition)[rows] = DBL_MAX;
747 *regionName = SDDS_Realloc(*regionName, sizeof(**regionName) * (rows + 1));
748 cp_str(&(*regionName)[rows], "Beyond");
749 return rows;
750}
751
752void classifyByRegion(double *data, double *weight, int64_t points, double *histogram, double *regionPosition, int64_t bins) {
753 int64_t iData, iBin;
754
755 for (iBin = 0; iBin < bins; iBin++)
756 histogram[iBin] = 0;
757
758 for (iData = 0; iData < points; iData++) {
759 /* Note that bins is 1 greater than the number of positions */
760 for (iBin = 0; iBin < bins - 1; iBin++) {
761 if (data[iData] < regionPosition[iBin])
762 break;
763 }
764 if (weight) {
765 histogram[iBin] += weight[iData];
766 } else {
767 histogram[iBin] += 1;
768 }
769 }
770}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
void * SDDS_GetColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves a copy of the data for a specified column, including only rows marked as "of interest".
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
Definition SDDS_info.c:364
int32_t SDDS_GetColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Retrieves information about a specified column in the SDDS dataset.
Definition SDDS_info.c:41
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_StringIsBlank(char *s)
Checks if a string is blank (contains only whitespace characters).
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void * trealloc(void *old_ptr, uint64_t size_of_block)
Reallocates a memory block to a new size.
Definition array.c:181
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
char * cp_str(char **s, char *t)
Copies a string, allocating memory for storage.
Definition cp_str.c:28
double max_in_array(double *array, long n)
Finds the maximum value in an array of doubles.
Definition findMinMax.c:318
long make_histogram_weighted(double *hist, long n_bins, double lo, double hi, double *data, long n_pts, long new_start, double *weight)
Compiles a weighted histogram from data points.
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long computeWeightedMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, double *w, long n, long numThreads)
Computes weighted statistical moments of an array using multiple threads.
Definition moments.c:239
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
Definition moments.c:127
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.