SDDSlib
Loading...
Searching...
No Matches
sddshist.c
Go to the documentation of this file.
1/**
2 * @file sddshist.c
3 * @brief SDDS-format Histogram Command-Line Tool
4 *
5 * This program generates histograms from SDDS-formatted data files.
6 * It is based on the `mpl` program `hist`.
7 *
8 * @section Usage
9 * ```
10 * sddshist [<inputfile>] [<outputfile>] [options]
11 * ```
12 *
13 * **Options:**
14 * - `-pipe=[input][,output]`: Use pipe for input and/or output.
15 * - `-dataColumn=<column-name>`: Specify the column to histogram.
16 * - `-bins=<number>`: Set the number of bins for the histogram.
17 * - `-sizeOfBins=<value>`: Set the size of each bin.
18 * - `-regions=filename=<filename>,position=<columnName>,name=<columnName>`: Define region-based histogramming.
19 * - `-lowerLimit=<value>`: Set the lower limit of the histogram.
20 * - `-upperLimit=<value>`: Set the upper limit of the histogram.
21 * - `-expand=<factor>`: Expand the range of the histogram by the given factor.
22 * - `-filter=<column-name>,<lower-limit>,<upper-limit>`: Filter data points based on column values.
23 * - `-weightColumn=<column-name>`: Weight the histogram with the specified column.
24 * - `-sides[=<points>]`: Add sides to the histogram down to the zero level.
25 * - `-normalize[={sum|area|peak}]`: Normalize the histogram.
26 * - `-cdf[=only]`: Include the Cumulative Distribution Function (CDF) in the output.
27 * - `-threads=<number>`: Specify the number of threads to use.
28 * - `-statistics`: Include statistical information in the output.
29 * - `-verbose`: Enable informational printouts during processing.
30 * - `-majorOrder=row|column`: Set the major order of data.
31 *
32 * @copyright
33 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
34 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
35 *
36 * @license
37 * This file is distributed under the terms of the Software License Agreement
38 * found in the file LICENSE included with this distribution.
39 *
40 * @author M. Borland, C. Saunders, R. Soliday, H. Shang
41 */
42
43#include "mdb.h"
44#include "scan.h"
45#include "SDDS.h"
46
47/* Enumeration for option types */
48enum option_type {
49 SET_BINS,
50 SET_LOWERLIMIT,
51 SET_UPPERLIMIT,
52 SET_DATACOLUMN,
53 SET_FILTER,
54 SET_BINSIZE,
55 SET_WEIGHTCOLUMN,
56 SET_NORMALIZE,
57 SET_STATISTICS,
58 SET_SIDES,
59 SET_VERBOSE,
60 SET_PIPE,
61 SET_CDF,
62 SET_EXPAND,
63 SET_MAJOR_ORDER,
64 SET_REGION_FILE,
65 SET_THREADS,
66 N_OPTIONS
67};
68
69char *option[N_OPTIONS] = {
70 "bins", "lowerlimit", "upperlimit", "datacolumn",
71 "filter", "sizeofbins", "weightcolumn",
72 "normalize", "statistics", "sides", "verbose", "pipe", "cdf", "expand",
73 "majorOrder", "regions", "threads"
74};
75
76char *USAGE =
77 "Usage: sddshist [<inputfile>] [<outputfile>] [options]\n"
78 "Options:\n"
79 " -pipe=[input][,output] Use pipe for input and/or output.\n"
80 " -dataColumn=<column-name> Specify the column to histogram.\n"
81 " -bins=<number> Set the number of bins for the histogram.\n"
82 " -sizeOfBins=<value> Set the size of each bin.\n"
83 " -regions=filename=<filename>,position=<columnName>,name=<columnName>\n"
84 " Define region-based histogramming.\n"
85 " -lowerLimit=<value> Set the lower limit of the histogram.\n"
86 " -upperLimit=<value> Set the upper limit of the histogram.\n"
87 " -expand=<factor> Expand the range of the histogram by the given factor.\n"
88 " -filter=<column-name>,<lower>,<upper> Filter data points based on column values.\n"
89 " -weightColumn=<column-name> Weight the histogram with the specified column.\n"
90 " -sides[=<points>] Add sides to the histogram down to zero level.\n"
91 " -normalize[={sum|area|peak}] Normalize the histogram.\n"
92 " -cdf[=only] Include the CDF in the output. Use 'only' to exclude the histogram.\n"
93 " -threads=<number> Specify the number of threads to use.\n"
94 " -statistics Include statistical information in the output.\n"
95 " -verbose Enable informational printouts during processing.\n"
96 " -majorOrder=row|column Set the major order of data.\n";
97
98static char *additional_help = "\n\
99bins Number of bins for the histogram.\n\
100sizeOfBins Size of each bin for the histogram.\n\
101regions File defining region-based histogramming with bin boundaries and names.\n\
102lowerLimit Lower limit of the histogram.\n\
103upperLimit Upper limit of the histogram.\n\
104expand Expand the range of the histogram by the given factor.\n\
105dataColumn Name of the column to histogram.\n\
106filter Include only data points where the specified column's value is between the given limits.\n\
107weightColumn Weight the histogram using the specified column's values.\n\
108normalize Normalize the histogram as specified (sum, area, peak).\n\
109statistics Include statistical information (mean, RMS, standard deviation) in the output.\n\
110sides Add sides to the histogram down to the zero level.\n\
111cdf Include the Cumulative Distribution Function (CDF) in the output. Use 'only' to exclude the histogram.\n\
112verbose Enable informational printouts during processing.\n\n\
113Program by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
114
115#define NORMALIZE_PEAK 0
116#define NORMALIZE_AREA 1
117#define NORMALIZE_SUM 2
118#define NORMALIZE_NO 3
119#define N_NORMALIZE_OPTIONS 4
120char *normalize_option[N_NORMALIZE_OPTIONS] = {
121 "peak", "area", "sum", "no"
122};
123
124static int64_t filter(double *x, double *y, double *filterData, int64_t npts, double lower_filter, double upper_filter);
125static long setupOutputFile(SDDS_DATASET *outTable, char *outputfile, SDDS_DATASET *inTable, char *inputfile,
126 char *dataColumn, char *weightColumn, char *filterColumn, double lowerFilter,
127 double upperFilter, SDDS_DATASET *regionTable, char *regionNameColumn, long doStats,
128 int64_t bins, double binSize, long normalizeMode, short columnMajorOrder);
129
130/* Column and parameter indices for output file */
131static long iIndep, iFreq, iBins, iBinSize, iLoFilter, iUpFilter, iMean, iRMS, iStDev, iPoints, iCdf;
132static short cdfOnly, freOnly;
133
134int64_t readRegionFile(SDDS_DATASET *SDDSin, char *filename, char *positionColumn, char *nameColumn, double **regionPosition, char ***regionName);
135void classifyByRegion(double *data, double *weight, int64_t points, double *histogram, double *regionPosition, int64_t bins);
136
137int main(int argc, char **argv) {
138 /* Flags to keep track of what is set in command line */
139 long binsGiven, lowerLimitGiven, upperLimitGiven;
140 SDDS_DATASET inTable, outTable;
141 double *data; /* Pointer to the array to histogram */
142 double *filterData; /* Pointer to the filter data */
143 double *weightData; /* Pointer to the weight data */
144 double *hist, *hist1; /* To store the histogram */
145 double *CDF, *CDF1; /* To store the CDF */
146 double sum; /* Total of histogram */
147 double *indep; /* Values of bin centers */
148 double lowerLimit, upperLimit; /* Lower and upper limits in histogram */
149 double givenLowerLimit, givenUpperLimit; /* Given lower and upper limits */
150 double range, binSize;
151 int64_t bins; /* Number of bins in the histogram */
152 long doStats; /* Include statistics in output file */
153 double mean, rms, standDev, mad;
154 char *filterColumn, *dataColumn, *weightColumn;
155 double lowerFilter = 0, upperFilter = 0; /* Filter range */
156 int64_t points; /* Number of data points after filtering */
157 SCANNED_ARG *scanned; /* Scanned argument structure */
158 char *inputfile, *outputfile; /* Input and output filenames */
159 double dx; /* Spacing of bins in histogram */
160 int64_t i; /* Loop variable */
161 long pointsBinned; /* Number of points in histogram */
162 long normalizeMode, doSides, verbose, readCode;
163 int64_t rows;
164 unsigned long pipeFlags, majorOrderFlag, regionFlags = 0;
165 char *cdf;
166 double expansionFactor = 0;
167 short columnMajorOrder = -1;
168 char *regionFilename = NULL, *regionPositionColumn = NULL, *regionNameColumn = NULL;
169 double *regionPosition = NULL;
170 int64_t nRegions = 0;
171 char **regionName = NULL;
172 SDDS_DATASET SDDSregion;
173 int threads = 1;
174
176 argc = scanargs(&scanned, argc, argv);
177 if (argc < 3) {
178 fprintf(stderr, "%s\n", USAGE);
179 fputs(additional_help, stderr);
180 exit(EXIT_FAILURE);
181 }
182
183 binsGiven = lowerLimitGiven = upperLimitGiven = 0;
184 binSize = doSides = 0;
185 inputfile = outputfile = NULL;
186 dataColumn = filterColumn = weightColumn = NULL;
187 doStats = verbose = 0;
188 normalizeMode = NORMALIZE_NO;
189 pipeFlags = 0;
190 dx = 0;
191 cdfOnly = 0;
192 freOnly = 1;
193
194 for (i = 1; i < argc; i++) {
195 if (scanned[i].arg_type == OPTION) {
196 switch (match_string(scanned[i].list[0], option, N_OPTIONS, 0)) {
197 case SET_MAJOR_ORDER:
198 majorOrderFlag = 0;
199 scanned[i].n_items--;
200 if (scanned[i].n_items > 0 && (!scanItemList(&majorOrderFlag, scanned[i].list + 1, &scanned[i].n_items, 0, "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER, "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
201 SDDS_Bomb("invalid -majorOrder syntax/values");
202 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
203 columnMajorOrder = 1;
204 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
205 columnMajorOrder = 0;
206 break;
207 case SET_BINS: /* Set number of bins */
208 if (binsGiven)
209 SDDS_Bomb("-bins specified more than once");
210 binsGiven = 1;
211 if (sscanf(scanned[i].list[1], "%" SCNd64, &bins) != 1 || bins <= 0)
212 SDDS_Bomb("invalid value for bins");
213 break;
214 case SET_LOWERLIMIT:
215 if (lowerLimitGiven)
216 SDDS_Bomb("-lowerLimit specified more than once");
217 lowerLimitGiven = 1;
218 if (sscanf(scanned[i].list[1], "%lf", &givenLowerLimit) != 1)
219 SDDS_Bomb("invalid value for lowerLimit");
220 break;
221 case SET_UPPERLIMIT:
222 if (upperLimitGiven)
223 SDDS_Bomb("-upperLimit specified more than once");
224 upperLimitGiven = 1;
225 if (sscanf(scanned[i].list[1], "%lf", &givenUpperLimit) != 1)
226 SDDS_Bomb("invalid value for upperLimit");
227 break;
228 case SET_EXPAND:
229 expansionFactor = 0;
230 if (sscanf(scanned[i].list[1], "%lf", &expansionFactor) != 1 || expansionFactor <= 0)
231 SDDS_Bomb("invalid value for expand");
232 break;
233 case SET_DATACOLUMN:
234 if (dataColumn)
235 SDDS_Bomb("-dataColumn specified more than once");
236 if (scanned[i].n_items != 2)
237 SDDS_Bomb("invalid -dataColumn syntax---supply name");
238 dataColumn = scanned[i].list[1];
239 break;
240 case SET_FILTER:
241 if (filterColumn)
242 SDDS_Bomb("multiple filter specifications not allowed");
243 if (scanned[i].n_items != 4 || sscanf(scanned[i].list[2], "%lf", &lowerFilter) != 1 ||
244 sscanf(scanned[i].list[3], "%lf", &upperFilter) != 1 || lowerFilter > upperFilter)
245 SDDS_Bomb("invalid -filter syntax/values");
246 filterColumn = scanned[i].list[1];
247 break;
248 case SET_WEIGHTCOLUMN:
249 if (weightColumn)
250 SDDS_Bomb("multiple weighting columns not allowed");
251 if (scanned[i].n_items != 2)
252 SDDS_Bomb("-weightColumn requires a column name");
253 weightColumn = scanned[i].list[1];
254 break;
255 case SET_NORMALIZE:
256 if (scanned[i].n_items == 1)
257 normalizeMode = NORMALIZE_SUM;
258 else if (scanned[i].n_items != 2 || (normalizeMode = match_string(scanned[i].list[1], normalize_option, N_NORMALIZE_OPTIONS, 0)) < 0)
259 SDDS_Bomb("invalid -normalize syntax");
260 break;
261 case SET_STATISTICS:
262 doStats = 1;
263 break;
264 case SET_SIDES:
265 if (scanned[i].n_items == 1)
266 doSides = 1;
267 else if (scanned[i].n_items > 2 || (sscanf(scanned[i].list[1], "%ld", &doSides) != 1 || doSides <= 0))
268 SDDS_Bomb("invalid -sides syntax");
269 break;
270 case SET_VERBOSE:
271 verbose = 1;
272 break;
273 case SET_BINSIZE:
274 if (sscanf(scanned[i].list[1], "%le", &binSize) != 1 || binSize <= 0)
275 SDDS_Bomb("invalid value for bin size");
276 break;
277 case SET_PIPE:
278 if (!processPipeOption(scanned[i].list + 1, scanned[i].n_items - 1, &pipeFlags))
279 SDDS_Bomb("invalid -pipe syntax");
280 break;
281 case SET_CDF:
282 if (scanned[i].n_items == 1)
283 cdfOnly = 0;
284 else {
285 if (scanned[i].n_items != 2)
286 SDDS_Bomb("invalid -cdf syntax");
287 cdf = scanned[i].list[1];
288 if (strcmp(cdf, "only") != 0)
289 SDDS_Bomb("invalid -cdf value, it should be -cdf or -cdf=only");
290 cdfOnly = 1;
291 }
292 freOnly = 0;
293 break;
294 case SET_REGION_FILE:
295 if (scanned[i].n_items != 4)
296 SDDS_Bomb("invalid -regionFile syntax");
297 regionFlags = 0;
298 scanned[i].n_items -= 1;
299 if (!scanItemList(&regionFlags, scanned[i].list + 1, &scanned[i].n_items, 0,
300 "filename", SDDS_STRING, &regionFilename, 1, 1,
301 "position", SDDS_STRING, &regionPositionColumn, 1, 2,
302 "name", SDDS_STRING, &regionNameColumn, 1, 4, NULL) ||
303 regionFlags != (1 + 2 + 4) || !regionFilename || !regionPositionColumn || !regionNameColumn)
304 SDDS_Bomb("invalid -regionFile syntax");
305 break;
306 case SET_THREADS:
307 if (scanned[i].n_items != 2 ||
308 !sscanf(scanned[i].list[1], "%d", &threads) || threads < 1)
309 SDDS_Bomb("invalid -threads syntax");
310 break;
311 default:
312 fprintf(stderr, "Error: option %s not recognized\n", scanned[i].list[0]);
313 exit(EXIT_FAILURE);
314 break;
315 }
316 } else {
317 /* Argument is filename */
318 if (!inputfile)
319 inputfile = scanned[i].list[0];
320 else if (!outputfile)
321 outputfile = scanned[i].list[0];
322 else
323 SDDS_Bomb("too many filenames seen");
324 }
325 }
326
327 processFilenames("sddshist", &inputfile, &outputfile, pipeFlags, 0, NULL);
328
329 if (binSize && binsGiven && regionFlags)
330 SDDS_Bomb("Provide only one of -bins, -sizeOfBins, or -regions");
331 if (!binsGiven)
332 bins = 20;
333 if (!dataColumn)
334 SDDS_Bomb("-dataColumn must be specified");
335
336 if (regionFlags) {
337 if (!(nRegions = readRegionFile(&SDDSregion, regionFilename, regionPositionColumn, regionNameColumn, &regionPosition, &regionName)))
338 SDDS_Bomb("Problem with region file. Check existence and type of columns");
339 doSides = 0;
340 bins = nRegions + 1;
341 }
342
343 hist = tmalloc(sizeof(*hist) * (bins + 2 * doSides));
344 CDF = CDF1 = tmalloc(sizeof(*hist) * (bins + 2 * doSides));
345 indep = tmalloc(sizeof(*indep) * (bins + 2 * doSides));
346 pointsBinned = 0;
347
348 if (!SDDS_InitializeInput(&inTable, inputfile) ||
349 SDDS_GetColumnIndex(&inTable, dataColumn) < 0 ||
350 (weightColumn && SDDS_GetColumnIndex(&inTable, weightColumn) < 0) ||
351 (filterColumn && SDDS_GetColumnIndex(&inTable, filterColumn) < 0))
352 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
353 if (!setupOutputFile(&outTable, outputfile, &inTable, inputfile, dataColumn, weightColumn, filterColumn, lowerFilter, upperFilter, &SDDSregion, regionNameColumn, doStats, bins, binSize, normalizeMode, columnMajorOrder))
354 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
355
356 data = weightData = filterData = NULL;
357 while ((readCode = SDDS_ReadPage(&inTable)) > 0) {
358 if ((rows = SDDS_CountRowsOfInterest(&inTable)) < 0)
359 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
360 if (rows && (!(data = SDDS_GetColumnInDoubles(&inTable, dataColumn)) ||
361 (weightColumn && !(weightData = SDDS_GetColumnInDoubles(&inTable, weightColumn))) ||
362 (filterColumn && !(filterData = SDDS_GetColumnInDoubles(&inTable, filterColumn)))))
363 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
364
365 if (rows && filterColumn)
366 points = filter(data, weightData, filterData, rows, lowerFilter, upperFilter);
367 else
368 points = rows;
369
370 pointsBinned = 0;
371 if (points) {
372 if (doStats) {
373 if (!weightColumn)
374 computeMomentsThreaded(&mean, &rms, &standDev, &mad, data, points, threads);
375 else
376 computeWeightedMomentsThreaded(&mean, &rms, &standDev, &mad, data, weightData, points, threads);
377 }
378
379 if (regionFlags) {
380 classifyByRegion(data, weightData, points, hist, regionPosition, bins);
381 hist1 = hist;
382 } else {
383 if (!lowerLimitGiven) {
384 lowerLimit = (points > 0) ? data[0] : 0;
385 for (i = 0; i < points; i++)
386 if (lowerLimit > data[i])
387 lowerLimit = data[i];
388 } else {
389 lowerLimit = givenLowerLimit;
390 }
391 if (!upperLimitGiven) {
392 upperLimit = (points > 0) ? data[0] : 0;
393 for (i = 0; i < points; i++)
394 if (upperLimit < data[i])
395 upperLimit = data[i];
396 } else {
397 upperLimit = givenUpperLimit;
398 }
399
400 range = upperLimit - lowerLimit;
401 if (!lowerLimitGiven)
402 lowerLimit -= range * 1e-7;
403 if (!upperLimitGiven)
404 upperLimit += range * 1e-7;
405 if (upperLimit == lowerLimit) {
406 if (binSize) {
407 upperLimit += binSize / 2;
408 lowerLimit -= binSize / 2;
409 } else if (fabs(upperLimit) < sqrt(DBL_MIN)) {
410 upperLimit = sqrt(DBL_MIN);
411 lowerLimit = -sqrt(DBL_MIN);
412 } else {
413 upperLimit += upperLimit * (1 + 2 * DBL_EPSILON);
414 lowerLimit -= upperLimit * (1 - 2 * DBL_EPSILON);
415 }
416 }
417 if (expansionFactor > 0) {
418 double center = (upperLimit + lowerLimit) / 2;
419 range = expansionFactor * (upperLimit - lowerLimit);
420 lowerLimit = center - range / 2;
421 upperLimit = center + range / 2;
422 }
423 dx = (upperLimit - lowerLimit) / bins;
424
425 if (binSize) {
426 double middle;
427 range = ((range / binSize) + 1) * binSize;
428 middle = (lowerLimit + upperLimit) / 2;
429 lowerLimit = middle - range / 2;
430 upperLimit = middle + range / 2;
431 dx = binSize;
432 bins = range / binSize + 0.5;
433 if (bins < 1 && !doSides)
434 bins = 2 * doSides;
435 indep = trealloc(indep, sizeof(*indep) * (bins + 2 * doSides));
436 hist = trealloc(hist, sizeof(*hist) * (bins + 2 * doSides));
437 CDF = trealloc(CDF, sizeof(*hist) * (bins + 2 * doSides));
438 }
439
440 for (i = -doSides; i < bins + doSides; i++)
441 indep[i + doSides] = (i + 0.5) * dx + lowerLimit;
442 hist1 = hist + doSides;
443 CDF1 = CDF + doSides;
444 if (doSides) {
445 hist[0] = hist[bins + doSides] = 0;
446 }
447
448 if (!weightColumn)
449 pointsBinned = make_histogram(hist1, bins, lowerLimit, upperLimit, data, points, 1);
450 else
451 pointsBinned = make_histogram_weighted(hist1, bins, lowerLimit, upperLimit, data, points, 1, weightData);
452 }
453
454 sum = 0;
455 for (i = 0; i < bins + doSides; i++) {
456 sum += hist1[i];
457 }
458 CDF1[0] = hist1[0] / sum;
459 for (i = 1; i < bins + doSides; i++) {
460 CDF1[i] = CDF1[i - 1] + hist1[i] / sum;
461 }
462
463 if (verbose)
464 fprintf(stderr, "%ld points of %" PRId64 " from page %ld histogrammed in %" PRId64 " bins\n", pointsBinned, rows, readCode, bins);
465 if (!cdfOnly) {
466 if (normalizeMode != NORMALIZE_NO) {
467 double norm = 0;
468 switch (normalizeMode) {
469 case NORMALIZE_PEAK:
470 norm = max_in_array(hist1, bins);
471 break;
472 case NORMALIZE_AREA:
473 case NORMALIZE_SUM:
474 for (i = 0; i < bins; i++)
475 norm += hist1[i];
476 if (normalizeMode == NORMALIZE_AREA)
477 norm *= dx;
478 break;
479 default:
480 SDDS_Bomb("invalid normalize mode--consult programmer.");
481 break;
482 }
483 if (norm)
484 for (i = 0; i < bins; i++)
485 hist1[i] /= norm;
486 }
487 }
488 }
489
490 if (regionFlags) {
491 if (!SDDS_StartPage(&outTable, bins) ||
492 !SDDS_CopyParameters(&outTable, &inTable) ||
493 !SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iBins, bins, iBinSize, dx, iPoints, pointsBinned, -1))
494 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
495 if (points) {
496 if (!SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, regionPosition, bins, iIndep) ||
497 !SDDS_SetColumn(&outTable, SDDS_SET_BY_NAME, regionName, bins, regionNameColumn))
498 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
499 if (!freOnly && !SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, CDF, bins, iCdf))
500 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
501 if (!cdfOnly && !SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, hist, bins, iFreq))
502 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
503 }
504 } else {
505 if (!SDDS_StartPage(&outTable, bins + 2 * doSides) ||
506 !SDDS_CopyParameters(&outTable, &inTable) ||
507 (points && (!SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, indep, bins + 2 * doSides, iIndep))) ||
508 !SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iBins, bins, iBinSize, dx, iPoints, pointsBinned, -1))
509 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
510 if (!freOnly) {
511 if (points && !SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, CDF, bins + 2 * doSides, iCdf))
512 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
513 }
514 if (!cdfOnly) {
515 if (points && !SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, hist, bins + 2 * doSides, iFreq))
516 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
517 }
518 }
519
520 if (filterColumn && points &&
521 !SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iLoFilter, lowerFilter, iUpFilter, upperFilter, -1))
522 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
523 if (doStats && points &&
524 !SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iMean, mean, iRMS, rms, iStDev, standDev, -1))
525 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
526
527 if (!SDDS_WritePage(&outTable))
528 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
529 if (data)
530 free(data);
531 if (weightData)
532 free(weightData);
533 if (filterData)
534 free(filterData);
535 data = weightData = filterData = NULL;
536 }
537
538 if (!SDDS_Terminate(&inTable)) {
539 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
540 exit(EXIT_FAILURE);
541 }
542 if (!SDDS_Terminate(&outTable)) {
543 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
544 exit(EXIT_FAILURE);
545 }
546 return EXIT_SUCCESS;
547}
548
549/* routine: filter()
550 * purpose: filter set of points {(x, y)} with a specified
551 * window in one of the variables. Works in place.
552 */
553static int64_t filter(double *x, double *y, double *filterData, int64_t npts, double lower_filter, double upper_filter) {
554 int64_t i, j;
555 static char *keep = NULL;
556 static int64_t maxPoints = 0;
557
558 if (maxPoints < npts)
559 keep = trealloc(keep, sizeof(*keep) * (maxPoints = npts));
560
561 for (i = 0; i < npts; i++) {
562 if (filterData[i] < lower_filter || filterData[i] > upper_filter)
563 keep[i] = 0;
564 else
565 keep[i] = 1;
566 }
567
568 for (i = j = 0; i < npts; i++)
569 if (keep[i]) {
570 if (i != j) {
571 if (x)
572 x[j] = x[i];
573 if (y)
574 y[j] = y[i];
575 filterData[j] = filterData[i];
576 }
577 j++;
578 }
579
580 return j;
581}
582
583static long setupOutputFile(SDDS_DATASET *outTable, char *outputfile, SDDS_DATASET *inTable, char *inputfile,
584 char *dataColumn, char *weightColumn, char *filterColumn, double lowerFilter,
585 double upperFilter, SDDS_DATASET *regionTable, char *regionNameColumn, long doStats,
586 int64_t bins, double binSize, long normalizeMode, short columnMajorOrder) {
587 char *symbol, *units, *dataUnits, *outputFormat;
588 int32_t outputType;
589 char s[1024];
590
591 if (!SDDS_InitializeOutput(outTable, SDDS_BINARY, 0, NULL, "sddshist output", outputfile))
592 return 0;
593 if (columnMajorOrder != -1)
594 outTable->layout.data_mode.column_major = columnMajorOrder;
595 else
596 outTable->layout.data_mode.column_major = inTable->layout.data_mode.column_major;
597 if (!SDDS_GetColumnInformation(inTable, "units", &dataUnits, SDDS_GET_BY_NAME, dataColumn))
598 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
599
600 /* Define output columns */
601 outputType = SDDS_DOUBLE;
602 outputFormat = NULL;
603
604 if (!SDDS_TransferColumnDefinition(outTable, inTable, dataColumn, NULL) ||
605 !SDDS_ChangeColumnInformation(outTable, "type", &outputType, SDDS_BY_NAME, dataColumn) ||
606 !SDDS_ChangeColumnInformation(outTable, "format_string", &outputFormat, SDDS_BY_NAME, dataColumn) ||
607 (iIndep = SDDS_GetColumnIndex(outTable, dataColumn)) < 0)
608 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
609 if (regionNameColumn && !SDDS_TransferColumnDefinition(outTable, regionTable, regionNameColumn, NULL))
610 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
611 if (!cdfOnly) {
612 switch (normalizeMode) {
613 case NORMALIZE_PEAK:
614 symbol = "RelativeFrequency";
615 units = NULL;
616 break;
617 case NORMALIZE_AREA:
618 symbol = "NormalizedFrequency";
619 if (dataUnits && !SDDS_StringIsBlank(dataUnits)) {
620 units = tmalloc(sizeof(*units) * (strlen(dataUnits) + 5));
621 if (strchr(dataUnits, ' '))
622 sprintf(units, "1/(%s)", dataUnits);
623 else
624 sprintf(units, "1/%s", dataUnits);
625 } else
626 units = NULL;
627 break;
628 case NORMALIZE_SUM:
629 symbol = "FractionalFrequency";
630 units = NULL;
631 break;
632 default:
633 if (weightColumn) {
634 char *weightUnits = NULL;
635 if (weightColumn && !SDDS_GetColumnInformation(inTable, "units", &weightUnits, SDDS_GET_BY_NAME, weightColumn))
636 return 0;
637 symbol = "WeightedNumberOfOccurrences";
638 units = weightUnits;
639 } else {
640 symbol = "NumberOfOccurrences";
641 units = NULL;
642 }
643 break;
644 }
645
646 if ((iFreq = SDDS_DefineColumn(outTable, "frequency", symbol, units, NULL, NULL, SDDS_DOUBLE, 0)) < 0)
647 return 0;
648 free(units);
649 units = NULL;
650 }
651 if (!freOnly) {
652 sprintf(s, "%sCdf", dataColumn);
653 if ((iCdf = SDDS_DefineColumn(outTable, s, NULL, NULL, NULL, NULL, SDDS_DOUBLE, 0)) < 0)
654 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
655 }
656
657 /* Define output parameters */
658 if (SDDS_DefineParameter(outTable, "sddshistInput", NULL, NULL, NULL, NULL, SDDS_STRING,
659 inputfile) < 0 ||
660 (weightColumn && SDDS_DefineParameter(outTable, "sddshistWeight", NULL, NULL, NULL, NULL,
661 SDDS_STRING, weightColumn) < 0) ||
662 (iBins = SDDS_DefineParameter(outTable, "sddshistBins", NULL, NULL, NULL, NULL, SDDS_LONG,
663 NULL)) < 0 ||
664 (iBinSize = SDDS_DefineParameter(outTable, "sddshistBinSize", NULL, NULL, NULL, NULL, SDDS_DOUBLE, NULL)) < 0 ||
665 (iPoints = SDDS_DefineParameter(outTable, "sddshistBinned", NULL, NULL, NULL, NULL, SDDS_LONG, NULL)) < 0)
666 return 0;
667 if (filterColumn) {
668 char *filterUnits;
669 if (!SDDS_GetColumnInformation(inTable, "units", &filterUnits, SDDS_GET_BY_NAME, filterColumn) ||
670 SDDS_DefineParameter(outTable, "sddshistFilter", NULL, NULL, NULL, NULL, SDDS_STRING,
671 filterColumn) < 0 ||
672 (iLoFilter = SDDS_DefineParameter(outTable, "sddshistLowerFilter", NULL, filterUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0 ||
673 (iUpFilter = SDDS_DefineParameter(outTable, "sddshistUpperFilter", NULL, filterUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0)
674 return 0;
675 if (filterUnits)
676 free(filterUnits);
677 }
678 if (doStats) {
679 char *buffer;
680 buffer = tmalloc(sizeof(*buffer) * (strlen(dataColumn) + 20));
681 sprintf(buffer, "%sMean", dataColumn);
682 if ((iMean = SDDS_DefineParameter(outTable, buffer, NULL, dataUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0)
683 return 0;
684 sprintf(buffer, "%sRms", dataColumn);
685 if ((iRMS = SDDS_DefineParameter(outTable, buffer, NULL, dataUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0)
686 return 0;
687 sprintf(buffer, "%sStDev", dataColumn);
688 if ((iStDev = SDDS_DefineParameter(outTable, buffer, NULL, dataUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0)
689 return 0;
690 free(buffer);
691 }
692 if (SDDS_DefineParameter(outTable, "sddshistNormMode", NULL, NULL, NULL, NULL, SDDS_STRING, normalize_option[normalizeMode]) < 0 ||
693 !SDDS_TransferAllParameterDefinitions(outTable, inTable, SDDS_TRANSFER_KEEPOLD) ||
694 !SDDS_WriteLayout(outTable))
695 return 0;
696 return 1;
697}
698
699int64_t readRegionFile(SDDS_DATASET *SDDSin, char *filename, char *positionColumn, char *nameColumn, double **regionPosition, char ***regionName) {
700 int64_t i, rows = 0;
701 if (!SDDS_InitializeInput(SDDSin, filename) ||
702 SDDS_ReadPage(SDDSin) != 1 || (rows = SDDS_RowCount(SDDSin)) < 1 ||
703 !(*regionPosition = SDDS_GetColumnInDoubles(SDDSin, positionColumn)) ||
704 !(*regionName = (char **)SDDS_GetColumn(SDDSin, nameColumn)))
705 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
706 for (i = 1; i < rows; i++)
707 if ((*regionPosition)[i] <= (*regionPosition)[i - 1]) {
708 fprintf(stderr, "sddshist: Error in region position data: row %" PRId64 " is %21.15e while row %" PRId64 " is %21.15e\n",
709 i - 1, (*regionPosition)[i - 1], i, (*regionPosition)[i]);
710 exit(EXIT_FAILURE);
711 }
712 *regionPosition = SDDS_Realloc(*regionPosition, sizeof(**regionPosition) * (rows + 1));
713 (*regionPosition)[rows] = DBL_MAX;
714 *regionName = SDDS_Realloc(*regionName, sizeof(**regionName) * (rows + 1));
715 cp_str(&(*regionName)[rows], "Beyond");
716 return rows;
717}
718
719void classifyByRegion(double *data, double *weight, int64_t points, double *histogram, double *regionPosition, int64_t bins) {
720 int64_t iData, iBin;
721
722 for (iBin = 0; iBin < bins; iBin++)
723 histogram[iBin] = 0;
724
725 for (iData = 0; iData < points; iData++) {
726 /* Note that bins is 1 greater than the number of positions */
727 for (iBin = 0; iBin < bins - 1; iBin++) {
728 if (data[iData] < regionPosition[iBin])
729 break;
730 }
731 if (weight) {
732 histogram[iBin] += weight[iData];
733 } else {
734 histogram[iBin] += 1;
735 }
736 }
737}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
void * SDDS_GetColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves a copy of the data for a specified column, including only rows marked as "of interest".
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
Definition SDDS_info.c:364
int32_t SDDS_GetColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Retrieves information about a specified column in the SDDS dataset.
Definition SDDS_info.c:41
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_StringIsBlank(char *s)
Checks if a string is blank (contains only whitespace characters).
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void * trealloc(void *old_ptr, uint64_t size_of_block)
Reallocates a memory block to a new size.
Definition array.c:181
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
char * cp_str(char **s, char *t)
Copies a string, allocating memory for storage.
Definition cp_str.c:28
double max_in_array(double *array, long n)
Finds the maximum value in an array of doubles.
Definition findMinMax.c:318
long make_histogram_weighted(double *hist, long n_bins, double lo, double hi, double *data, long n_pts, long new_start, double *weight)
Compiles a weighted histogram from data points.
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long computeWeightedMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, double *w, long n, long numThreads)
Computes weighted statistical moments of an array using multiple threads.
Definition moments.c:239
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
Definition moments.c:127
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.