SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddshist.c File Reference

Detailed Description

Command-line tool for generating histograms from SDDS-formatted data.

This program is a command-line tool designed to generate histograms from data contained in SDDS (Self Describing Data Set) files. It supports various configurations like normalization, filtering, multi-threading for performance, and the ability to define histograms based on specific regions in the data.

Usage

sddshist [<inputfile>] [<outputfile>]
[-pipe=[input][,output]]
-dataColumn=<column-name>
[{
-bins=<number> |
-sizeOfBins=<value> |
-regions=filename=<filename>,position=<columnName>,name=<columnName>
}]
[-lowerLimit=<value>]
[-upperLimit=<value>]
[-expand=<factor>]
[-filter=<column-name>,<lower-limit>,<upper-limit>]
[-weightColumn=<column-name>]
[-sides[=<points>]]
[-normalize[={sum|area|peak}]]
[-cdf[=only]]
[-threads=<number>]
[-statistics]
[-verbose]
[-majorOrder=row|column]

Options

Required Description
-dataColumn=<column-name> Specifies the column to be histogrammed.
Optional Description
-pipe Use pipe for input and/or output.
-bins Set the number of bins for the histogram.
-sizeOfBins Set the size of each bin.
-regions Define region-based histogramming with file and column information.
-lowerLimit Set the lower limit for the histogram.
-upperLimit Set the upper limit for the histogram.
-expand Expand the histogram range by a given factor.
-filter Filter data points based on values in a specified column.
-weightColumn Specify a column to weight the histogram.
-sides Extend the histogram to zero level.
-normalize Normalize the histogram (sum, area, or peak).
-cdf Include the Cumulative Distribution Function (CDF) in the output.
-threads Set the number of threads for processing.
-statistics Include statistical details in the output.
-verbose Print additional processing details.
-majorOrder Set data major order (row or column).

Incompatibilities

  • -bins, -sizeOfBins, and -regions are mutually exclusive.
  • -normalize and -cdf=only cannot be used together.
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Authors
M. Borland, C. Saunders, R. Soliday, H. Shang

Definition in file sddshist.c.

#include "mdb.h"
#include "scan.h"
#include "SDDS.h"

Go to the source code of this file.

Functions

static int64_t filter (double *x, double *y, double *filterData, int64_t npts, double lower_filter, double upper_filter)
 
static long setupOutputFile (SDDS_DATASET *outTable, char *outputfile, SDDS_DATASET *inTable, char *inputfile, char *dataColumn, char *weightColumn, char *filterColumn, double lowerFilter, double upperFilter, SDDS_DATASET *regionTable, char *regionNameColumn, long doStats, int64_t bins, double binSize, long normalizeMode, short columnMajorOrder)
 
int64_t readRegionFile (SDDS_DATASET *SDDSin, char *filename, char *positionColumn, char *nameColumn, double **regionPosition, char ***regionName)
 
void classifyByRegion (double *data, double *weight, int64_t points, double *histogram, double *regionPosition, int64_t bins)
 
int main (int argc, char **argv)
 

Function Documentation

◆ classifyByRegion()

void classifyByRegion ( double * data,
double * weight,
int64_t points,
double * histogram,
double * regionPosition,
int64_t bins )

Definition at line 752 of file sddshist.c.

752 {
753 int64_t iData, iBin;
754
755 for (iBin = 0; iBin < bins; iBin++)
756 histogram[iBin] = 0;
757
758 for (iData = 0; iData < points; iData++) {
759 /* Note that bins is 1 greater than the number of positions */
760 for (iBin = 0; iBin < bins - 1; iBin++) {
761 if (data[iData] < regionPosition[iBin])
762 break;
763 }
764 if (weight) {
765 histogram[iBin] += weight[iData];
766 } else {
767 histogram[iBin] += 1;
768 }
769 }
770}

◆ filter()

static int64_t filter ( double * x,
double * y,
double * filterData,
int64_t npts,
double lower_filter,
double upper_filter )
static

Definition at line 586 of file sddshist.c.

586 {
587 int64_t i, j;
588 static char *keep = NULL;
589 static int64_t maxPoints = 0;
590
591 if (maxPoints < npts)
592 keep = trealloc(keep, sizeof(*keep) * (maxPoints = npts));
593
594 for (i = 0; i < npts; i++) {
595 if (filterData[i] < lower_filter || filterData[i] > upper_filter)
596 keep[i] = 0;
597 else
598 keep[i] = 1;
599 }
600
601 for (i = j = 0; i < npts; i++)
602 if (keep[i]) {
603 if (i != j) {
604 if (x)
605 x[j] = x[i];
606 if (y)
607 y[j] = y[i];
608 filterData[j] = filterData[i];
609 }
610 j++;
611 }
612
613 return j;
614}
void * trealloc(void *old_ptr, uint64_t size_of_block)
Reallocates a memory block to a new size.
Definition array.c:181

◆ main()

int main ( int argc,
char ** argv )

Definition at line 171 of file sddshist.c.

171 {
172 /* Flags to keep track of what is set in command line */
173 long binsGiven, lowerLimitGiven, upperLimitGiven;
174 SDDS_DATASET inTable, outTable;
175 double *data; /* Pointer to the array to histogram */
176 double *filterData; /* Pointer to the filter data */
177 double *weightData; /* Pointer to the weight data */
178 double *hist, *hist1; /* To store the histogram */
179 double *CDF, *CDF1; /* To store the CDF */
180 double sum; /* Total of histogram */
181 double *indep; /* Values of bin centers */
182 double lowerLimit, upperLimit; /* Lower and upper limits in histogram */
183 double givenLowerLimit, givenUpperLimit; /* Given lower and upper limits */
184 double range, binSize;
185 int64_t bins; /* Number of bins in the histogram */
186 long doStats; /* Include statistics in output file */
187 double mean, rms, standDev, mad;
188 char *filterColumn, *dataColumn, *weightColumn;
189 double lowerFilter = 0, upperFilter = 0; /* Filter range */
190 int64_t points; /* Number of data points after filtering */
191 SCANNED_ARG *scanned; /* Scanned argument structure */
192 char *inputfile, *outputfile; /* Input and output filenames */
193 double dx; /* Spacing of bins in histogram */
194 int64_t i; /* Loop variable */
195 long pointsBinned; /* Number of points in histogram */
196 long normalizeMode, doSides, verbose, readCode;
197 int64_t rows;
198 unsigned long pipeFlags, majorOrderFlag, regionFlags = 0;
199 char *cdf;
200 double expansionFactor = 0;
201 short columnMajorOrder = -1;
202 char *regionFilename = NULL, *regionPositionColumn = NULL, *regionNameColumn = NULL;
203 double *regionPosition = NULL;
204 int64_t nRegions = 0;
205 char **regionName = NULL;
206 SDDS_DATASET SDDSregion;
207 int threads = 1;
208
210 argc = scanargs(&scanned, argc, argv);
211 if (argc < 3) {
212 fprintf(stderr, "%s\n", USAGE);
213 exit(EXIT_FAILURE);
214 }
215
216 binsGiven = lowerLimitGiven = upperLimitGiven = 0;
217 binSize = doSides = 0;
218 inputfile = outputfile = NULL;
219 dataColumn = filterColumn = weightColumn = NULL;
220 doStats = verbose = 0;
221 normalizeMode = NORMALIZE_NO;
222 pipeFlags = 0;
223 dx = 0;
224 cdfOnly = 0;
225 freOnly = 1;
226
227 for (i = 1; i < argc; i++) {
228 if (scanned[i].arg_type == OPTION) {
229 switch (match_string(scanned[i].list[0], option, N_OPTIONS, 0)) {
230 case SET_MAJOR_ORDER:
231 majorOrderFlag = 0;
232 scanned[i].n_items--;
233 if (scanned[i].n_items > 0 && (!scanItemList(&majorOrderFlag, scanned[i].list + 1, &scanned[i].n_items, 0, "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER, "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
234 SDDS_Bomb("invalid -majorOrder syntax/values");
235 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
236 columnMajorOrder = 1;
237 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
238 columnMajorOrder = 0;
239 break;
240 case SET_BINS: /* Set number of bins */
241 if (binsGiven)
242 SDDS_Bomb("-bins specified more than once");
243 binsGiven = 1;
244 if (sscanf(scanned[i].list[1], "%" SCNd64, &bins) != 1 || bins <= 0)
245 SDDS_Bomb("invalid value for bins");
246 break;
247 case SET_LOWERLIMIT:
248 if (lowerLimitGiven)
249 SDDS_Bomb("-lowerLimit specified more than once");
250 lowerLimitGiven = 1;
251 if (sscanf(scanned[i].list[1], "%lf", &givenLowerLimit) != 1)
252 SDDS_Bomb("invalid value for lowerLimit");
253 break;
254 case SET_UPPERLIMIT:
255 if (upperLimitGiven)
256 SDDS_Bomb("-upperLimit specified more than once");
257 upperLimitGiven = 1;
258 if (sscanf(scanned[i].list[1], "%lf", &givenUpperLimit) != 1)
259 SDDS_Bomb("invalid value for upperLimit");
260 break;
261 case SET_EXPAND:
262 expansionFactor = 0;
263 if (sscanf(scanned[i].list[1], "%lf", &expansionFactor) != 1 || expansionFactor <= 0)
264 SDDS_Bomb("invalid value for expand");
265 break;
266 case SET_DATACOLUMN:
267 if (dataColumn)
268 SDDS_Bomb("-dataColumn specified more than once");
269 if (scanned[i].n_items != 2)
270 SDDS_Bomb("invalid -dataColumn syntax---supply name");
271 dataColumn = scanned[i].list[1];
272 break;
273 case SET_FILTER:
274 if (filterColumn)
275 SDDS_Bomb("multiple filter specifications not allowed");
276 if (scanned[i].n_items != 4 || sscanf(scanned[i].list[2], "%lf", &lowerFilter) != 1 ||
277 sscanf(scanned[i].list[3], "%lf", &upperFilter) != 1 || lowerFilter > upperFilter)
278 SDDS_Bomb("invalid -filter syntax/values");
279 filterColumn = scanned[i].list[1];
280 break;
281 case SET_WEIGHTCOLUMN:
282 if (weightColumn)
283 SDDS_Bomb("multiple weighting columns not allowed");
284 if (scanned[i].n_items != 2)
285 SDDS_Bomb("-weightColumn requires a column name");
286 weightColumn = scanned[i].list[1];
287 break;
288 case SET_NORMALIZE:
289 if (scanned[i].n_items == 1)
290 normalizeMode = NORMALIZE_SUM;
291 else if (scanned[i].n_items != 2 || (normalizeMode = match_string(scanned[i].list[1], normalize_option, N_NORMALIZE_OPTIONS, 0)) < 0)
292 SDDS_Bomb("invalid -normalize syntax");
293 break;
294 case SET_STATISTICS:
295 doStats = 1;
296 break;
297 case SET_SIDES:
298 if (scanned[i].n_items == 1)
299 doSides = 1;
300 else if (scanned[i].n_items > 2 || (sscanf(scanned[i].list[1], "%ld", &doSides) != 1 || doSides <= 0))
301 SDDS_Bomb("invalid -sides syntax");
302 break;
303 case SET_VERBOSE:
304 verbose = 1;
305 break;
306 case SET_BINSIZE:
307 if (sscanf(scanned[i].list[1], "%le", &binSize) != 1 || binSize <= 0)
308 SDDS_Bomb("invalid value for bin size");
309 break;
310 case SET_PIPE:
311 if (!processPipeOption(scanned[i].list + 1, scanned[i].n_items - 1, &pipeFlags))
312 SDDS_Bomb("invalid -pipe syntax");
313 break;
314 case SET_CDF:
315 if (scanned[i].n_items == 1)
316 cdfOnly = 0;
317 else {
318 if (scanned[i].n_items != 2)
319 SDDS_Bomb("invalid -cdf syntax");
320 cdf = scanned[i].list[1];
321 if (strcmp(cdf, "only") != 0)
322 SDDS_Bomb("invalid -cdf value, it should be -cdf or -cdf=only");
323 cdfOnly = 1;
324 }
325 freOnly = 0;
326 break;
327 case SET_REGION_FILE:
328 if (scanned[i].n_items != 4)
329 SDDS_Bomb("invalid -regionFile syntax");
330 regionFlags = 0;
331 scanned[i].n_items -= 1;
332 if (!scanItemList(&regionFlags, scanned[i].list + 1, &scanned[i].n_items, 0,
333 "filename", SDDS_STRING, &regionFilename, 1, 1,
334 "position", SDDS_STRING, &regionPositionColumn, 1, 2,
335 "name", SDDS_STRING, &regionNameColumn, 1, 4, NULL) ||
336 regionFlags != (1 + 2 + 4) || !regionFilename || !regionPositionColumn || !regionNameColumn)
337 SDDS_Bomb("invalid -regionFile syntax");
338 break;
339 case SET_THREADS:
340 if (scanned[i].n_items != 2 ||
341 !sscanf(scanned[i].list[1], "%d", &threads) || threads < 1)
342 SDDS_Bomb("invalid -threads syntax");
343 break;
344 default:
345 fprintf(stderr, "Error: option %s not recognized\n", scanned[i].list[0]);
346 exit(EXIT_FAILURE);
347 break;
348 }
349 } else {
350 /* Argument is filename */
351 if (!inputfile)
352 inputfile = scanned[i].list[0];
353 else if (!outputfile)
354 outputfile = scanned[i].list[0];
355 else
356 SDDS_Bomb("too many filenames seen");
357 }
358 }
359
360 processFilenames("sddshist", &inputfile, &outputfile, pipeFlags, 0, NULL);
361
362 if (binSize && binsGiven && regionFlags)
363 SDDS_Bomb("Provide only one of -bins, -sizeOfBins, or -regions");
364 if (!binsGiven)
365 bins = 20;
366 if (!dataColumn)
367 SDDS_Bomb("-dataColumn must be specified");
368
369 if (regionFlags) {
370 if (!(nRegions = readRegionFile(&SDDSregion, regionFilename, regionPositionColumn, regionNameColumn, &regionPosition, &regionName)))
371 SDDS_Bomb("Problem with region file. Check existence and type of columns");
372 doSides = 0;
373 bins = nRegions + 1;
374 }
375
376 hist = tmalloc(sizeof(*hist) * (bins + 2 * doSides));
377 CDF = CDF1 = tmalloc(sizeof(*hist) * (bins + 2 * doSides));
378 indep = tmalloc(sizeof(*indep) * (bins + 2 * doSides));
379 pointsBinned = 0;
380
381 if (!SDDS_InitializeInput(&inTable, inputfile) ||
382 SDDS_GetColumnIndex(&inTable, dataColumn) < 0 ||
383 (weightColumn && SDDS_GetColumnIndex(&inTable, weightColumn) < 0) ||
384 (filterColumn && SDDS_GetColumnIndex(&inTable, filterColumn) < 0))
385 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
386 if (!setupOutputFile(&outTable, outputfile, &inTable, inputfile, dataColumn, weightColumn, filterColumn, lowerFilter, upperFilter, &SDDSregion, regionNameColumn, doStats, bins, binSize, normalizeMode, columnMajorOrder))
387 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
388
389 data = weightData = filterData = NULL;
390 while ((readCode = SDDS_ReadPage(&inTable)) > 0) {
391 if ((rows = SDDS_CountRowsOfInterest(&inTable)) < 0)
392 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
393 if (rows && (!(data = SDDS_GetColumnInDoubles(&inTable, dataColumn)) ||
394 (weightColumn && !(weightData = SDDS_GetColumnInDoubles(&inTable, weightColumn))) ||
395 (filterColumn && !(filterData = SDDS_GetColumnInDoubles(&inTable, filterColumn)))))
396 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
397
398 if (rows && filterColumn)
399 points = filter(data, weightData, filterData, rows, lowerFilter, upperFilter);
400 else
401 points = rows;
402
403 pointsBinned = 0;
404 if (points) {
405 if (doStats) {
406 if (!weightColumn)
407 computeMomentsThreaded(&mean, &rms, &standDev, &mad, data, points, threads);
408 else
409 computeWeightedMomentsThreaded(&mean, &rms, &standDev, &mad, data, weightData, points, threads);
410 }
411
412 if (regionFlags) {
413 classifyByRegion(data, weightData, points, hist, regionPosition, bins);
414 hist1 = hist;
415 } else {
416 if (!lowerLimitGiven) {
417 lowerLimit = (points > 0) ? data[0] : 0;
418 for (i = 0; i < points; i++)
419 if (lowerLimit > data[i])
420 lowerLimit = data[i];
421 } else {
422 lowerLimit = givenLowerLimit;
423 }
424 if (!upperLimitGiven) {
425 upperLimit = (points > 0) ? data[0] : 0;
426 for (i = 0; i < points; i++)
427 if (upperLimit < data[i])
428 upperLimit = data[i];
429 } else {
430 upperLimit = givenUpperLimit;
431 }
432
433 range = upperLimit - lowerLimit;
434 if (!lowerLimitGiven)
435 lowerLimit -= range * 1e-7;
436 if (!upperLimitGiven)
437 upperLimit += range * 1e-7;
438 if (upperLimit == lowerLimit) {
439 if (binSize) {
440 upperLimit += binSize / 2;
441 lowerLimit -= binSize / 2;
442 } else if (fabs(upperLimit) < sqrt(DBL_MIN)) {
443 upperLimit = sqrt(DBL_MIN);
444 lowerLimit = -sqrt(DBL_MIN);
445 } else {
446 upperLimit += upperLimit * (1 + 2 * DBL_EPSILON);
447 lowerLimit -= upperLimit * (1 - 2 * DBL_EPSILON);
448 }
449 }
450 if (expansionFactor > 0) {
451 double center = (upperLimit + lowerLimit) / 2;
452 range = expansionFactor * (upperLimit - lowerLimit);
453 lowerLimit = center - range / 2;
454 upperLimit = center + range / 2;
455 }
456 dx = (upperLimit - lowerLimit) / bins;
457
458 if (binSize) {
459 double middle;
460 range = ((range / binSize) + 1) * binSize;
461 middle = (lowerLimit + upperLimit) / 2;
462 lowerLimit = middle - range / 2;
463 upperLimit = middle + range / 2;
464 dx = binSize;
465 bins = range / binSize + 0.5;
466 if (bins < 1 && !doSides)
467 bins = 2 * doSides;
468 indep = trealloc(indep, sizeof(*indep) * (bins + 2 * doSides));
469 hist = trealloc(hist, sizeof(*hist) * (bins + 2 * doSides));
470 CDF = trealloc(CDF, sizeof(*hist) * (bins + 2 * doSides));
471 }
472
473 for (i = -doSides; i < bins + doSides; i++)
474 indep[i + doSides] = (i + 0.5) * dx + lowerLimit;
475 hist1 = hist + doSides;
476 CDF1 = CDF + doSides;
477 if (doSides) {
478 hist[0] = hist[bins + doSides] = 0;
479 }
480
481 if (!weightColumn)
482 pointsBinned = make_histogram(hist1, bins, lowerLimit, upperLimit, data, points, 1);
483 else
484 pointsBinned = make_histogram_weighted(hist1, bins, lowerLimit, upperLimit, data, points, 1, weightData);
485 }
486
487 sum = 0;
488 for (i = 0; i < bins + doSides; i++) {
489 sum += hist1[i];
490 }
491 CDF1[0] = hist1[0] / sum;
492 for (i = 1; i < bins + doSides; i++) {
493 CDF1[i] = CDF1[i - 1] + hist1[i] / sum;
494 }
495
496 if (verbose)
497 fprintf(stderr, "%ld points of %" PRId64 " from page %ld histogrammed in %" PRId64 " bins\n", pointsBinned, rows, readCode, bins);
498 if (!cdfOnly) {
499 if (normalizeMode != NORMALIZE_NO) {
500 double norm = 0;
501 switch (normalizeMode) {
502 case NORMALIZE_PEAK:
503 norm = max_in_array(hist1, bins);
504 break;
505 case NORMALIZE_AREA:
506 case NORMALIZE_SUM:
507 for (i = 0; i < bins; i++)
508 norm += hist1[i];
509 if (normalizeMode == NORMALIZE_AREA)
510 norm *= dx;
511 break;
512 default:
513 SDDS_Bomb("invalid normalize mode--consult programmer.");
514 break;
515 }
516 if (norm)
517 for (i = 0; i < bins; i++)
518 hist1[i] /= norm;
519 }
520 }
521 }
522
523 if (regionFlags) {
524 if (!SDDS_StartPage(&outTable, bins) ||
525 !SDDS_CopyParameters(&outTable, &inTable) ||
526 !SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iBins, bins, iBinSize, dx, iPoints, pointsBinned, -1))
527 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
528 if (points) {
529 if (!SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, regionPosition, bins, iIndep) ||
530 !SDDS_SetColumn(&outTable, SDDS_SET_BY_NAME, regionName, bins, regionNameColumn))
531 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
532 if (!freOnly && !SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, CDF, bins, iCdf))
533 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
534 if (!cdfOnly && !SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, hist, bins, iFreq))
535 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
536 }
537 } else {
538 if (!SDDS_StartPage(&outTable, bins + 2 * doSides) ||
539 !SDDS_CopyParameters(&outTable, &inTable) ||
540 (points && (!SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, indep, bins + 2 * doSides, iIndep))) ||
541 !SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iBins, bins, iBinSize, dx, iPoints, pointsBinned, -1))
542 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
543 if (!freOnly) {
544 if (points && !SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, CDF, bins + 2 * doSides, iCdf))
545 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
546 }
547 if (!cdfOnly) {
548 if (points && !SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, hist, bins + 2 * doSides, iFreq))
549 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
550 }
551 }
552
553 if (filterColumn && points &&
554 !SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iLoFilter, lowerFilter, iUpFilter, upperFilter, -1))
555 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
556 if (doStats && points &&
557 !SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iMean, mean, iRMS, rms, iStDev, standDev, -1))
558 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
559
560 if (!SDDS_WritePage(&outTable))
561 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
562 if (data)
563 free(data);
564 if (weightData)
565 free(weightData);
566 if (filterData)
567 free(filterData);
568 data = weightData = filterData = NULL;
569 }
570
571 if (!SDDS_Terminate(&inTable)) {
572 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
573 exit(EXIT_FAILURE);
574 }
575 if (!SDDS_Terminate(&outTable)) {
576 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
577 exit(EXIT_FAILURE);
578 }
579 return EXIT_SUCCESS;
580}
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
double max_in_array(double *array, long n)
Finds the maximum value in an array of doubles.
Definition findMinMax.c:318
long make_histogram_weighted(double *hist, long n_bins, double lo, double hi, double *data, long n_pts, long new_start, double *weight)
Compiles a weighted histogram from data points.
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long computeWeightedMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, double *w, long n, long numThreads)
Computes weighted statistical moments of an array using multiple threads.
Definition moments.c:239
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
Definition moments.c:127
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.

◆ readRegionFile()

int64_t readRegionFile ( SDDS_DATASET * SDDSin,
char * filename,
char * positionColumn,
char * nameColumn,
double ** regionPosition,
char *** regionName )

Definition at line 732 of file sddshist.c.

732 {
733 int64_t i, rows = 0;
734 if (!SDDS_InitializeInput(SDDSin, filename) ||
735 SDDS_ReadPage(SDDSin) != 1 || (rows = SDDS_RowCount(SDDSin)) < 1 ||
736 !(*regionPosition = SDDS_GetColumnInDoubles(SDDSin, positionColumn)) ||
737 !(*regionName = (char **)SDDS_GetColumn(SDDSin, nameColumn)))
738 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
739 for (i = 1; i < rows; i++)
740 if ((*regionPosition)[i] <= (*regionPosition)[i - 1]) {
741 fprintf(stderr, "sddshist: Error in region position data: row %" PRId64 " is %21.15e while row %" PRId64 " is %21.15e\n",
742 i - 1, (*regionPosition)[i - 1], i, (*regionPosition)[i]);
743 exit(EXIT_FAILURE);
744 }
745 *regionPosition = SDDS_Realloc(*regionPosition, sizeof(**regionPosition) * (rows + 1));
746 (*regionPosition)[rows] = DBL_MAX;
747 *regionName = SDDS_Realloc(*regionName, sizeof(**regionName) * (rows + 1));
748 cp_str(&(*regionName)[rows], "Beyond");
749 return rows;
750}
void * SDDS_GetColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves a copy of the data for a specified column, including only rows marked as "of interest".
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
char * cp_str(char **s, char *t)
Copies a string, allocating memory for storage.
Definition cp_str.c:28

◆ setupOutputFile()

static long setupOutputFile ( SDDS_DATASET * outTable,
char * outputfile,
SDDS_DATASET * inTable,
char * inputfile,
char * dataColumn,
char * weightColumn,
char * filterColumn,
double lowerFilter,
double upperFilter,
SDDS_DATASET * regionTable,
char * regionNameColumn,
long doStats,
int64_t bins,
double binSize,
long normalizeMode,
short columnMajorOrder )
static

Definition at line 616 of file sddshist.c.

619 {
620 char *symbol, *units, *dataUnits, *outputFormat;
621 int32_t outputType;
622 char s[1024];
623
624 if (!SDDS_InitializeOutput(outTable, SDDS_BINARY, 0, NULL, "sddshist output", outputfile))
625 return 0;
626 if (columnMajorOrder != -1)
627 outTable->layout.data_mode.column_major = columnMajorOrder;
628 else
629 outTable->layout.data_mode.column_major = inTable->layout.data_mode.column_major;
630 if (!SDDS_GetColumnInformation(inTable, "units", &dataUnits, SDDS_GET_BY_NAME, dataColumn))
631 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
632
633 /* Define output columns */
634 outputType = SDDS_DOUBLE;
635 outputFormat = NULL;
636
637 if (!SDDS_TransferColumnDefinition(outTable, inTable, dataColumn, NULL) ||
638 !SDDS_ChangeColumnInformation(outTable, "type", &outputType, SDDS_BY_NAME, dataColumn) ||
639 !SDDS_ChangeColumnInformation(outTable, "format_string", &outputFormat, SDDS_BY_NAME, dataColumn) ||
640 (iIndep = SDDS_GetColumnIndex(outTable, dataColumn)) < 0)
641 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
642 if (regionNameColumn && !SDDS_TransferColumnDefinition(outTable, regionTable, regionNameColumn, NULL))
643 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
644 if (!cdfOnly) {
645 switch (normalizeMode) {
646 case NORMALIZE_PEAK:
647 symbol = "RelativeFrequency";
648 units = NULL;
649 break;
650 case NORMALIZE_AREA:
651 symbol = "NormalizedFrequency";
652 if (dataUnits && !SDDS_StringIsBlank(dataUnits)) {
653 units = tmalloc(sizeof(*units) * (strlen(dataUnits) + 5));
654 if (strchr(dataUnits, ' '))
655 sprintf(units, "1/(%s)", dataUnits);
656 else
657 sprintf(units, "1/%s", dataUnits);
658 } else
659 units = NULL;
660 break;
661 case NORMALIZE_SUM:
662 symbol = "FractionalFrequency";
663 units = NULL;
664 break;
665 default:
666 if (weightColumn) {
667 char *weightUnits = NULL;
668 if (weightColumn && !SDDS_GetColumnInformation(inTable, "units", &weightUnits, SDDS_GET_BY_NAME, weightColumn))
669 return 0;
670 symbol = "WeightedNumberOfOccurrences";
671 units = weightUnits;
672 } else {
673 symbol = "NumberOfOccurrences";
674 units = NULL;
675 }
676 break;
677 }
678
679 if ((iFreq = SDDS_DefineColumn(outTable, "frequency", symbol, units, NULL, NULL, SDDS_DOUBLE, 0)) < 0)
680 return 0;
681 free(units);
682 units = NULL;
683 }
684 if (!freOnly) {
685 sprintf(s, "%sCdf", dataColumn);
686 if ((iCdf = SDDS_DefineColumn(outTable, s, NULL, NULL, NULL, NULL, SDDS_DOUBLE, 0)) < 0)
687 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
688 }
689
690 /* Define output parameters */
691 if (SDDS_DefineParameter(outTable, "sddshistInput", NULL, NULL, NULL, NULL, SDDS_STRING,
692 inputfile) < 0 ||
693 (weightColumn && SDDS_DefineParameter(outTable, "sddshistWeight", NULL, NULL, NULL, NULL,
694 SDDS_STRING, weightColumn) < 0) ||
695 (iBins = SDDS_DefineParameter(outTable, "sddshistBins", NULL, NULL, NULL, NULL, SDDS_LONG,
696 NULL)) < 0 ||
697 (iBinSize = SDDS_DefineParameter(outTable, "sddshistBinSize", NULL, NULL, NULL, NULL, SDDS_DOUBLE, NULL)) < 0 ||
698 (iPoints = SDDS_DefineParameter(outTable, "sddshistBinned", NULL, NULL, NULL, NULL, SDDS_LONG, NULL)) < 0)
699 return 0;
700 if (filterColumn) {
701 char *filterUnits;
702 if (!SDDS_GetColumnInformation(inTable, "units", &filterUnits, SDDS_GET_BY_NAME, filterColumn) ||
703 SDDS_DefineParameter(outTable, "sddshistFilter", NULL, NULL, NULL, NULL, SDDS_STRING,
704 filterColumn) < 0 ||
705 (iLoFilter = SDDS_DefineParameter(outTable, "sddshistLowerFilter", NULL, filterUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0 ||
706 (iUpFilter = SDDS_DefineParameter(outTable, "sddshistUpperFilter", NULL, filterUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0)
707 return 0;
708 if (filterUnits)
709 free(filterUnits);
710 }
711 if (doStats) {
712 char *buffer;
713 buffer = tmalloc(sizeof(*buffer) * (strlen(dataColumn) + 20));
714 sprintf(buffer, "%sMean", dataColumn);
715 if ((iMean = SDDS_DefineParameter(outTable, buffer, NULL, dataUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0)
716 return 0;
717 sprintf(buffer, "%sRms", dataColumn);
718 if ((iRMS = SDDS_DefineParameter(outTable, buffer, NULL, dataUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0)
719 return 0;
720 sprintf(buffer, "%sStDev", dataColumn);
721 if ((iStDev = SDDS_DefineParameter(outTable, buffer, NULL, dataUnits, NULL, NULL, SDDS_DOUBLE, NULL)) < 0)
722 return 0;
723 free(buffer);
724 }
725 if (SDDS_DefineParameter(outTable, "sddshistNormMode", NULL, NULL, NULL, NULL, SDDS_STRING, normalize_option[normalizeMode]) < 0 ||
726 !SDDS_TransferAllParameterDefinitions(outTable, inTable, SDDS_TRANSFER_KEEPOLD) ||
727 !SDDS_WriteLayout(outTable))
728 return 0;
729 return 1;
730}
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
Definition SDDS_info.c:364
int32_t SDDS_GetColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Retrieves information about a specified column in the SDDS dataset.
Definition SDDS_info.c:41
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_StringIsBlank(char *s)
Checks if a string is blank (contains only whitespace characters).
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37