SDDSlib
Loading...
Searching...
No Matches
sddsmultihist.c File Reference

SDDS-format multi-column histogramming program. More...

#include "mdb.h"
#include "SDDS.h"
#include "scan.h"
#include "SDDSutils.h"
#include <ctype.h>

Go to the source code of this file.

Macros

#define DO_SIDES   1
 
#define DO_CLOSE_SIDES   2
 
#define DO_AGAINST_SIDES   3
 
#define NORMALIZE_PEAK   0
 
#define NORMALIZE_SUM   1
 
#define NORMALIZE_NO   2
 
#define N_NORMALIZE_OPTIONS   3
 

Enumerations

enum  option_type {
  SET_COLUMNS , SET_PIPE , SET_EXCLUDE , SET_ABSCISSA ,
  SET_BINS , SET_SIZEOFBINS , SET_LOWERLIMIT , SET_UPPERLIMIT ,
  SET_SIDES , SET_SEPARATE , SET_EXPAND , SET_CDF ,
  SET_AUTOBINS , SET_MAJOR_ORDER , SET_BOUNDARYDATA , SET_WEIGHT ,
  SET_NORMALIZE , N_OPTIONS
}
 

Functions

void SetUpOutput (SDDS_DATASET *SDDSout, SDDS_DATASET *SDDSin, long **abscissaIndex, long **cdfIndex, long **histogramIndex, char *output, char **columnName, long columnNames, char **abscissaName, long abscissaNames, char *boundaryColumn, char *boundaryColumnUnits, short columnMajorOrder, short normMode)
 
double * ReadBoundaryData (char *file, char *column, int64_t *n, char **units)
 
void MakeBoundaryHistogram (double *histogram, double *cdf, double *boundaryValue, int64_t nBoundaryValues, double *data, double *weight, int64_t nData)
 
void NormalizeHistogram (double *hist, int64_t bins, short mode)
 
int main (int argc, char **argv)
 

Variables

char * option [N_OPTIONS]
 
static char * USAGE
 
char * normalize_option [N_NORMALIZE_OPTIONS]
 
static short cdfOnly
 
static short freOnly
 

Detailed Description

SDDS-format multi-column histogramming program.

This program reads SDDS-formatted input data and generates histograms for specified columns. It supports various options for customizing the histogram generation, including automatic bin sizing, boundary data, normalization, and more.

Usage

sddsmultihist [<inputfile>] [<outputfile>] [options]

Options

-pipe=[input][,output] -columns=<name>[,...] -abscissa=<name>[,...] -exclude=<name>[,...] -bins=<integer> -sizeOfBins=

-autobins=target=<number>[,minimum=<integer>][,maximum=<integer>] -boundaryData=<filename>,<column> -sides[=close|against] -expand=<fraction> -lowerLimit=

[,...] -upperLimit=

[,...] -separate -cdf=[only] -weightColumn=<name> -majorOrder=row|column -normalize={sum|peak|no}

License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Author
M. Borland, L. Emery, R. Soliday, H. Shang

Definition in file sddsmultihist.c.

Macro Definition Documentation

◆ DO_AGAINST_SIDES

#define DO_AGAINST_SIDES   3

Definition at line 132 of file sddsmultihist.c.

◆ DO_CLOSE_SIDES

#define DO_CLOSE_SIDES   2

Definition at line 131 of file sddsmultihist.c.

◆ DO_SIDES

#define DO_SIDES   1

Definition at line 130 of file sddsmultihist.c.

◆ N_NORMALIZE_OPTIONS

#define N_NORMALIZE_OPTIONS   3

Definition at line 137 of file sddsmultihist.c.

◆ NORMALIZE_NO

#define NORMALIZE_NO   2

Definition at line 136 of file sddsmultihist.c.

◆ NORMALIZE_PEAK

#define NORMALIZE_PEAK   0

Definition at line 134 of file sddsmultihist.c.

◆ NORMALIZE_SUM

#define NORMALIZE_SUM   1

Definition at line 135 of file sddsmultihist.c.

Enumeration Type Documentation

◆ option_type

enum option_type

Definition at line 52 of file sddsmultihist.c.

52 {
53 SET_COLUMNS,
54 SET_PIPE,
55 SET_EXCLUDE,
56 SET_ABSCISSA,
57 SET_BINS,
58 SET_SIZEOFBINS,
59 SET_LOWERLIMIT,
60 SET_UPPERLIMIT,
61 SET_SIDES,
62 SET_SEPARATE,
63 SET_EXPAND,
64 SET_CDF,
65 SET_AUTOBINS,
66 SET_MAJOR_ORDER,
67 SET_BOUNDARYDATA,
68 SET_WEIGHT,
69 SET_NORMALIZE,
70 N_OPTIONS
71};

Function Documentation

◆ main()

int main ( int argc,
char ** argv )

Definition at line 152 of file sddsmultihist.c.

152 {
153 int iArg;
154 char *boundaryFile, *boundaryColumn, *boundaryColumnUnits;
155 double *boundaryValue;
156 int64_t nBoundaryValues;
157 char **abscissaName, **columnName, **excludeName;
158 long columnNames, excludeNames, abscissaNames, column, offset;
159 long givenLowerLimits, givenUpperLimits;
160 char *input, *output;
161 long readCode, binsGiven;
162 int64_t i, rows, bins, writeBins;
163 long lowerLimitGiven, upperLimitGiven, doSides, doSeparate;
164 double autoBinsTarget;
165 long autoBinsMinimum, autoBinsMaximum;
166 char *weightColumn;
167 double *weightData;
168 short normMode = NORMALIZE_NO;
169 unsigned long pipeFlags;
170 SCANNED_ARG *scanned;
171 SDDS_DATASET SDDSin, SDDSout;
172 double binSize, *lowerLimit = NULL, *upperLimit = NULL, *givenLowerLimit, *givenUpperLimit, *dx = NULL, range, middle;
173 double **inputData, *abscissa, *histogram, *minValue, *maxValue, transferLimit, *cdf, sum;
174 long *abscissaIndex, *histogramIndex, *cdfIndex;
175 double expandRange, maxRange;
176 char *CDFONLY;
177 unsigned long dummyFlags, majorOrderFlag;
178 short columnMajorOrder = -1;
179
181 argc = scanargs(&scanned, argc, argv);
182 if (argc < 3 || argc > (3 + N_OPTIONS)) {
183 fprintf(stderr, "%s", USAGE);
184 exit(EXIT_FAILURE);
185 }
186
187 minValue = maxValue = NULL;
188 output = input = NULL;
189 pipeFlags = 0;
190 abscissaName = NULL;
191 boundaryFile = boundaryColumn = boundaryColumnUnits = NULL;
192 boundaryValue = NULL;
193 offset = 0;
194 columnName = excludeName = NULL;
195 columnNames = excludeNames = abscissaNames = 0;
196 givenLowerLimits = givenUpperLimits = 0;
197 givenLowerLimit = givenUpperLimit = NULL;
198 bins = binsGiven = binSize = doSides = doSeparate = 0;
199 lowerLimitGiven = upperLimitGiven = 0;
200 expandRange = 0;
201 cdfOnly = 0;
202 freOnly = 1;
203 autoBinsTarget = 0;
204 autoBinsMinimum = autoBinsMaximum = 0;
205 weightColumn = NULL;
206 weightData = NULL;
207
208 for (iArg = 1; iArg < argc; iArg++) {
209 if (scanned[iArg].arg_type == OPTION) {
210 /* process options here */
211 switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
212 case SET_MAJOR_ORDER:
213 majorOrderFlag = 0;
214 scanned[iArg].n_items--;
215 if (scanned[iArg].n_items > 0 && (!scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0, "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER, "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
216 SDDS_Bomb("invalid -majorOrder syntax/values");
217 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
218 columnMajorOrder = 1;
219 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
220 columnMajorOrder = 0;
221 break;
222 case SET_PIPE:
223 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
224 SDDS_Bomb("invalid -pipe syntax");
225 break;
226 case SET_COLUMNS:
227 if (columnName)
228 SDDS_Bomb("only one -columns option may be given");
229 if (scanned[iArg].n_items < 2)
230 SDDS_Bomb("invalid -columns syntax");
231 if (!(columnName = SDDS_Realloc(columnName, sizeof(*columnName) * (columnNames + scanned[iArg].n_items - 1))))
232 SDDS_Bomb("memory allocation failure");
233 for (i = 1; i < scanned[iArg].n_items; i++)
234 columnName[columnNames + i - 1] = scanned[iArg].list[i];
235 columnNames += scanned[iArg].n_items - 1;
236 break;
237 case SET_ABSCISSA:
238 if (abscissaName)
239 SDDS_Bomb("only one -abscissa option may be given");
240 if (scanned[iArg].n_items >= 2) {
241 if (!(abscissaName = SDDS_Realloc(abscissaName, sizeof(*abscissaName) * (abscissaNames + scanned[iArg].n_items - 1))))
242 SDDS_Bomb("memory allocation failure");
243 for (i = 1; i < scanned[iArg].n_items; i++)
244 abscissaName[abscissaNames + i - 1] = scanned[iArg].list[i];
245 abscissaNames += scanned[iArg].n_items - 1;
246 }
247 break;
248 case SET_BINS:
249 if (binsGiven)
250 SDDS_Bomb("-bins specified more than once");
251 binsGiven = 1;
252 if (sscanf(scanned[iArg].list[1], "%" SCNd64, &bins) != 1 || bins <= 0)
253 SDDS_Bomb("invalid value for bins---give a positive value");
254 break;
255 case SET_SIZEOFBINS:
256 if (sscanf(scanned[iArg].list[1], "%le", &binSize) != 1 || binSize <= 0)
257 SDDS_Bomb("invalid value for bin size---give a positive value");
258 break;
259 case SET_AUTOBINS:
260 if (scanned[iArg].n_items < 2)
261 SDDS_Bomb("incorrect -autoBins syntax");
262 scanned[iArg].n_items -= 1;
263 autoBinsTarget = autoBinsMinimum = autoBinsMaximum = 0;
264 if (!scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
265 "target", SDDS_DOUBLE, &autoBinsTarget, 1, 0,
266 "minimum", SDDS_LONG, &autoBinsMinimum, 1, 0,
267 "maximum", SDDS_LONG, &autoBinsMaximum, 1, 0, NULL) ||
268 autoBinsTarget <= 0 || autoBinsMinimum < 0 || autoBinsMaximum < 0)
269 SDDS_Bomb("incorrect -autoBins syntax or values");
270 break;
271 case SET_EXCLUDE:
272 if (excludeName)
273 SDDS_Bomb("only one -exclude option may be given");
274 if (scanned[iArg].n_items < 2)
275 SDDS_Bomb("invalid -exclude syntax");
276 if (!(excludeName = SDDS_Realloc(excludeName, sizeof(*excludeName) * (excludeNames + scanned[iArg].n_items - 1))))
277 SDDS_Bomb("memory allocation failure");
278 for (i = 1; i < scanned[iArg].n_items; i++)
279 excludeName[excludeNames + i - 1] = scanned[iArg].list[i];
280 excludeNames += scanned[iArg].n_items - 1;
281 break;
282 case SET_LOWERLIMIT:
283 if (lowerLimitGiven)
284 SDDS_Bomb("-lowerLimit specified more than once");
285 lowerLimitGiven = 1;
286 if (!(givenLowerLimit = SDDS_Realloc(givenLowerLimit, sizeof(*givenLowerLimit) * (givenLowerLimits + scanned[iArg].n_items - 1))))
287 SDDS_Bomb("SET_LOWERLIMIT: memory allocation failure");
288 for (i = 1; i < scanned[iArg].n_items; i++) {
289 if (sscanf(scanned[iArg].list[i], "%lf", &transferLimit) != 1)
290 SDDS_Bomb("invalid value for -lowerLimit");
291 givenLowerLimit[givenLowerLimits + i - 1] = transferLimit;
292 }
293 givenLowerLimits += scanned[iArg].n_items - 1;
294 break;
295 case SET_UPPERLIMIT:
296 if (upperLimitGiven)
297 SDDS_Bomb("-upperLimit specified more than once");
298 upperLimitGiven = 1;
299 if (!(givenUpperLimit = SDDS_Realloc(givenUpperLimit, sizeof(*givenUpperLimit) * (givenUpperLimits + scanned[iArg].n_items - 1))))
300 SDDS_Bomb("SET_UPPERLIMIT: memory allocation failure");
301 for (i = 1; i < scanned[iArg].n_items; i++) {
302 if (sscanf(scanned[iArg].list[i], "%lf", &transferLimit) != 1)
303 SDDS_Bomb("invalid value for -upperLimit");
304 givenUpperLimit[givenUpperLimits + i - 1] = transferLimit;
305 }
306 givenUpperLimits += scanned[iArg].n_items - 1;
307 break;
308 case SET_SIDES:
309 doSides = DO_SIDES;
310 if (scanned[iArg].n_items == 2) {
311 static char *sideOpt[2] = {"close", "against"};
312 switch (match_string(scanned[iArg].list[1], sideOpt, 2, 0)) {
313 case 0:
314 doSides = DO_CLOSE_SIDES;
315 break;
316 case 1:
317 doSides = DO_AGAINST_SIDES;
318 break;
319 default:
320 SDDS_Bomb("invalid value for -sides");
321 break;
322 }
323 }
324 break;
325 case SET_SEPARATE:
326 doSeparate = 1;
327 break;
328 case SET_EXPAND:
329 if (scanned[iArg].n_items != 2 ||
330 sscanf(scanned[iArg].list[1], "%lf", &expandRange) != 1 || expandRange <= 0)
331 SDDS_Bomb("invalid -expand syntax");
332 break;
333 case SET_CDF:
334 if (scanned[iArg].n_items == 1)
335 cdfOnly = 0;
336 else {
337 if (scanned[iArg].n_items > 2)
338 SDDS_Bomb("invalid -cdf syntax");
339 CDFONLY = scanned[iArg].list[1];
340 if (strcmp(CDFONLY, "only") != 0)
341 SDDS_Bomb("invalid -cdf value, it should be -cdf or -cdf=only");
342 cdfOnly = 1;
343 }
344 freOnly = 0;
345 break;
346 case SET_BOUNDARYDATA:
347 if (scanned[iArg].n_items != 3 ||
348 !(boundaryFile = scanned[iArg].list[1]) ||
349 !strlen(boundaryFile) ||
350 !(boundaryColumn = scanned[iArg].list[2]) ||
351 !strlen(boundaryColumn))
352 SDDS_Bomb("invalid -boundaryData syntax or values");
353 break;
354 case SET_WEIGHT:
355 if (scanned[iArg].n_items != 2 ||
356 !(weightColumn = scanned[iArg].list[1]) ||
357 !strlen(weightColumn))
358 SDDS_Bomb("invalid -weightColumn syntax or values");
359 break;
360 case SET_NORMALIZE:
361 if (scanned[iArg].n_items == 1)
362 normMode = NORMALIZE_SUM;
363 else if (scanned[iArg].n_items != 2 ||
364 (normMode = match_string(scanned[iArg].list[1], normalize_option, N_NORMALIZE_OPTIONS, 0)) < 0)
365 SDDS_Bomb("invalid -normalize syntax");
366 break;
367 default:
368 fprintf(stderr, "Error: unknown or ambiguous option: %s\n", scanned[iArg].list[0]);
369 fprintf(stderr, "%s", USAGE);
370 exit(EXIT_FAILURE);
371 break;
372 }
373 } else {
374 if (!input)
375 input = scanned[iArg].list[0];
376 else if (!output)
377 output = scanned[iArg].list[0];
378 else
379 SDDS_Bomb("too many filenames seen");
380 }
381 }
382
383 if (boundaryColumn && (abscissaNames > 0 || doSeparate))
384 SDDS_Bomb("-boundaryData option is incompatible with -abscissa and -separate options");
385
386 if (columnNames <= 0)
387 SDDS_Bomb("Supply the names of columns to histogram with -columns");
388
389 processFilenames("sddsmultihist", &input, &output, pipeFlags, 0, NULL);
390
391 if (!SDDS_InitializeInput(&SDDSin, input)) {
392 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
393 exit(EXIT_FAILURE);
394 }
395
396 if ((columnNames = expandColumnPairNames(&SDDSin, &columnName, NULL, columnNames, excludeName, excludeNames, FIND_NUMERIC_TYPE, 0)) <= 0) {
397 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
398 SDDS_Bomb("No quantities selected to histogram.");
399 }
400
401 if (doSeparate) {
402 if (abscissaNames <= 0) {
403 if (!(abscissaName = SDDS_Realloc(abscissaName, sizeof(*abscissaName) * (abscissaNames + columnNames))))
404 SDDS_Bomb("memory allocation failure");
405 abscissaNames = 0;
406 for (i = 0; i < columnNames; i++) {
407 abscissaName[abscissaNames + i] = columnName[i];
408 }
409 abscissaNames += columnNames;
410 }
411 if (columnNames > 1) {
412 if (abscissaNames > 0) {
413 if (columnNames != abscissaNames)
414 SDDS_Bomb("the number of abscissa names must match the number of columns");
415 }
416 if (givenLowerLimits)
417 if (columnNames != givenLowerLimits)
418 SDDS_Bomb("the number of lower limits must match the number of columns");
419 if (givenUpperLimits)
420 if (columnNames != givenUpperLimits)
421 SDDS_Bomb("the number of upper limits must match the number of columns");
422 } else {
423 /* Handle single column with multiple option names */
424 if (abscissaNames > 0) {
425 if (columnNames != abscissaNames)
426 abscissaNames = 1;
427 }
428 if (givenLowerLimits)
429 if (columnNames != givenLowerLimits)
430 givenLowerLimits = 1;
431 if (givenUpperLimits)
432 if (columnNames != givenUpperLimits)
433 givenUpperLimits = 1;
434 }
435 } else if (boundaryFile) {
436 if (!(boundaryValue = ReadBoundaryData(boundaryFile, boundaryColumn, &nBoundaryValues, &boundaryColumnUnits))) {
437 SDDS_Bomb("Problem reading boundary data");
438 }
439 } else {
440 if (abscissaNames <= 0)
441 SDDS_Bomb("Supply the name of the abscissa with -abscissaName");
442 abscissaNames = 1;
443 }
444
445 SetUpOutput(&SDDSout, &SDDSin, &abscissaIndex, &cdfIndex, &histogramIndex, output, columnName, columnNames, abscissaName, abscissaNames, boundaryColumn, boundaryColumnUnits, columnMajorOrder, normMode);
446
447 if (!(inputData = (double **)malloc(columnNames * sizeof(*inputData))) ||
448 !(minValue = (double *)malloc(columnNames * sizeof(*minValue))) ||
449 !(maxValue = (double *)malloc(columnNames * sizeof(*maxValue))))
450 SDDS_Bomb("memory allocation failure");
451
452 if (((binSize ? 1 : 0) + (binsGiven ? 1 : 0) + (autoBinsTarget ? 1 : 0)) > 1)
453 SDDS_Bomb("Specify only one of -binSize, -bins, or -autoBins");
454 if (!binSize && !binsGiven && !autoBinsTarget) {
455 binsGiven = 1;
456 bins = 20;
457 }
458
459 abscissa = histogram = cdf = NULL;
460
461 while ((readCode = SDDS_ReadPage(&SDDSin)) > 0) {
462 if ((rows = SDDS_CountRowsOfInterest(&SDDSin)) < 0)
463 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
464
465 if (rows) {
466 if (weightColumn && !(weightData = SDDS_GetColumnInDoubles(&SDDSin, weightColumn)))
467 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
468
469 for (column = 0; column < columnNames; column++) {
470 if (!(inputData[column] = SDDS_GetColumnInDoubles(&SDDSin, columnName[column])))
471 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
472 }
473
474 if (!boundaryColumn) {
475 if (!(lowerLimit = (double *)malloc(sizeof(*lowerLimit) * columnNames)))
476 SDDS_Bomb("memory allocation failure");
477 if (!(upperLimit = (double *)malloc(sizeof(*upperLimit) * columnNames)))
478 SDDS_Bomb("memory allocation failure");
479 if (!(dx = (double *)malloc(sizeof(*dx) * columnNames)))
480 SDDS_Bomb("memory allocation failure");
481
482 if (doSeparate) {
483 for (column = 0; column < columnNames; column++) {
484 find_min_max(&minValue[column], &maxValue[column], inputData[column], rows);
485 lowerLimit[column] = givenLowerLimits ? givenLowerLimit[column] : minValue[column];
486 upperLimit[column] = givenUpperLimits ? givenUpperLimit[column] : maxValue[column];
487 }
488 } else {
489 for (column = 0; column < columnNames; column++)
490 find_min_max(&minValue[column], &maxValue[column], inputData[column], rows);
491 lowerLimit[0] = givenLowerLimits ? givenLowerLimit[0] : min_in_array(minValue, columnNames);
492 upperLimit[0] = givenUpperLimits ? givenUpperLimit[0] : max_in_array(maxValue, columnNames);
493 for (column = 1; column < columnNames; column++) {
494 lowerLimit[column] = lowerLimit[0];
495 upperLimit[column] = upperLimit[0];
496 }
497 }
498
499 range = (1 + expandRange) * (upperLimit[0] - lowerLimit[0]);
500 if (autoBinsTarget) {
501 bins = (int64_t)(rows / autoBinsTarget);
502 if (autoBinsMinimum) {
503 if (bins < autoBinsMinimum)
504 bins = autoBinsMinimum;
505 } else if (bins < 5) {
506 bins = 5;
507 }
508 if (autoBinsMaximum) {
509 if (bins > autoBinsMaximum)
510 bins = autoBinsMaximum;
511 } else if (bins > rows)
512 bins = rows;
513 }
514
515 if (binSize) {
516 maxRange = range;
517 for (column = 0; column < columnNames; column++) {
518 range = (1 + expandRange) * (upperLimit[column] - lowerLimit[column]);
519 range = ((range / binSize) + 1) * binSize;
520 if (range > maxRange)
521 maxRange = range;
522 middle = (lowerLimit[column] + upperLimit[column]) / 2;
523 lowerLimit[column] = middle - range / 2;
524 upperLimit[column] = middle + range / 2;
525 }
526 range = maxRange;
527 if (!doSeparate) {
528 lowerLimit[0] = min_in_array(lowerLimit, columnNames);
529 upperLimit[0] = max_in_array(upperLimit, columnNames);
530 dx[0] = binSize;
531 }
532 bins = maxRange / binSize + 0.5;
533 if (bins < 1 && !doSides)
534 bins = 2;
535 if (doSeparate)
536 for (column = 0; column < columnNames; column++) {
537 range = upperLimit[column] - lowerLimit[column];
538 upperLimit[column] += (maxRange - range) / 2;
539 lowerLimit[column] -= (maxRange - range) / 2;
540 dx[column] = binSize;
541 }
542 } else {
543 if (doSeparate) {
544 for (column = 0; column < columnNames; column++) {
545 range = (1 + expandRange) * (upperLimit[column] - lowerLimit[column]);
546 middle = (upperLimit[column] + lowerLimit[column]) / 2;
547 upperLimit[column] = middle + range / 2;
548 lowerLimit[column] = middle - range / 2;
549 if (upperLimit[column] == lowerLimit[column]) {
550 if (fabs(upperLimit[column]) < sqrt(DBL_MIN)) {
551 upperLimit[column] = sqrt(DBL_MIN);
552 lowerLimit[column] = -sqrt(DBL_MIN);
553 } else {
554 lowerLimit[column] = upperLimit[column] * (1 - 10000 * DBL_EPSILON);
555 upperLimit[column] = upperLimit[column] * (1 + 10000 * DBL_EPSILON);
556 }
557 }
558 dx[column] = (upperLimit[column] - lowerLimit[column]) / bins;
559 }
560 } else {
561 range = (1 + expandRange) * (upperLimit[0] - lowerLimit[0]);
562 middle = (upperLimit[0] + lowerLimit[0]) / 2;
563 upperLimit[0] = middle + range / 2;
564 lowerLimit[0] = middle - range / 2;
565 if (upperLimit[0] == lowerLimit[0]) {
566 if (fabs(upperLimit[0]) < sqrt(DBL_MIN)) {
567 upperLimit[0] = sqrt(DBL_MIN);
568 lowerLimit[0] = -sqrt(DBL_MIN);
569 } else {
570 lowerLimit[0] = upperLimit[0] * (1 - 10000 * DBL_EPSILON);
571 upperLimit[0] = upperLimit[0] * (1 + 10000 * DBL_EPSILON);
572 }
573 }
574 dx[0] = (upperLimit[0] - lowerLimit[0]) / bins;
575 }
576 }
577 if (!binsGiven || !abscissa) {
578 if (!(abscissa = SDDS_Realloc(abscissa, sizeof(*abscissa) * (bins + 2))) ||
579 !(cdf = SDDS_Realloc(cdf, sizeof(*cdf) * (bins + 2))) ||
580 !(histogram = SDDS_Realloc(histogram, sizeof(*histogram) * (bins + 2))))
581 SDDS_Bomb("memory allocation failure");
582 }
583 writeBins = bins + (doSides ? 2 : 0);
584 offset = doSides ? 0 : 1;
585 } else {
586 bins = writeBins = nBoundaryValues;
587 abscissa = NULL;
588 if (!(cdf = SDDS_Realloc(cdf, sizeof(*cdf) * bins)) ||
589 !(histogram = SDDS_Realloc(histogram, sizeof(*histogram) * bins)))
590 SDDS_Bomb("memory allocation failure");
591 }
592
593 if (!SDDS_StartPage(&SDDSout, writeBins) || !SDDS_CopyParameters(&SDDSout, &SDDSin))
594 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
595
596 if (boundaryColumn) {
597 for (column = 0; column < columnNames; column++) {
598 MakeBoundaryHistogram(histogram, cdf, boundaryValue, nBoundaryValues, inputData[column], weightData, rows);
599 NormalizeHistogram(histogram, nBoundaryValues, normMode);
600 if (!cdfOnly && !SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, histogram, writeBins, histogramIndex[column]))
601 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
602 if (!freOnly && !SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, cdf, writeBins, cdfIndex[column]))
603 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
604 free(inputData[column]);
605 }
606 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_NAME, boundaryValue, writeBins, boundaryColumn))
607 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
608 } else if (!doSeparate) {
609 for (i = -1; i < bins + 1; i++)
610 abscissa[i + 1] = (i + 0.5) * dx[0] + lowerLimit[0];
611 switch (doSides) {
612 case DO_CLOSE_SIDES:
613 abscissa[0] = abscissa[1] - dx[0] / 2;
614 abscissa[bins + 1] = abscissa[bins] + dx[0] / 2;
615 break;
616 case DO_AGAINST_SIDES:
617 abscissa[0] = abscissa[1];
618 abscissa[bins + 1] = abscissa[bins];
619 break;
620 }
621 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, abscissa + offset, writeBins, abscissaIndex[0]))
622 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
623 for (column = 0; column < columnNames; column++) {
624 histogram[0] = histogram[bins + 1] = 0;
625 if (!weightColumn)
626 make_histogram(histogram + 1, bins, lowerLimit[0], upperLimit[0], inputData[column], rows, 1);
627 else
628 make_histogram_weighted(histogram + 1, bins, lowerLimit[0], upperLimit[0], inputData[column], rows, 1, weightData);
629 NormalizeHistogram(histogram, bins, normMode);
630 sum = 0;
631 for (i = 0; i <= bins + 1; i++)
632 sum += histogram[i];
633 for (i = 0; i <= bins + 1; i++) {
634 if (!i)
635 cdf[i] = histogram[i] / sum;
636 else
637 cdf[i] = cdf[i - 1] + histogram[i] / sum;
638 }
639 if (!cdfOnly) {
640 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, histogram + offset, writeBins, histogramIndex[column]))
641 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
642 }
643 if (!freOnly) {
644 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, cdf + offset, writeBins, cdfIndex[column]))
645 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
646 }
647 free(inputData[column]);
648 }
649 } else {
650 for (column = 0; column < columnNames; column++) {
651 for (i = -1; i < bins + 1; i++)
652 abscissa[i + 1] = (i + 0.5) * dx[column] + lowerLimit[column];
653 switch (doSides) {
654 case DO_CLOSE_SIDES:
655 abscissa[0] = abscissa[1] - dx[column] / 2;
656 abscissa[bins + 1] = abscissa[bins] + dx[column] / 2;
657 break;
658 case DO_AGAINST_SIDES:
659 abscissa[0] = abscissa[1];
660 abscissa[bins + 1] = abscissa[bins];
661 break;
662 }
663 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, abscissa + offset, writeBins, abscissaIndex[column]))
664 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
665 histogram[0] = histogram[bins + 1] = 0;
666 if (!weightColumn)
667 make_histogram(histogram + 1, bins, lowerLimit[column], upperLimit[column], inputData[column], rows, 1);
668 else
669 make_histogram_weighted(histogram + 1, bins, lowerLimit[column], upperLimit[column], inputData[column], rows, 1, weightData);
670 NormalizeHistogram(histogram, bins + 2, normMode);
671 sum = 0;
672 for (i = 0; i <= bins + 1; i++)
673 sum += histogram[i];
674 for (i = 0; i <= bins + 1; i++) {
675 if (!i)
676 cdf[i] = histogram[i] / sum;
677 else
678 cdf[i] = cdf[i - 1] + histogram[i] / sum;
679 }
680 if (!cdfOnly) {
681 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, histogram + offset, writeBins, histogramIndex[column]))
682 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
683 }
684 if (!freOnly) {
685 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, cdf + offset, writeBins, cdfIndex[column]))
686 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
687 }
688 free(inputData[column]);
689 }
690 }
691 } else {
692 if (!SDDS_StartPage(&SDDSout, rows ? bins : 0))
693 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
694 }
695 if (weightData) {
696 free(weightData);
697 weightData = NULL;
698 }
699 if (!SDDS_WritePage(&SDDSout))
700 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
701 }
702
703 if (!SDDS_Terminate(&SDDSin)) {
704 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
705 exit(EXIT_FAILURE);
706 }
707 if (!SDDS_Terminate(&SDDSout)) {
708 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
709 exit(EXIT_FAILURE);
710 }
711
712 free(histogram);
713 free(cdf);
714 free(abscissa);
715 if (minValue)
716 free(minValue);
717 if (maxValue)
718 free(maxValue);
719 if (lowerLimit)
720 free(lowerLimit);
721 if (upperLimit)
722 free(upperLimit);
723 if (dx)
724 free(dx);
725
726 return EXIT_SUCCESS;
727}
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
int find_min_max(double *min, double *max, double *list, int64_t n)
Finds the minimum and maximum values in a list of doubles.
Definition findMinMax.c:33
double max_in_array(double *array, long n)
Finds the maximum value in an array of doubles.
Definition findMinMax.c:318
double min_in_array(double *array, long n)
Finds the minimum value in an array of doubles.
Definition findMinMax.c:336
long make_histogram_weighted(double *hist, long n_bins, double lo, double hi, double *data, long n_pts, long new_start, double *weight)
Compiles a weighted histogram from data points.
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.

◆ MakeBoundaryHistogram()

void MakeBoundaryHistogram ( double * histogram,
double * cdf,
double * boundaryValue,
int64_t nBoundaryValues,
double * data,
double * weight,
int64_t nData )

Definition at line 830 of file sddsmultihist.c.

831 {
832 int64_t i, j;
833 for (i = 0; i < nBoundaryValues; i++)
834 histogram[i] = cdf[i] = 0;
835 for (i = 0; i < nData; i++) {
836 j = binaryArraySearch(boundaryValue, sizeof(double), nBoundaryValues, &data[i], double_cmpasc, 1) + 1;
837 if (j < nBoundaryValues && j >= 0)
838 histogram[j] += weight ? fabs(weight[i]) : 1;
839 }
840 cdf[0] = histogram[0];
841 for (j = 1; j < nBoundaryValues; j++) {
842 cdf[j] = cdf[j - 1] + histogram[j];
843 }
844 if (cdf[nBoundaryValues - 1] > 0)
845 for (j = 0; j < nBoundaryValues; j++)
846 cdf[j] /= cdf[nBoundaryValues - 1];
847}
long binaryArraySearch(void *array, size_t elemSize, long members, void *key, int(*compare)(const void *c1, const void *c2), long bracket)
Searches for a key in a sorted array of data values using binary search.
Definition binsert.c:156
int double_cmpasc(const void *a, const void *b)
Compare two doubles in ascending order.

◆ NormalizeHistogram()

void NormalizeHistogram ( double * hist,
int64_t bins,
short mode )

Definition at line 849 of file sddsmultihist.c.

849 {
850 int64_t i;
851 double value = 0;
852 switch (mode) {
853 case NORMALIZE_SUM:
854 for (i = 0; i < bins; i++)
855 value += hist[i];
856 break;
857 case NORMALIZE_PEAK:
858 for (i = 0; i < bins; i++)
859 if (hist[i] > value)
860 value = hist[i];
861 break;
862 default:
863 return;
864 break;
865 }
866 if (value)
867 for (i = 0; i < bins; i++)
868 hist[i] /= value;
869}

◆ ReadBoundaryData()

double * ReadBoundaryData ( char * file,
char * column,
int64_t * n,
char ** units )

Definition at line 798 of file sddsmultihist.c.

798 {
799 SDDS_DATASET SDDSin;
800 double *data;
801 int64_t j;
802
803 if (!SDDS_InitializeInput(&SDDSin, file)) {
804 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
805 }
806
807 if (SDDS_GetColumnInformation(&SDDSin, "units", units, SDDS_GET_BY_NAME, column) != SDDS_STRING)
808 return NULL;
809
810 if (SDDS_ReadPage(&SDDSin) <= 0) {
811 SDDS_SetError("No pages in boundary data file");
812 return NULL;
813 }
814
815 *n = SDDS_RowCount(&SDDSin);
816 if (*n <= 0)
817 return NULL;
818 if (!(data = SDDS_GetColumnInDoubles(&SDDSin, column)))
819 return NULL;
820 for (j = 1; j < (*n); j++) {
821 if (data[j] <= data[j - 1]) {
822 memmove(data + j, data + j + 1, sizeof(*data) * ((*n) - 1 - j));
823 *n -= 1;
824 j -= 1;
825 }
826 }
827 return data;
828}
int32_t SDDS_GetColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Retrieves information about a specified column in the SDDS dataset.
Definition SDDS_info.c:41
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85

◆ SetUpOutput()

void SetUpOutput ( SDDS_DATASET * SDDSout,
SDDS_DATASET * SDDSin,
long ** abscissaIndex,
long ** cdfIndex,
long ** histogramIndex,
char * output,
char ** columnName,
long columnNames,
char ** abscissaName,
long abscissaNames,
char * boundaryColumn,
char * boundaryColumnUnits,
short columnMajorOrder,
short normMode )

Definition at line 729 of file sddsmultihist.c.

732 {
733 long column;
734 char s[SDDS_MAXLINE];
735 int32_t outputType = SDDS_DOUBLE;
736 char *blankString = "";
737
738 if (!SDDS_InitializeOutput(SDDSout, SDDS_BINARY, 0, NULL, "sddsmultihist output", output))
739 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
740
741 if (columnMajorOrder != -1)
742 SDDSout->layout.data_mode.column_major = columnMajorOrder;
743 else
744 SDDSout->layout.data_mode.column_major = SDDSin->layout.data_mode.column_major;
745
746 if (!(*cdfIndex = (long *)malloc(sizeof(**cdfIndex) * columnNames)))
747 SDDS_Bomb("memory allocation failure");
748 if (!cdfOnly) {
749 if (!(*histogramIndex = (long *)malloc(sizeof(**histogramIndex) * columnNames)))
750 SDDS_Bomb("memory allocation failure");
751 }
752 if (!boundaryColumn) {
753 if (!(*abscissaIndex = (long *)malloc(sizeof(**abscissaIndex) * columnNames)))
754 SDDS_Bomb("memory allocation failure");
755
756 for (column = 0; column < abscissaNames; column++) {
757 if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, columnName[column], abscissaName[column]) ||
758 !SDDS_ChangeColumnInformation(SDDSout, "type", &outputType, SDDS_BY_NAME, abscissaName[column]) ||
759 ((*abscissaIndex)[column] = SDDS_GetColumnIndex(SDDSout, abscissaName[column])) < 0)
760 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
761 if (strcmp(columnName[column], abscissaName[column]) != 0 &&
762 (!SDDS_ChangeColumnInformation(SDDSout, "description", blankString, SDDS_BY_NAME, abscissaName[column]) ||
763 !SDDS_ChangeColumnInformation(SDDSout, "symbol", blankString, SDDS_BY_NAME, abscissaName[column])))
764 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
765 }
766 } else {
767 if (!SDDS_DefineSimpleColumn(SDDSout, boundaryColumn, boundaryUnits, SDDS_DOUBLE))
768 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
769 }
770
771 for (column = 0; column < columnNames; column++) {
772 if (!freOnly) {
773 sprintf(s, "%sCdf", columnName[column]);
774 if (((*cdfIndex)[column] = SDDS_DefineColumn(SDDSout, s, NULL, NULL, NULL, NULL, SDDS_DOUBLE, 0)) < 0)
775 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
776 }
777 if (!cdfOnly) {
778 switch (normMode) {
779 case NORMALIZE_PEAK:
780 sprintf(s, "%sRelativeFrequency", columnName[column]);
781 break;
782 case NORMALIZE_SUM:
783 sprintf(s, "%sFractionalFrequency", columnName[column]);
784 break;
785 default:
786 sprintf(s, "%sFrequency", columnName[column]);
787 break;
788 }
789 if (((*histogramIndex)[column] = SDDS_DefineColumn(SDDSout, s, NULL, NULL, NULL, NULL, SDDS_DOUBLE, 0)) < 0)
790 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
791 }
792 }
793 if (!SDDS_TransferAllParameterDefinitions(SDDSout, SDDSin, SDDS_TRANSFER_KEEPOLD) ||
794 !SDDS_WriteLayout(SDDSout))
795 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
796}
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
Definition SDDS_info.c:364
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.

Variable Documentation

◆ cdfOnly

short cdfOnly
static

Definition at line 150 of file sddsmultihist.c.

◆ freOnly

short freOnly
static

Definition at line 150 of file sddsmultihist.c.

◆ normalize_option

char* normalize_option[N_NORMALIZE_OPTIONS]
Initial value:
= {
"peak", "sum", "no"}

Definition at line 138 of file sddsmultihist.c.

138 {
139 "peak", "sum", "no"};

◆ option

char* option[N_OPTIONS]
Initial value:
= {
"columns",
"pipe",
"exclude",
"abscissa",
"bins",
"sizeofbins",
"lowerlimit",
"upperlimit",
"sides",
"separate",
"expand",
"cdf",
"autobins",
"majorOrder",
"boundaryData",
"weightColumn",
"normalize",
}

Definition at line 73 of file sddsmultihist.c.

73 {
74 "columns",
75 "pipe",
76 "exclude",
77 "abscissa",
78 "bins",
79 "sizeofbins",
80 "lowerlimit",
81 "upperlimit",
82 "sides",
83 "separate",
84 "expand",
85 "cdf",
86 "autobins",
87 "majorOrder",
88 "boundaryData",
89 "weightColumn",
90 "normalize",
91};

◆ USAGE

char* USAGE
static

Definition at line 93 of file sddsmultihist.c.