100char *option[N_OPTIONS] = {
101 "bins",
"lowerlimit",
"upperlimit",
"datacolumn",
102 "filter",
"sizeofbins",
"weightcolumn",
103 "normalize",
"statistics",
"sides",
"verbose",
"pipe",
"cdf",
"expand",
104 "majorOrder",
"regions",
"threads"
108 "Usage: sddshist [<inputfile>] [<outputfile>]\n"
109 " [-pipe=[input][,output]]\n"
110 " -dataColumn=<column-name>\n"
112 " -bins=<number> |\n"
113 " -sizeOfBins=<value> |\n"
114 " -regions=filename=<filename>,position=<columnName>,name=<columnName>\n"
116 " [-lowerLimit=<value>]\n"
117 " [-upperLimit=<value>]\n"
118 " [-expand=<factor>]\n"
119 " [-filter=<column-name>,<lower-limit>,<upper-limit>]\n"
120 " [-weightColumn=<column-name>]\n"
121 " [-sides[=<points>]]\n"
122 " [-normalize[={sum|area|peak}]]\n"
124 " [-threads=<number>]\n"
127 " [-majorOrder=row|column]\n"
129 " -pipe=[input][,output] Use pipe for input and/or output.\n"
130 " -dataColumn=<column-name> Specify the column to histogram.\n"
131 " -bins=<number> Set the number of bins for the histogram.\n"
132 " -sizeOfBins=<value> Set the size of each bin.\n"
133 " -regions=filename=<filename>,position=<columnName>,name=<columnName>\n"
134 " Define region-based histogramming.\n"
135 " -lowerLimit=<value> Set the lower limit of the histogram.\n"
136 " -upperLimit=<value> Set the upper limit of the histogram.\n"
137 " -expand=<factor> Expand the range of the histogram by the given factor.\n"
138 " -filter=<column-name>,<lower>,<upper> Filter data points based on column values.\n"
139 " -weightColumn=<column-name> Weight the histogram with the specified column.\n"
140 " -sides[=<points>] Add sides to the histogram down to zero level.\n"
141 " -normalize[={sum|area|peak}] Normalize the histogram.\n"
142 " -cdf[=only] Include the CDF in the output. Use 'only' to exclude the histogram.\n"
143 " -threads=<number> Specify the number of threads to use.\n"
144 " -statistics Include statistical information in the output.\n"
145 " -verbose Enable informational printouts during processing.\n"
146 " -majorOrder=row|column Set the major order of data.\n\n"
147 "Program by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")\n";
149#define NORMALIZE_PEAK 0
150#define NORMALIZE_AREA 1
151#define NORMALIZE_SUM 2
152#define NORMALIZE_NO 3
153#define N_NORMALIZE_OPTIONS 4
154char *normalize_option[N_NORMALIZE_OPTIONS] = {
155 "peak",
"area",
"sum",
"no"
158static int64_t filter(
double *x,
double *y,
double *filterData, int64_t npts,
double lower_filter,
double upper_filter);
160 char *dataColumn,
char *weightColumn,
char *filterColumn,
double lowerFilter,
161 double upperFilter,
SDDS_DATASET *regionTable,
char *regionNameColumn,
long doStats,
162 int64_t bins,
double binSize,
long normalizeMode,
short columnMajorOrder);
165static long iIndep, iFreq, iBins, iBinSize, iLoFilter, iUpFilter, iMean, iRMS, iStDev, iPoints, iCdf;
166static short cdfOnly, freOnly;
168int64_t readRegionFile(
SDDS_DATASET *SDDSin,
char *filename,
char *positionColumn,
char *nameColumn,
double **regionPosition,
char ***regionName);
169void classifyByRegion(
double *data,
double *weight, int64_t points,
double *histogram,
double *regionPosition, int64_t bins);
171int main(
int argc,
char **argv) {
173 long binsGiven, lowerLimitGiven, upperLimitGiven;
178 double *hist, *hist1;
182 double lowerLimit, upperLimit;
183 double givenLowerLimit, givenUpperLimit;
184 double range, binSize;
187 double mean, rms, standDev, mad;
188 char *filterColumn, *dataColumn, *weightColumn;
189 double lowerFilter = 0, upperFilter = 0;
191 SCANNED_ARG *scanned;
192 char *inputfile, *outputfile;
196 long normalizeMode, doSides, verbose, readCode;
198 unsigned long pipeFlags, majorOrderFlag, regionFlags = 0;
200 double expansionFactor = 0;
201 short columnMajorOrder = -1;
202 char *regionFilename = NULL, *regionPositionColumn = NULL, *regionNameColumn = NULL;
203 double *regionPosition = NULL;
204 int64_t nRegions = 0;
205 char **regionName = NULL;
210 argc =
scanargs(&scanned, argc, argv);
212 fprintf(stderr,
"%s\n", USAGE);
216 binsGiven = lowerLimitGiven = upperLimitGiven = 0;
217 binSize = doSides = 0;
218 inputfile = outputfile = NULL;
219 dataColumn = filterColumn = weightColumn = NULL;
220 doStats = verbose = 0;
221 normalizeMode = NORMALIZE_NO;
227 for (i = 1; i < argc; i++) {
228 if (scanned[i].arg_type == OPTION) {
229 switch (
match_string(scanned[i].list[0], option, N_OPTIONS, 0)) {
230 case SET_MAJOR_ORDER:
232 scanned[i].n_items--;
233 if (scanned[i].n_items > 0 && (!
scanItemList(&majorOrderFlag, scanned[i].list + 1, &scanned[i].n_items, 0,
"row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
"column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
234 SDDS_Bomb(
"invalid -majorOrder syntax/values");
235 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
236 columnMajorOrder = 1;
237 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
238 columnMajorOrder = 0;
242 SDDS_Bomb(
"-bins specified more than once");
244 if (sscanf(scanned[i].list[1],
"%" SCNd64, &bins) != 1 || bins <= 0)
249 SDDS_Bomb(
"-lowerLimit specified more than once");
251 if (sscanf(scanned[i].list[1],
"%lf", &givenLowerLimit) != 1)
252 SDDS_Bomb(
"invalid value for lowerLimit");
256 SDDS_Bomb(
"-upperLimit specified more than once");
258 if (sscanf(scanned[i].list[1],
"%lf", &givenUpperLimit) != 1)
259 SDDS_Bomb(
"invalid value for upperLimit");
263 if (sscanf(scanned[i].list[1],
"%lf", &expansionFactor) != 1 || expansionFactor <= 0)
268 SDDS_Bomb(
"-dataColumn specified more than once");
269 if (scanned[i].n_items != 2)
270 SDDS_Bomb(
"invalid -dataColumn syntax---supply name");
271 dataColumn = scanned[i].list[1];
275 SDDS_Bomb(
"multiple filter specifications not allowed");
276 if (scanned[i].n_items != 4 || sscanf(scanned[i].list[2],
"%lf", &lowerFilter) != 1 ||
277 sscanf(scanned[i].list[3],
"%lf", &upperFilter) != 1 || lowerFilter > upperFilter)
278 SDDS_Bomb(
"invalid -filter syntax/values");
279 filterColumn = scanned[i].list[1];
281 case SET_WEIGHTCOLUMN:
283 SDDS_Bomb(
"multiple weighting columns not allowed");
284 if (scanned[i].n_items != 2)
285 SDDS_Bomb(
"-weightColumn requires a column name");
286 weightColumn = scanned[i].list[1];
289 if (scanned[i].n_items == 1)
290 normalizeMode = NORMALIZE_SUM;
291 else if (scanned[i].n_items != 2 || (normalizeMode =
match_string(scanned[i].list[1], normalize_option, N_NORMALIZE_OPTIONS, 0)) < 0)
298 if (scanned[i].n_items == 1)
300 else if (scanned[i].n_items > 2 || (sscanf(scanned[i].list[1],
"%ld", &doSides) != 1 || doSides <= 0))
307 if (sscanf(scanned[i].list[1],
"%le", &binSize) != 1 || binSize <= 0)
315 if (scanned[i].n_items == 1)
318 if (scanned[i].n_items != 2)
320 cdf = scanned[i].list[1];
321 if (strcmp(cdf,
"only") != 0)
322 SDDS_Bomb(
"invalid -cdf value, it should be -cdf or -cdf=only");
327 case SET_REGION_FILE:
328 if (scanned[i].n_items != 4)
331 scanned[i].n_items -= 1;
332 if (!
scanItemList(®ionFlags, scanned[i].list + 1, &scanned[i].n_items, 0,
334 "position",
SDDS_STRING, ®ionPositionColumn, 1, 2,
335 "name",
SDDS_STRING, ®ionNameColumn, 1, 4, NULL) ||
336 regionFlags != (1 + 2 + 4) || !regionFilename || !regionPositionColumn || !regionNameColumn)
340 if (scanned[i].n_items != 2 ||
341 !sscanf(scanned[i].list[1],
"%d", &threads) || threads < 1)
345 fprintf(stderr,
"Error: option %s not recognized\n", scanned[i].list[0]);
352 inputfile = scanned[i].list[0];
353 else if (!outputfile)
354 outputfile = scanned[i].list[0];
362 if (binSize && binsGiven && regionFlags)
363 SDDS_Bomb(
"Provide only one of -bins, -sizeOfBins, or -regions");
367 SDDS_Bomb(
"-dataColumn must be specified");
370 if (!(nRegions = readRegionFile(&SDDSregion, regionFilename, regionPositionColumn, regionNameColumn, ®ionPosition, ®ionName)))
371 SDDS_Bomb(
"Problem with region file. Check existence and type of columns");
376 hist =
tmalloc(
sizeof(*hist) * (bins + 2 * doSides));
377 CDF = CDF1 =
tmalloc(
sizeof(*hist) * (bins + 2 * doSides));
378 indep =
tmalloc(
sizeof(*indep) * (bins + 2 * doSides));
386 if (!setupOutputFile(&outTable, outputfile, &inTable, inputfile, dataColumn, weightColumn, filterColumn, lowerFilter, upperFilter, &SDDSregion, regionNameColumn, doStats, bins, binSize, normalizeMode, columnMajorOrder))
389 data = weightData = filterData = NULL;
398 if (rows && filterColumn)
399 points = filter(data, weightData, filterData, rows, lowerFilter, upperFilter);
413 classifyByRegion(data, weightData, points, hist, regionPosition, bins);
416 if (!lowerLimitGiven) {
417 lowerLimit = (points > 0) ? data[0] : 0;
418 for (i = 0; i < points; i++)
419 if (lowerLimit > data[i])
420 lowerLimit = data[i];
422 lowerLimit = givenLowerLimit;
424 if (!upperLimitGiven) {
425 upperLimit = (points > 0) ? data[0] : 0;
426 for (i = 0; i < points; i++)
427 if (upperLimit < data[i])
428 upperLimit = data[i];
430 upperLimit = givenUpperLimit;
433 range = upperLimit - lowerLimit;
434 if (!lowerLimitGiven)
435 lowerLimit -= range * 1e-7;
436 if (!upperLimitGiven)
437 upperLimit += range * 1e-7;
438 if (upperLimit == lowerLimit) {
440 upperLimit += binSize / 2;
441 lowerLimit -= binSize / 2;
442 }
else if (fabs(upperLimit) < sqrt(DBL_MIN)) {
443 upperLimit = sqrt(DBL_MIN);
444 lowerLimit = -sqrt(DBL_MIN);
446 upperLimit += upperLimit * (1 + 2 * DBL_EPSILON);
447 lowerLimit -= upperLimit * (1 - 2 * DBL_EPSILON);
450 if (expansionFactor > 0) {
451 double center = (upperLimit + lowerLimit) / 2;
452 range = expansionFactor * (upperLimit - lowerLimit);
453 lowerLimit = center - range / 2;
454 upperLimit = center + range / 2;
456 dx = (upperLimit - lowerLimit) / bins;
460 range = ((range / binSize) + 1) * binSize;
461 middle = (lowerLimit + upperLimit) / 2;
462 lowerLimit = middle - range / 2;
463 upperLimit = middle + range / 2;
465 bins = range / binSize + 0.5;
466 if (bins < 1 && !doSides)
468 indep =
trealloc(indep,
sizeof(*indep) * (bins + 2 * doSides));
469 hist =
trealloc(hist,
sizeof(*hist) * (bins + 2 * doSides));
470 CDF =
trealloc(CDF,
sizeof(*hist) * (bins + 2 * doSides));
473 for (i = -doSides; i < bins + doSides; i++)
474 indep[i + doSides] = (i + 0.5) * dx + lowerLimit;
475 hist1 = hist + doSides;
476 CDF1 = CDF + doSides;
478 hist[0] = hist[bins + doSides] = 0;
482 pointsBinned =
make_histogram(hist1, bins, lowerLimit, upperLimit, data, points, 1);
488 for (i = 0; i < bins + doSides; i++) {
491 CDF1[0] = hist1[0] / sum;
492 for (i = 1; i < bins + doSides; i++) {
493 CDF1[i] = CDF1[i - 1] + hist1[i] / sum;
497 fprintf(stderr,
"%ld points of %" PRId64
" from page %ld histogrammed in %" PRId64
" bins\n", pointsBinned, rows, readCode, bins);
499 if (normalizeMode != NORMALIZE_NO) {
501 switch (normalizeMode) {
507 for (i = 0; i < bins; i++)
509 if (normalizeMode == NORMALIZE_AREA)
513 SDDS_Bomb(
"invalid normalize mode--consult programmer.");
517 for (i = 0; i < bins; i++)
526 !
SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iBins, bins, iBinSize, dx, iPoints, pointsBinned, -1))
529 if (!
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, regionPosition, bins, iIndep) ||
530 !
SDDS_SetColumn(&outTable, SDDS_SET_BY_NAME, regionName, bins, regionNameColumn))
532 if (!freOnly && !
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, CDF, bins, iCdf))
534 if (!cdfOnly && !
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, hist, bins, iFreq))
540 (points && (!
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, indep, bins + 2 * doSides, iIndep))) ||
541 !
SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iBins, bins, iBinSize, dx, iPoints, pointsBinned, -1))
544 if (points && !
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, CDF, bins + 2 * doSides, iCdf))
548 if (points && !
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, hist, bins + 2 * doSides, iFreq))
553 if (filterColumn && points &&
554 !
SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iLoFilter, lowerFilter, iUpFilter, upperFilter, -1))
556 if (doStats && points &&
557 !
SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iMean, mean, iRMS, rms, iStDev, standDev, -1))
568 data = weightData = filterData = NULL;
586static int64_t filter(
double *x,
double *y,
double *filterData, int64_t npts,
double lower_filter,
double upper_filter) {
588 static char *keep = NULL;
589 static int64_t maxPoints = 0;
591 if (maxPoints < npts)
592 keep =
trealloc(keep,
sizeof(*keep) * (maxPoints = npts));
594 for (i = 0; i < npts; i++) {
595 if (filterData[i] < lower_filter || filterData[i] > upper_filter)
601 for (i = j = 0; i < npts; i++)
608 filterData[j] = filterData[i];
617 char *dataColumn,
char *weightColumn,
char *filterColumn,
double lowerFilter,
618 double upperFilter,
SDDS_DATASET *regionTable,
char *regionNameColumn,
long doStats,
619 int64_t bins,
double binSize,
long normalizeMode,
short columnMajorOrder) {
620 char *symbol, *units, *dataUnits, *outputFormat;
626 if (columnMajorOrder != -1)
627 outTable->layout.data_mode.column_major = columnMajorOrder;
629 outTable->layout.data_mode.column_major = inTable->layout.data_mode.column_major;
645 switch (normalizeMode) {
647 symbol =
"RelativeFrequency";
651 symbol =
"NormalizedFrequency";
653 units =
tmalloc(
sizeof(*units) * (strlen(dataUnits) + 5));
654 if (strchr(dataUnits,
' '))
655 sprintf(units,
"1/(%s)", dataUnits);
657 sprintf(units,
"1/%s", dataUnits);
662 symbol =
"FractionalFrequency";
667 char *weightUnits = NULL;
670 symbol =
"WeightedNumberOfOccurrences";
673 symbol =
"NumberOfOccurrences";
685 sprintf(s,
"%sCdf", dataColumn);
713 buffer =
tmalloc(
sizeof(*buffer) * (strlen(dataColumn) + 20));
714 sprintf(buffer,
"%sMean", dataColumn);
717 sprintf(buffer,
"%sRms", dataColumn);
720 sprintf(buffer,
"%sStDev", dataColumn);
732int64_t readRegionFile(
SDDS_DATASET *SDDSin,
char *filename,
char *positionColumn,
char *nameColumn,
double **regionPosition,
char ***regionName) {
735 SDDS_ReadPage(SDDSin) != 1 || (rows = SDDS_RowCount(SDDSin)) < 1 ||
739 for (i = 1; i < rows; i++)
740 if ((*regionPosition)[i] <= (*regionPosition)[i - 1]) {
741 fprintf(stderr,
"sddshist: Error in region position data: row %" PRId64
" is %21.15e while row %" PRId64
" is %21.15e\n",
742 i - 1, (*regionPosition)[i - 1], i, (*regionPosition)[i]);
745 *regionPosition =
SDDS_Realloc(*regionPosition,
sizeof(**regionPosition) * (rows + 1));
746 (*regionPosition)[rows] = DBL_MAX;
747 *regionName =
SDDS_Realloc(*regionName,
sizeof(**regionName) * (rows + 1));
748 cp_str(&(*regionName)[rows],
"Beyond");
752void classifyByRegion(
double *data,
double *weight, int64_t points,
double *histogram,
double *regionPosition, int64_t bins) {
755 for (iBin = 0; iBin < bins; iBin++)
758 for (iData = 0; iData < points; iData++) {
760 for (iBin = 0; iBin < bins - 1; iBin++) {
761 if (data[iData] < regionPosition[iBin])
765 histogram[iBin] += weight[iData];
767 histogram[iBin] += 1;
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
int32_t SDDS_GetColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Retrieves information about a specified column in the SDDS dataset.
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
int32_t SDDS_StringIsBlank(char *s)
Checks if a string is blank (contains only whitespace characters).
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * trealloc(void *old_ptr, uint64_t size_of_block)
Reallocates a memory block to a new size.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
char * cp_str(char **s, char *t)
Copies a string, allocating memory for storage.
double max_in_array(double *array, long n)
Finds the maximum value in an array of doubles.
long make_histogram_weighted(double *hist, long n_bins, double lo, double hi, double *data, long n_pts, long new_start, double *weight)
Compiles a weighted histogram from data points.
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long computeWeightedMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, double *w, long n, long numThreads)
Computes weighted statistical moments of an array using multiple threads.
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.