137 {
138
139 long binsGiven, lowerLimitGiven, upperLimitGiven;
141 double *data;
142 double *filterData;
143 double *weightData;
144 double *hist, *hist1;
145 double *CDF, *CDF1;
146 double sum;
147 double *indep;
148 double lowerLimit, upperLimit;
149 double givenLowerLimit, givenUpperLimit;
150 double range, binSize;
151 int64_t bins;
152 long doStats;
153 double mean, rms, standDev, mad;
154 char *filterColumn, *dataColumn, *weightColumn;
155 double lowerFilter = 0, upperFilter = 0;
156 int64_t points;
157 SCANNED_ARG *scanned;
158 char *inputfile, *outputfile;
159 double dx;
160 int64_t i;
161 long pointsBinned;
162 long normalizeMode, doSides, verbose, readCode;
163 int64_t rows;
164 unsigned long pipeFlags, majorOrderFlag, regionFlags = 0;
165 char *cdf;
166 double expansionFactor = 0;
167 short columnMajorOrder = -1;
168 char *regionFilename = NULL, *regionPositionColumn = NULL, *regionNameColumn = NULL;
169 double *regionPosition = NULL;
170 int64_t nRegions = 0;
171 char **regionName = NULL;
173 int threads = 1;
174
176 argc =
scanargs(&scanned, argc, argv);
177 if (argc < 3) {
178 fprintf(stderr, "%s\n", USAGE);
179 fputs(additional_help, stderr);
180 exit(EXIT_FAILURE);
181 }
182
183 binsGiven = lowerLimitGiven = upperLimitGiven = 0;
184 binSize = doSides = 0;
185 inputfile = outputfile = NULL;
186 dataColumn = filterColumn = weightColumn = NULL;
187 doStats = verbose = 0;
188 normalizeMode = NORMALIZE_NO;
189 pipeFlags = 0;
190 dx = 0;
191 cdfOnly = 0;
192 freOnly = 1;
193
194 for (i = 1; i < argc; i++) {
195 if (scanned[i].arg_type == OPTION) {
196 switch (
match_string(scanned[i].list[0], option, N_OPTIONS, 0)) {
197 case SET_MAJOR_ORDER:
198 majorOrderFlag = 0;
199 scanned[i].n_items--;
200 if (scanned[i].n_items > 0 && (!
scanItemList(&majorOrderFlag, scanned[i].list + 1, &scanned[i].n_items, 0,
"row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
"column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
201 SDDS_Bomb(
"invalid -majorOrder syntax/values");
202 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
203 columnMajorOrder = 1;
204 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
205 columnMajorOrder = 0;
206 break;
207 case SET_BINS:
208 if (binsGiven)
209 SDDS_Bomb(
"-bins specified more than once");
210 binsGiven = 1;
211 if (sscanf(scanned[i].list[1], "%" SCNd64, &bins) != 1 || bins <= 0)
213 break;
214 case SET_LOWERLIMIT:
215 if (lowerLimitGiven)
216 SDDS_Bomb(
"-lowerLimit specified more than once");
217 lowerLimitGiven = 1;
218 if (sscanf(scanned[i].list[1], "%lf", &givenLowerLimit) != 1)
219 SDDS_Bomb(
"invalid value for lowerLimit");
220 break;
221 case SET_UPPERLIMIT:
222 if (upperLimitGiven)
223 SDDS_Bomb(
"-upperLimit specified more than once");
224 upperLimitGiven = 1;
225 if (sscanf(scanned[i].list[1], "%lf", &givenUpperLimit) != 1)
226 SDDS_Bomb(
"invalid value for upperLimit");
227 break;
228 case SET_EXPAND:
229 expansionFactor = 0;
230 if (sscanf(scanned[i].list[1], "%lf", &expansionFactor) != 1 || expansionFactor <= 0)
232 break;
233 case SET_DATACOLUMN:
234 if (dataColumn)
235 SDDS_Bomb(
"-dataColumn specified more than once");
236 if (scanned[i].n_items != 2)
237 SDDS_Bomb(
"invalid -dataColumn syntax---supply name");
238 dataColumn = scanned[i].list[1];
239 break;
240 case SET_FILTER:
241 if (filterColumn)
242 SDDS_Bomb(
"multiple filter specifications not allowed");
243 if (scanned[i].n_items != 4 || sscanf(scanned[i].list[2], "%lf", &lowerFilter) != 1 ||
244 sscanf(scanned[i].list[3], "%lf", &upperFilter) != 1 || lowerFilter > upperFilter)
245 SDDS_Bomb(
"invalid -filter syntax/values");
246 filterColumn = scanned[i].list[1];
247 break;
248 case SET_WEIGHTCOLUMN:
249 if (weightColumn)
250 SDDS_Bomb(
"multiple weighting columns not allowed");
251 if (scanned[i].n_items != 2)
252 SDDS_Bomb(
"-weightColumn requires a column name");
253 weightColumn = scanned[i].list[1];
254 break;
255 case SET_NORMALIZE:
256 if (scanned[i].n_items == 1)
257 normalizeMode = NORMALIZE_SUM;
258 else if (scanned[i].n_items != 2 || (normalizeMode =
match_string(scanned[i].list[1], normalize_option, N_NORMALIZE_OPTIONS, 0)) < 0)
260 break;
261 case SET_STATISTICS:
262 doStats = 1;
263 break;
264 case SET_SIDES:
265 if (scanned[i].n_items == 1)
266 doSides = 1;
267 else if (scanned[i].n_items > 2 || (sscanf(scanned[i].list[1], "%ld", &doSides) != 1 || doSides <= 0))
269 break;
270 case SET_VERBOSE:
271 verbose = 1;
272 break;
273 case SET_BINSIZE:
274 if (sscanf(scanned[i].list[1], "%le", &binSize) != 1 || binSize <= 0)
276 break;
277 case SET_PIPE:
280 break;
281 case SET_CDF:
282 if (scanned[i].n_items == 1)
283 cdfOnly = 0;
284 else {
285 if (scanned[i].n_items != 2)
287 cdf = scanned[i].list[1];
288 if (strcmp(cdf, "only") != 0)
289 SDDS_Bomb(
"invalid -cdf value, it should be -cdf or -cdf=only");
290 cdfOnly = 1;
291 }
292 freOnly = 0;
293 break;
294 case SET_REGION_FILE:
295 if (scanned[i].n_items != 4)
297 regionFlags = 0;
298 scanned[i].n_items -= 1;
299 if (!
scanItemList(®ionFlags, scanned[i].list + 1, &scanned[i].n_items, 0,
301 "position",
SDDS_STRING, ®ionPositionColumn, 1, 2,
302 "name",
SDDS_STRING, ®ionNameColumn, 1, 4, NULL) ||
303 regionFlags != (1 + 2 + 4) || !regionFilename || !regionPositionColumn || !regionNameColumn)
305 break;
306 case SET_THREADS:
307 if (scanned[i].n_items != 2 ||
308 !sscanf(scanned[i].list[1], "%d", &threads) || threads < 1)
310 break;
311 default:
312 fprintf(stderr, "Error: option %s not recognized\n", scanned[i].list[0]);
313 exit(EXIT_FAILURE);
314 break;
315 }
316 } else {
317
318 if (!inputfile)
319 inputfile = scanned[i].list[0];
320 else if (!outputfile)
321 outputfile = scanned[i].list[0];
322 else
324 }
325 }
326
328
329 if (binSize && binsGiven && regionFlags)
330 SDDS_Bomb(
"Provide only one of -bins, -sizeOfBins, or -regions");
331 if (!binsGiven)
332 bins = 20;
333 if (!dataColumn)
334 SDDS_Bomb(
"-dataColumn must be specified");
335
336 if (regionFlags) {
337 if (!(nRegions = readRegionFile(&SDDSregion, regionFilename, regionPositionColumn, regionNameColumn, ®ionPosition, ®ionName)))
338 SDDS_Bomb(
"Problem with region file. Check existence and type of columns");
339 doSides = 0;
340 bins = nRegions + 1;
341 }
342
343 hist =
tmalloc(
sizeof(*hist) * (bins + 2 * doSides));
344 CDF = CDF1 =
tmalloc(
sizeof(*hist) * (bins + 2 * doSides));
345 indep =
tmalloc(
sizeof(*indep) * (bins + 2 * doSides));
346 pointsBinned = 0;
347
353 if (!setupOutputFile(&outTable, outputfile, &inTable, inputfile, dataColumn, weightColumn, filterColumn, lowerFilter, upperFilter, &SDDSregion, regionNameColumn, doStats, bins, binSize, normalizeMode, columnMajorOrder))
355
356 data = weightData = filterData = NULL;
364
365 if (rows && filterColumn)
366 points = filter(data, weightData, filterData, rows, lowerFilter, upperFilter);
367 else
368 points = rows;
369
370 pointsBinned = 0;
371 if (points) {
372 if (doStats) {
373 if (!weightColumn)
375 else
377 }
378
379 if (regionFlags) {
380 classifyByRegion(data, weightData, points, hist, regionPosition, bins);
381 hist1 = hist;
382 } else {
383 if (!lowerLimitGiven) {
384 lowerLimit = (points > 0) ? data[0] : 0;
385 for (i = 0; i < points; i++)
386 if (lowerLimit > data[i])
387 lowerLimit = data[i];
388 } else {
389 lowerLimit = givenLowerLimit;
390 }
391 if (!upperLimitGiven) {
392 upperLimit = (points > 0) ? data[0] : 0;
393 for (i = 0; i < points; i++)
394 if (upperLimit < data[i])
395 upperLimit = data[i];
396 } else {
397 upperLimit = givenUpperLimit;
398 }
399
400 range = upperLimit - lowerLimit;
401 if (!lowerLimitGiven)
402 lowerLimit -= range * 1e-7;
403 if (!upperLimitGiven)
404 upperLimit += range * 1e-7;
405 if (upperLimit == lowerLimit) {
406 if (binSize) {
407 upperLimit += binSize / 2;
408 lowerLimit -= binSize / 2;
409 } else if (fabs(upperLimit) < sqrt(DBL_MIN)) {
410 upperLimit = sqrt(DBL_MIN);
411 lowerLimit = -sqrt(DBL_MIN);
412 } else {
413 upperLimit += upperLimit * (1 + 2 * DBL_EPSILON);
414 lowerLimit -= upperLimit * (1 - 2 * DBL_EPSILON);
415 }
416 }
417 if (expansionFactor > 0) {
418 double center = (upperLimit + lowerLimit) / 2;
419 range = expansionFactor * (upperLimit - lowerLimit);
420 lowerLimit = center - range / 2;
421 upperLimit = center + range / 2;
422 }
423 dx = (upperLimit - lowerLimit) / bins;
424
425 if (binSize) {
426 double middle;
427 range = ((range / binSize) + 1) * binSize;
428 middle = (lowerLimit + upperLimit) / 2;
429 lowerLimit = middle - range / 2;
430 upperLimit = middle + range / 2;
431 dx = binSize;
432 bins = range / binSize + 0.5;
433 if (bins < 1 && !doSides)
434 bins = 2 * doSides;
435 indep =
trealloc(indep,
sizeof(*indep) * (bins + 2 * doSides));
436 hist =
trealloc(hist,
sizeof(*hist) * (bins + 2 * doSides));
437 CDF =
trealloc(CDF,
sizeof(*hist) * (bins + 2 * doSides));
438 }
439
440 for (i = -doSides; i < bins + doSides; i++)
441 indep[i + doSides] = (i + 0.5) * dx + lowerLimit;
442 hist1 = hist + doSides;
443 CDF1 = CDF + doSides;
444 if (doSides) {
445 hist[0] = hist[bins + doSides] = 0;
446 }
447
448 if (!weightColumn)
449 pointsBinned =
make_histogram(hist1, bins, lowerLimit, upperLimit, data, points, 1);
450 else
452 }
453
454 sum = 0;
455 for (i = 0; i < bins + doSides; i++) {
456 sum += hist1[i];
457 }
458 CDF1[0] = hist1[0] / sum;
459 for (i = 1; i < bins + doSides; i++) {
460 CDF1[i] = CDF1[i - 1] + hist1[i] / sum;
461 }
462
463 if (verbose)
464 fprintf(stderr, "%ld points of %" PRId64 " from page %ld histogrammed in %" PRId64 " bins\n", pointsBinned, rows, readCode, bins);
465 if (!cdfOnly) {
466 if (normalizeMode != NORMALIZE_NO) {
467 double norm = 0;
468 switch (normalizeMode) {
469 case NORMALIZE_PEAK:
471 break;
472 case NORMALIZE_AREA:
473 case NORMALIZE_SUM:
474 for (i = 0; i < bins; i++)
475 norm += hist1[i];
476 if (normalizeMode == NORMALIZE_AREA)
477 norm *= dx;
478 break;
479 default:
480 SDDS_Bomb(
"invalid normalize mode--consult programmer.");
481 break;
482 }
483 if (norm)
484 for (i = 0; i < bins; i++)
485 hist1[i] /= norm;
486 }
487 }
488 }
489
490 if (regionFlags) {
493 !
SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iBins, bins, iBinSize, dx, iPoints, pointsBinned, -1))
495 if (points) {
496 if (!
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, regionPosition, bins, iIndep) ||
497 !
SDDS_SetColumn(&outTable, SDDS_SET_BY_NAME, regionName, bins, regionNameColumn))
499 if (!freOnly && !
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, CDF, bins, iCdf))
501 if (!cdfOnly && !
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, hist, bins, iFreq))
503 }
504 } else {
507 (points && (!
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, indep, bins + 2 * doSides, iIndep))) ||
508 !
SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iBins, bins, iBinSize, dx, iPoints, pointsBinned, -1))
510 if (!freOnly) {
511 if (points && !
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, CDF, bins + 2 * doSides, iCdf))
513 }
514 if (!cdfOnly) {
515 if (points && !
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, hist, bins + 2 * doSides, iFreq))
517 }
518 }
519
520 if (filterColumn && points &&
521 !
SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iLoFilter, lowerFilter, iUpFilter, upperFilter, -1))
523 if (doStats && points &&
524 !
SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iMean, mean, iRMS, rms, iStDev, standDev, -1))
526
529 if (data)
530 free(data);
531 if (weightData)
532 free(weightData);
533 if (filterData)
534 free(filterData);
535 data = weightData = filterData = NULL;
536 }
537
540 exit(EXIT_FAILURE);
541 }
544 exit(EXIT_FAILURE);
545 }
546 return EXIT_SUCCESS;
547}
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
#define SDDS_STRING
Identifier for the string data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
double max_in_array(double *array, long n)
Finds the maximum value in an array of doubles.
long make_histogram_weighted(double *hist, long n_bins, double lo, double hi, double *data, long n_pts, long new_start, double *weight)
Compiles a weighted histogram from data points.
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long computeWeightedMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, double *w, long n, long numThreads)
Computes weighted statistical moments of an array using multiple threads.
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.