171 {
172
173 long binsGiven, lowerLimitGiven, upperLimitGiven;
175 double *data;
176 double *filterData;
177 double *weightData;
178 double *hist, *hist1;
179 double *CDF, *CDF1;
180 double sum;
181 double *indep;
182 double lowerLimit, upperLimit;
183 double givenLowerLimit, givenUpperLimit;
184 double range, binSize;
185 int64_t bins;
186 long doStats;
187 double mean, rms, standDev, mad;
188 char *filterColumn, *dataColumn, *weightColumn;
189 double lowerFilter = 0, upperFilter = 0;
190 int64_t points;
191 SCANNED_ARG *scanned;
192 char *inputfile, *outputfile;
193 double dx;
194 int64_t i;
195 long pointsBinned;
196 long normalizeMode, doSides, verbose, readCode;
197 int64_t rows;
198 unsigned long pipeFlags, majorOrderFlag, regionFlags = 0;
199 char *cdf;
200 double expansionFactor = 0;
201 short columnMajorOrder = -1;
202 char *regionFilename = NULL, *regionPositionColumn = NULL, *regionNameColumn = NULL;
203 double *regionPosition = NULL;
204 int64_t nRegions = 0;
205 char **regionName = NULL;
207 int threads = 1;
208
210 argc =
scanargs(&scanned, argc, argv);
211 if (argc < 3) {
212 fprintf(stderr, "%s\n", USAGE);
213 exit(EXIT_FAILURE);
214 }
215
216 binsGiven = lowerLimitGiven = upperLimitGiven = 0;
217 binSize = doSides = 0;
218 inputfile = outputfile = NULL;
219 dataColumn = filterColumn = weightColumn = NULL;
220 doStats = verbose = 0;
221 normalizeMode = NORMALIZE_NO;
222 pipeFlags = 0;
223 dx = 0;
224 cdfOnly = 0;
225 freOnly = 1;
226
227 for (i = 1; i < argc; i++) {
228 if (scanned[i].arg_type == OPTION) {
229 switch (
match_string(scanned[i].list[0], option, N_OPTIONS, 0)) {
230 case SET_MAJOR_ORDER:
231 majorOrderFlag = 0;
232 scanned[i].n_items--;
233 if (scanned[i].n_items > 0 && (!
scanItemList(&majorOrderFlag, scanned[i].list + 1, &scanned[i].n_items, 0,
"row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
"column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
234 SDDS_Bomb(
"invalid -majorOrder syntax/values");
235 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
236 columnMajorOrder = 1;
237 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
238 columnMajorOrder = 0;
239 break;
240 case SET_BINS:
241 if (binsGiven)
242 SDDS_Bomb(
"-bins specified more than once");
243 binsGiven = 1;
244 if (sscanf(scanned[i].list[1], "%" SCNd64, &bins) != 1 || bins <= 0)
246 break;
247 case SET_LOWERLIMIT:
248 if (lowerLimitGiven)
249 SDDS_Bomb(
"-lowerLimit specified more than once");
250 lowerLimitGiven = 1;
251 if (sscanf(scanned[i].list[1], "%lf", &givenLowerLimit) != 1)
252 SDDS_Bomb(
"invalid value for lowerLimit");
253 break;
254 case SET_UPPERLIMIT:
255 if (upperLimitGiven)
256 SDDS_Bomb(
"-upperLimit specified more than once");
257 upperLimitGiven = 1;
258 if (sscanf(scanned[i].list[1], "%lf", &givenUpperLimit) != 1)
259 SDDS_Bomb(
"invalid value for upperLimit");
260 break;
261 case SET_EXPAND:
262 expansionFactor = 0;
263 if (sscanf(scanned[i].list[1], "%lf", &expansionFactor) != 1 || expansionFactor <= 0)
265 break;
266 case SET_DATACOLUMN:
267 if (dataColumn)
268 SDDS_Bomb(
"-dataColumn specified more than once");
269 if (scanned[i].n_items != 2)
270 SDDS_Bomb(
"invalid -dataColumn syntax---supply name");
271 dataColumn = scanned[i].list[1];
272 break;
273 case SET_FILTER:
274 if (filterColumn)
275 SDDS_Bomb(
"multiple filter specifications not allowed");
276 if (scanned[i].n_items != 4 || sscanf(scanned[i].list[2], "%lf", &lowerFilter) != 1 ||
277 sscanf(scanned[i].list[3], "%lf", &upperFilter) != 1 || lowerFilter > upperFilter)
278 SDDS_Bomb(
"invalid -filter syntax/values");
279 filterColumn = scanned[i].list[1];
280 break;
281 case SET_WEIGHTCOLUMN:
282 if (weightColumn)
283 SDDS_Bomb(
"multiple weighting columns not allowed");
284 if (scanned[i].n_items != 2)
285 SDDS_Bomb(
"-weightColumn requires a column name");
286 weightColumn = scanned[i].list[1];
287 break;
288 case SET_NORMALIZE:
289 if (scanned[i].n_items == 1)
290 normalizeMode = NORMALIZE_SUM;
291 else if (scanned[i].n_items != 2 || (normalizeMode =
match_string(scanned[i].list[1], normalize_option, N_NORMALIZE_OPTIONS, 0)) < 0)
293 break;
294 case SET_STATISTICS:
295 doStats = 1;
296 break;
297 case SET_SIDES:
298 if (scanned[i].n_items == 1)
299 doSides = 1;
300 else if (scanned[i].n_items > 2 || (sscanf(scanned[i].list[1], "%ld", &doSides) != 1 || doSides <= 0))
302 break;
303 case SET_VERBOSE:
304 verbose = 1;
305 break;
306 case SET_BINSIZE:
307 if (sscanf(scanned[i].list[1], "%le", &binSize) != 1 || binSize <= 0)
309 break;
310 case SET_PIPE:
313 break;
314 case SET_CDF:
315 if (scanned[i].n_items == 1)
316 cdfOnly = 0;
317 else {
318 if (scanned[i].n_items != 2)
320 cdf = scanned[i].list[1];
321 if (strcmp(cdf, "only") != 0)
322 SDDS_Bomb(
"invalid -cdf value, it should be -cdf or -cdf=only");
323 cdfOnly = 1;
324 }
325 freOnly = 0;
326 break;
327 case SET_REGION_FILE:
328 if (scanned[i].n_items != 4)
330 regionFlags = 0;
331 scanned[i].n_items -= 1;
332 if (!
scanItemList(®ionFlags, scanned[i].list + 1, &scanned[i].n_items, 0,
334 "position",
SDDS_STRING, ®ionPositionColumn, 1, 2,
335 "name",
SDDS_STRING, ®ionNameColumn, 1, 4, NULL) ||
336 regionFlags != (1 + 2 + 4) || !regionFilename || !regionPositionColumn || !regionNameColumn)
338 break;
339 case SET_THREADS:
340 if (scanned[i].n_items != 2 ||
341 !sscanf(scanned[i].list[1], "%d", &threads) || threads < 1)
343 break;
344 default:
345 fprintf(stderr, "Error: option %s not recognized\n", scanned[i].list[0]);
346 exit(EXIT_FAILURE);
347 break;
348 }
349 } else {
350
351 if (!inputfile)
352 inputfile = scanned[i].list[0];
353 else if (!outputfile)
354 outputfile = scanned[i].list[0];
355 else
357 }
358 }
359
361
362 if (binSize && binsGiven && regionFlags)
363 SDDS_Bomb(
"Provide only one of -bins, -sizeOfBins, or -regions");
364 if (!binsGiven)
365 bins = 20;
366 if (!dataColumn)
367 SDDS_Bomb(
"-dataColumn must be specified");
368
369 if (regionFlags) {
370 if (!(nRegions = readRegionFile(&SDDSregion, regionFilename, regionPositionColumn, regionNameColumn, ®ionPosition, ®ionName)))
371 SDDS_Bomb(
"Problem with region file. Check existence and type of columns");
372 doSides = 0;
373 bins = nRegions + 1;
374 }
375
376 hist =
tmalloc(
sizeof(*hist) * (bins + 2 * doSides));
377 CDF = CDF1 =
tmalloc(
sizeof(*hist) * (bins + 2 * doSides));
378 indep =
tmalloc(
sizeof(*indep) * (bins + 2 * doSides));
379 pointsBinned = 0;
380
386 if (!setupOutputFile(&outTable, outputfile, &inTable, inputfile, dataColumn, weightColumn, filterColumn, lowerFilter, upperFilter, &SDDSregion, regionNameColumn, doStats, bins, binSize, normalizeMode, columnMajorOrder))
388
389 data = weightData = filterData = NULL;
397
398 if (rows && filterColumn)
399 points = filter(data, weightData, filterData, rows, lowerFilter, upperFilter);
400 else
401 points = rows;
402
403 pointsBinned = 0;
404 if (points) {
405 if (doStats) {
406 if (!weightColumn)
408 else
410 }
411
412 if (regionFlags) {
413 classifyByRegion(data, weightData, points, hist, regionPosition, bins);
414 hist1 = hist;
415 } else {
416 if (!lowerLimitGiven) {
417 lowerLimit = (points > 0) ? data[0] : 0;
418 for (i = 0; i < points; i++)
419 if (lowerLimit > data[i])
420 lowerLimit = data[i];
421 } else {
422 lowerLimit = givenLowerLimit;
423 }
424 if (!upperLimitGiven) {
425 upperLimit = (points > 0) ? data[0] : 0;
426 for (i = 0; i < points; i++)
427 if (upperLimit < data[i])
428 upperLimit = data[i];
429 } else {
430 upperLimit = givenUpperLimit;
431 }
432
433 range = upperLimit - lowerLimit;
434 if (!lowerLimitGiven)
435 lowerLimit -= range * 1e-7;
436 if (!upperLimitGiven)
437 upperLimit += range * 1e-7;
438 if (upperLimit == lowerLimit) {
439 if (binSize) {
440 upperLimit += binSize / 2;
441 lowerLimit -= binSize / 2;
442 } else if (fabs(upperLimit) < sqrt(DBL_MIN)) {
443 upperLimit = sqrt(DBL_MIN);
444 lowerLimit = -sqrt(DBL_MIN);
445 } else {
446 upperLimit += upperLimit * (1 + 2 * DBL_EPSILON);
447 lowerLimit -= upperLimit * (1 - 2 * DBL_EPSILON);
448 }
449 }
450 if (expansionFactor > 0) {
451 double center = (upperLimit + lowerLimit) / 2;
452 range = expansionFactor * (upperLimit - lowerLimit);
453 lowerLimit = center - range / 2;
454 upperLimit = center + range / 2;
455 }
456 dx = (upperLimit - lowerLimit) / bins;
457
458 if (binSize) {
459 double middle;
460 range = ((range / binSize) + 1) * binSize;
461 middle = (lowerLimit + upperLimit) / 2;
462 lowerLimit = middle - range / 2;
463 upperLimit = middle + range / 2;
464 dx = binSize;
465 bins = range / binSize + 0.5;
466 if (bins < 1 && !doSides)
467 bins = 2 * doSides;
468 indep =
trealloc(indep,
sizeof(*indep) * (bins + 2 * doSides));
469 hist =
trealloc(hist,
sizeof(*hist) * (bins + 2 * doSides));
470 CDF =
trealloc(CDF,
sizeof(*hist) * (bins + 2 * doSides));
471 }
472
473 for (i = -doSides; i < bins + doSides; i++)
474 indep[i + doSides] = (i + 0.5) * dx + lowerLimit;
475 hist1 = hist + doSides;
476 CDF1 = CDF + doSides;
477 if (doSides) {
478 hist[0] = hist[bins + doSides] = 0;
479 }
480
481 if (!weightColumn)
482 pointsBinned =
make_histogram(hist1, bins, lowerLimit, upperLimit, data, points, 1);
483 else
485 }
486
487 sum = 0;
488 for (i = 0; i < bins + doSides; i++) {
489 sum += hist1[i];
490 }
491 CDF1[0] = hist1[0] / sum;
492 for (i = 1; i < bins + doSides; i++) {
493 CDF1[i] = CDF1[i - 1] + hist1[i] / sum;
494 }
495
496 if (verbose)
497 fprintf(stderr, "%ld points of %" PRId64 " from page %ld histogrammed in %" PRId64 " bins\n", pointsBinned, rows, readCode, bins);
498 if (!cdfOnly) {
499 if (normalizeMode != NORMALIZE_NO) {
500 double norm = 0;
501 switch (normalizeMode) {
502 case NORMALIZE_PEAK:
504 break;
505 case NORMALIZE_AREA:
506 case NORMALIZE_SUM:
507 for (i = 0; i < bins; i++)
508 norm += hist1[i];
509 if (normalizeMode == NORMALIZE_AREA)
510 norm *= dx;
511 break;
512 default:
513 SDDS_Bomb(
"invalid normalize mode--consult programmer.");
514 break;
515 }
516 if (norm)
517 for (i = 0; i < bins; i++)
518 hist1[i] /= norm;
519 }
520 }
521 }
522
523 if (regionFlags) {
526 !
SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iBins, bins, iBinSize, dx, iPoints, pointsBinned, -1))
528 if (points) {
529 if (!
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, regionPosition, bins, iIndep) ||
530 !
SDDS_SetColumn(&outTable, SDDS_SET_BY_NAME, regionName, bins, regionNameColumn))
532 if (!freOnly && !
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, CDF, bins, iCdf))
534 if (!cdfOnly && !
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, hist, bins, iFreq))
536 }
537 } else {
540 (points && (!
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, indep, bins + 2 * doSides, iIndep))) ||
541 !
SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iBins, bins, iBinSize, dx, iPoints, pointsBinned, -1))
543 if (!freOnly) {
544 if (points && !
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, CDF, bins + 2 * doSides, iCdf))
546 }
547 if (!cdfOnly) {
548 if (points && !
SDDS_SetColumn(&outTable, SDDS_SET_BY_INDEX, hist, bins + 2 * doSides, iFreq))
550 }
551 }
552
553 if (filterColumn && points &&
554 !
SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iLoFilter, lowerFilter, iUpFilter, upperFilter, -1))
556 if (doStats && points &&
557 !
SDDS_SetParameters(&outTable, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iMean, mean, iRMS, rms, iStDev, standDev, -1))
559
562 if (data)
563 free(data);
564 if (weightData)
565 free(weightData);
566 if (filterData)
567 free(filterData);
568 data = weightData = filterData = NULL;
569 }
570
573 exit(EXIT_FAILURE);
574 }
577 exit(EXIT_FAILURE);
578 }
579 return EXIT_SUCCESS;
580}
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
#define SDDS_STRING
Identifier for the string data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
double max_in_array(double *array, long n)
Finds the maximum value in an array of doubles.
long make_histogram_weighted(double *hist, long n_bins, double lo, double hi, double *data, long n_pts, long new_start, double *weight)
Compiles a weighted histogram from data points.
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long computeWeightedMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, double *w, long n, long numThreads)
Computes weighted statistical moments of an array using multiple threads.
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.