191 {
193 long stats;
195 long requests;
196 SCANNED_ARG *scanned;
198
199 int32_t power;
200 long i_arg, code, iStat, tmpFileUsed, iColumn, posColIndex;
201 int64_t rows, row, count;
202 long noWarnings, maxSourceColumns;
203 char *input, *output, *positionColumn, **posColumnName;
204 double **inputData, *outputData, value1, value2, topLimit, bottomLimit;
205 unsigned long pipeFlags, scanFlags, majorOrderFlag;
206 char s[100];
207 double *statWorkArray;
208 double quartilePoint[2] = {25.0, 75.0}, quartileResult[2];
209 double decilePoint[2] = {10.0, 90.0}, decileResult[2];
210 double percent;
211 short columnMajorOrder = -1;
212 int threads = 1;
213
215 argc =
scanargs(&scanned, argc, argv);
216 if (argc < 2) {
217 bomb(
"too few arguments", USAGE);
218 }
219
220 posColumnName = NULL;
221 input = output = positionColumn = NULL;
222 stat = NULL;
223 request = NULL;
224 stats = requests = pipeFlags = 0;
225 noWarnings = 0;
226 outputData = NULL;
227 statWorkArray = NULL;
228
229 for (i_arg = 1; i_arg < argc; i_arg++) {
230 scanFlags = 0;
231 if (scanned[i_arg].arg_type == OPTION) {
232
233 switch (code =
match_string(scanned[i_arg].list[0], option, N_OPTIONS, 0)) {
234 case SET_MAXIMUM:
235 case SET_MINIMUM:
236 case SET_MEAN:
237 case SET_MEDIAN:
238 case SET_STANDARDDEVIATION:
239 case SET_RMS:
240 case SET_SIGMA:
241 case SET_MAD:
242 case SET_COUNT:
243 case SET_DRANGE:
244 case SET_QRANGE:
245 case SET_SMALLEST:
246 case SET_LARGEST:
247 case SET_SPREAD_ARG:
248 if (scanned[i_arg].n_items < 3) {
249 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
250 exit(EXIT_FAILURE);
251 }
252 if (!
scanItemList(&scanFlags, scanned[i_arg].list, &scanned[i_arg].n_items,
253 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS |
254 SCANITEMLIST_IGNORE_VALUELESS,
255 "positionColumn",
SDDS_STRING, &positionColumn, 1, POSITIONCOLUMN_GIVEN,
256 "toplimit",
SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
257 "bottomlimit",
SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL)) {
258 sprintf(s, "invalid -%s syntax", scanned[i_arg].list[0]);
260 }
261 requests = addStatRequests(&request, requests, scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, code, scanFlags);
262 request[requests - 1].topLimit = topLimit;
263 request[requests - 1].bottomLimit = bottomLimit;
264 if (positionColumn) {
265 if (code == SET_MAXIMUM || code == SET_MINIMUM || code == SET_LARGEST || code == SET_SMALLEST)
266 SDDS_CopyString(&request[requests - 1].positionColumn, positionColumn);
267 free(positionColumn);
268 positionColumn = NULL;
269 }
270 break;
271 case SET_PERCENTILE:
272 if (scanned[i_arg].n_items < 3) {
273 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
274 exit(EXIT_FAILURE);
275 }
276 if (!
scanItemList(&scanFlags, scanned[i_arg].list, &scanned[i_arg].n_items,
277 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS |
278 SCANITEMLIST_IGNORE_VALUELESS,
280 "toplimit",
SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
281 "bottomlimit",
SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL) ||
282 !(scanFlags & PERCENT_GIVEN) || percent <= 0 || percent >= 100)
284 requests = addStatRequests(&request, requests, scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, code, scanFlags);
285 request[requests - 1].percent = percent;
286 request[requests - 1].topLimit = topLimit;
287 request[requests - 1].bottomLimit = bottomLimit;
288 break;
289 case SET_SUM:
290 if (scanned[i_arg].n_items < 3) {
291 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
292 exit(EXIT_FAILURE);
293 }
294 power = 1;
295 if (!
scanItemList(&scanFlags, scanned[i_arg].list, &scanned[i_arg].n_items,
296 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS | SCANITEMLIST_IGNORE_VALUELESS,
298 "toplimit",
SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
299 "bottomlimit",
SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL))
301 requests = addStatRequests(&request, requests, scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, code, scanFlags);
302 request[requests - 1].sumPower = power;
303 request[requests - 1].topLimit = topLimit;
304 request[requests - 1].bottomLimit = bottomLimit;
305 break;
306 case SET_PIPE:
307 if (!
processPipeOption(scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, &pipeFlags))
309 break;
310 case SET_NOWARNINGS:
311 noWarnings = 1;
312 break;
313 case SET_MAJOR_ORDER:
314 majorOrderFlag = 0;
315 scanned[i_arg].n_items--;
316 if (scanned[i_arg].n_items > 0 &&
317 (!
scanItemList(&majorOrderFlag, scanned[i_arg].list + 1, &scanned[i_arg].n_items, 0,
318 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
319 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
320 SDDS_Bomb(
"invalid -majorOrder syntax/values");
321 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
322 columnMajorOrder = 1;
323 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
324 columnMajorOrder = 0;
325 break;
326 case SET_THREADS:
327 if (scanned[i_arg].n_items != 2 ||
328 !sscanf(scanned[i_arg].list[1], "%d", &threads) || threads < 1)
330 break;
331 default:
332 fprintf(stderr, "error: unknown option '%s' given\n", scanned[i_arg].list[0]);
333 exit(EXIT_FAILURE);
334 break;
335 }
336 } else {
337
338 if (!input)
339 input = scanned[i_arg].list[0];
340 else if (!output)
341 output = scanned[i_arg].list[0];
342 else
344 }
345 }
346
347 processFilenames(
"sddsrowstats", &input, &output, pipeFlags, noWarnings, &tmpFileUsed);
348
349 if (!requests)
351
354
355 if (!(stat = compileStatDefinitions(&inData, request, requests, &stats, noWarnings))) {
357 exit(EXIT_FAILURE);
358 }
359 if (stats < 0)
360 SDDS_Bomb(
"No valid statistics requests.");
361 for (iStat = maxSourceColumns = 0; iStat < stats; iStat++) {
362 if (stat[iStat].sourceColumns > maxSourceColumns)
363 maxSourceColumns = stat[iStat].sourceColumns;
364 }
365 if (!(statWorkArray = malloc(sizeof(*statWorkArray) * maxSourceColumns)))
366 SDDS_Bomb(
"allocation failure (statWorkArray)");
367
368 if (!setupOutputFile(&outData, output, &inData, stat, stats, columnMajorOrder)) {
370 exit(EXIT_FAILURE);
371 }
372
373 inputData = NULL;
374
378 exit(EXIT_FAILURE);
379 }
381 if (!(outputData = (double *)malloc(sizeof(*outputData) * rows))) {
383 }
384 if (!(posColumnName = (char **)malloc(sizeof(*posColumnName) * rows))) {
386 }
387 for (iStat = 0; iStat < stats; iStat++) {
388 if (!(inputData = (double **)malloc(sizeof(*inputData) * stat[iStat].sourceColumns))) {
390 }
391 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
394 }
395 }
396 for (row = 0; row < rows; row++)
397 outputData[row] = DBL_MAX;
398 switch (stat[iStat].optionCode) {
399 case SET_MINIMUM:
400 for (row = 0; row < rows; row++) {
401 value1 = DBL_MAX;
402 posColIndex = 0;
403 posColumnName[row] = NULL;
404 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
405 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
406 continue;
407 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
408 continue;
409 if (inputData[iColumn][row] < value1) {
410 value1 = inputData[iColumn][row];
411 posColIndex = iColumn;
412 }
413 }
414 outputData[row] = value1;
415 if (stat[iStat].positionColumn)
416 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
417 }
418 break;
419 case SET_MAXIMUM:
420 for (row = 0; row < rows; row++) {
421 posColIndex = 0;
422 value1 = -DBL_MAX;
423 posColumnName[row] = NULL;
424 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
425 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
426 continue;
427 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
428 continue;
429 if (inputData[iColumn][row] > value1) {
430 posColIndex = iColumn;
431 value1 = inputData[iColumn][row];
432 }
433 }
434 outputData[row] = value1;
435 if (stat[iStat].positionColumn)
436 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
437 }
438 break;
439 case SET_MEAN:
440 for (row = 0; row < rows; row++) {
441 value1 = 0;
442 count = 0;
443 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
444 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
445 continue;
446 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
447 continue;
448 value1 += inputData[iColumn][row];
449 count++;
450 }
451 if (count)
452 outputData[row] = value1 / count;
453 }
454 break;
455 case SET_MEDIAN:
456 for (row = 0; row < rows; row++) {
457 for (iColumn = count = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
458 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
459 continue;
460 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
461 continue;
462 statWorkArray[count] = inputData[iColumn][row];
463 count++;
464 }
465 if (count)
467 }
468 break;
469 case SET_STANDARDDEVIATION:
470 for (row = 0; row < rows; row++) {
471 value1 = 0;
472 value2 = 0;
473 count = 0;
474 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
475 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
476 continue;
477 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
478 continue;
479 value1 += inputData[iColumn][row];
480 value2 += inputData[iColumn][row] * inputData[iColumn][row];
481 count++;
482 }
483 if (count > 1) {
484 if ((value1 = value2 / count - sqr(value1 / count)) <= 0)
485 outputData[row] = 0;
486 else
487 outputData[row] = sqrt(value1 * count / (count - 1.0));
488 }
489 }
490 break;
491 case SET_SIGMA:
492 for (row = 0; row < rows; row++) {
493 value1 = 0;
494 value2 = 0;
495 count = 0;
496 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
497 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
498 continue;
499 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
500 continue;
501 value1 += inputData[iColumn][row];
502 value2 += inputData[iColumn][row] * inputData[iColumn][row];
503 count++;
504 }
505 if (count > 1) {
506 if ((value1 = value2 / count - sqr(value1 / count)) <= 0)
507 outputData[row] = 0;
508 else
509 outputData[row] = sqrt(value1 / (count - 1.0));
510 }
511 }
512 break;
513 case SET_RMS:
514 for (row = 0; row < rows; row++) {
515 value1 = 0;
516 count = 0;
517 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
518 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
519 continue;
520 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
521 continue;
522 value1 += sqr(inputData[iColumn][row]);
523 count++;
524 }
525 if (count)
526 outputData[row] = sqrt(value1 / count);
527 }
528 break;
529 case SET_SUM:
530 for (row = 0; row < rows; row++) {
531 value1 = 0;
532 count = 0;
533 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
534 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
535 continue;
536 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
537 continue;
538 value1 +=
ipow(inputData[iColumn][row], stat[iStat].sumPower);
539 count++;
540 }
541 if (count)
542 outputData[row] = value1;
543 }
544 break;
545 case SET_COUNT:
546 for (row = 0; row < rows; row++) {
547 count = 0;
548 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
549 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
550 continue;
551 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
552 continue;
553 count++;
554 }
555 outputData[row] = count;
556 }
557 break;
558 case SET_MAD:
559 for (row = 0; row < rows; row++) {
560 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
561 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
562 continue;
563 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
564 continue;
565 statWorkArray[count] = inputData[iColumn][row];
566 count++;
567 }
568 if (count)
570 }
571 break;
572 case SET_DRANGE:
573 for (row = 0; row < rows; row++) {
574 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
575 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
576 continue;
577 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
578 continue;
579 statWorkArray[count] = inputData[iColumn][row];
580 count++;
581 }
583 outputData[row] = decileResult[1] - decileResult[0];
584 }
585 break;
586 case SET_QRANGE:
587 for (row = 0; row < rows; row++) {
588 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
589 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
590 continue;
591 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
592 continue;
593 statWorkArray[count] = inputData[iColumn][row];
594 count++;
595 }
597 outputData[row] = quartileResult[1] - quartileResult[0];
598 }
599 break;
600 case SET_SMALLEST:
601 for (row = 0; row < rows; row++) {
602 value1 = DBL_MAX;
603 posColIndex = 0;
604 posColumnName[row] = NULL;
605 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
606 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
607 continue;
608 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
609 continue;
610 if ((value2 = fabs(inputData[iColumn][row])) < value1) {
611 posColIndex = iColumn;
612 value1 = value2;
613 }
614 }
615 outputData[row] = value1;
616 if (stat[iStat].positionColumn)
617 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
618 }
619 break;
620 case SET_LARGEST:
621 for (row = 0; row < rows; row++) {
622 value1 = 0;
623 posColIndex = 0;
624 posColumnName[row] = NULL;
625 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
626 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
627 continue;
628 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
629 continue;
630 if ((value2 = fabs(inputData[iColumn][row])) > value1) {
631 posColIndex = iColumn;
632 value1 = value2;
633 }
634 }
635 outputData[row] = value1;
636 if (stat[iStat].positionColumn)
637 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
638 }
639 break;
640 case SET_SPREAD_ARG:
641 for (row = 0; row < rows; row++) {
642 value1 = DBL_MAX;
643 value2 = -DBL_MAX;
644 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
645 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
646 continue;
647 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
648 continue;
649 if (inputData[iColumn][row] < value1)
650 value1 = inputData[iColumn][row];
651 if (inputData[iColumn][row] > value2)
652 value2 = inputData[iColumn][row];
653 }
654 outputData[row] = value2 - value1;
655 }
656 break;
657 case SET_PERCENTILE:
658 for (row = 0; row < rows; row++) {
659 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
660 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
661 continue;
662 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
663 continue;
664 statWorkArray[count] = inputData[iColumn][row];
665 count++;
666 }
667 outputData[row] = HUGE_VAL;
668 if (count)
670 }
671 break;
672 default:
673 SDDS_Bomb(
"invalid statistic code (accumulation loop)");
674 break;
675 }
676 if (!
SDDS_SetColumn(&outData, SDDS_SET_BY_INDEX, outputData, rows, stat[iStat].resultIndex))
678
679 if (stat[iStat].positionColumn) {
680 if (!
SDDS_SetColumn(&outData, SDDS_SET_BY_INDEX, posColumnName, rows, stat[iStat].positionColumnIndex))
682 }
683 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++)
684 free(inputData[iColumn]);
685 free(inputData);
686 inputData = NULL;
687 }
688 free(outputData);
689 outputData = NULL;
690 free(posColumnName);
691 posColumnName = NULL;
692 }
695 }
697 for (iStat = 0; iStat < stats; iStat++) {
698 if (stat[iStat].positionColumn)
699 free(stat[iStat].positionColumn);
700 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++)
701 free(stat[iStat].sourceColumn[iColumn]);
702 free(stat[iStat].sourceColumn);
703 }
704 free(request);
705 free(stat);
706 if (statWorkArray)
707 free(statWorkArray);
708
711 exit(EXIT_FAILURE);
712 }
714 exit(EXIT_FAILURE);
715 return EXIT_SUCCESS;
716}
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
double ipow(const double x, const int64_t p)
Compute x raised to the power p (x^p).
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
void free_scanargs(SCANNED_ARG **scanned, int argc)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.