SDDSlib
Loading...
Searching...
No Matches
sddsrowstats.c File Reference

Computes statistics for rows across multiple columns in SDDS datasets. More...

#include "mdb.h"
#include "scan.h"
#include "SDDS.h"
#include "SDDSutils.h"
#include <ctype.h>

Go to the source code of this file.

Classes

struct  STAT_REQUEST
 
struct  STAT_DEFINITION
 

Macros

#define TOPLIMIT_GIVEN   0x0001U
 
#define BOTTOMLIMIT_GIVEN   0x0002U
 
#define POSITIONCOLUMN_GIVEN   0x0004U
 
#define PERCENT_GIVEN   0x0008U
 

Enumerations

enum  option_type {
  SET_MAXIMUM , SET_MINIMUM , SET_MEAN , SET_STANDARDDEVIATION ,
  SET_RMS , SET_SUM , SET_SIGMA , SET_COUNT ,
  SET_PIPE , SET_MEDIAN , SET_MAD , SET_NOWARNINGS ,
  SET_DRANGE , SET_QRANGE , SET_LARGEST , SET_SMALLEST ,
  SET_SPREAD_ARG , SET_PERCENTILE , SET_MAJOR_ORDER , SET_THREADS ,
  N_OPTIONS
}
 

Functions

long addStatRequests (STAT_REQUEST **statRequest, long requests, char **item, long items, long code, unsigned long flag)
 
STAT_DEFINITIONcompileStatDefinitions (SDDS_DATASET *inTable, STAT_REQUEST *request, long requests, long *stats, long noWarnings)
 
long setupOutputFile (SDDS_DATASET *outTable, char *output, SDDS_DATASET *inTable, STAT_DEFINITION *stat, long stats, short columnMajorOrder)
 
int main (int argc, char **argv)
 

Variables

char * option [N_OPTIONS]
 
static char * USAGE
 

Detailed Description

Computes statistics for rows across multiple columns in SDDS datasets.

The sddsrowstats program processes an SDDS dataset, computes specified statistics for each row across selected columns, and adds new columns with the results to the output dataset. It supports various statistical operations such as mean, median, standard deviation, sum, and more, with options to limit the data considered for each computation.

@usage sddsrowstats [<input>] [<output>] [options]

@options -pipe[=input][,output] -nowarnings -mean=<newName>[,<limitOps>],<columnNameList> -rms=<newName>[,<limitOps>],<columnNameList> -median=<newName>[,<limitOps>],<columnNameList> -minimum=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList> -maximum=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList> -standardDeviation=<newName>[,<limitOps>],<columnNameList> -sigma=<newName>[,<limitOps>],<columnNameList> -mad=<newName>[,<limitOps>],<columnNameList> -sum=<newName>[,<limitOps>][,power=<integer>],<columnNameList> -spread=<newName>[,<limitOps>],<columnNameList> -drange=<newName>[,<limitOps>],<columnNameList> -qrange=<newName>[,<limitOps>],<columnNameList> -smallest=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList> -largest=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList> -count=<newName>[,<limitOps>],<columnNameList> -percentile=<newName>[,<limitOps>],value=<percent>,<columnNameList> -majorOrder=row|column -threads=<number>

<limitOps> is of the form [topLimit=

,][bottomLimit=

]

Each statistic option requires specifying a new column name and a list of source columns, which can include wildcards.

License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Author
M. Borland, R. Soliday, H. Shang

Definition in file sddsrowstats.c.

Macro Definition Documentation

◆ BOTTOMLIMIT_GIVEN

#define BOTTOMLIMIT_GIVEN   0x0002U

Definition at line 108 of file sddsrowstats.c.

◆ PERCENT_GIVEN

#define PERCENT_GIVEN   0x0008U

Definition at line 110 of file sddsrowstats.c.

◆ POSITIONCOLUMN_GIVEN

#define POSITIONCOLUMN_GIVEN   0x0004U

Definition at line 109 of file sddsrowstats.c.

◆ TOPLIMIT_GIVEN

#define TOPLIMIT_GIVEN   0x0001U

Definition at line 107 of file sddsrowstats.c.

Enumeration Type Documentation

◆ option_type

enum option_type

Definition at line 60 of file sddsrowstats.c.

60 {
61 SET_MAXIMUM,
62 SET_MINIMUM,
63 SET_MEAN,
64 SET_STANDARDDEVIATION,
65 SET_RMS,
66 SET_SUM,
67 SET_SIGMA,
68 SET_COUNT,
69 SET_PIPE,
70 SET_MEDIAN,
71 SET_MAD,
72 SET_NOWARNINGS,
73 SET_DRANGE,
74 SET_QRANGE,
75 SET_LARGEST,
76 SET_SMALLEST,
77 SET_SPREAD_ARG,
78 SET_PERCENTILE,
79 SET_MAJOR_ORDER,
80 SET_THREADS,
81 N_OPTIONS
82};

Function Documentation

◆ addStatRequests()

long addStatRequests ( STAT_REQUEST ** statRequest,
long requests,
char ** item,
long items,
long code,
unsigned long flag )

Definition at line 718 of file sddsrowstats.c.

718 {
719 long i;
720 if (!(*statRequest = SDDS_Realloc(*statRequest, sizeof(**statRequest) * (requests + 1))) ||
721 !((*statRequest)[requests].sourceColumn = (char **)malloc(sizeof(*(*statRequest)[requests].sourceColumn) * (items - 1))))
722 SDDS_Bomb("memory allocation failure");
723 for (i = 0; i < items - 1; i++)
724 SDDS_CopyString(&((*statRequest)[requests].sourceColumn[i]), item[i + 1]);
725 (*statRequest)[requests].resultColumn = item[0];
726 (*statRequest)[requests].sourceColumns = items - 1;
727 (*statRequest)[requests].optionCode = code;
728 (*statRequest)[requests].sumPower = 1;
729 (*statRequest)[requests].flags = flags;
730 (*statRequest)[requests].positionColumn = NULL;
731
732 return requests + 1;
733}
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677

◆ compileStatDefinitions()

STAT_DEFINITION * compileStatDefinitions ( SDDS_DATASET * inTable,
STAT_REQUEST * request,
long requests,
long * stats,
long noWarnings )

Definition at line 735 of file sddsrowstats.c.

735 {
736 STAT_DEFINITION *stat;
737 long iReq, iName, iStat;
738
739 if (!(stat = (STAT_DEFINITION *)malloc(sizeof(*stat) * requests)))
740 SDDS_Bomb("memory allocation failure");
741 for (iReq = iStat = 0; iReq < requests; iReq++) {
742 if ((stat[iStat].sourceColumns = expandColumnPairNames(inData, &request[iReq].sourceColumn, NULL, request[iReq].sourceColumns, NULL, 0, FIND_NUMERIC_TYPE, 0)) <= 0) {
743 if (!noWarnings) {
744 fprintf(stderr, "Warning: no match for column names (sddsrowstats):\n");
745 for (iName = 0; iName < request[iReq].sourceColumns; iName++)
746 fprintf(stderr, "%s, ", request[iReq].sourceColumn[iName]);
747 fputc('\n', stderr);
748 }
749 } else {
750 stat[iStat].sourceColumn = request[iReq].sourceColumn;
751 stat[iStat].resultColumn = request[iReq].resultColumn;
752 stat[iStat].optionCode = request[iReq].optionCode;
753 stat[iStat].sumPower = request[iReq].sumPower;
754 stat[iStat].flags = request[iReq].flags;
755 stat[iStat].topLimit = request[iReq].topLimit;
756 stat[iStat].bottomLimit = request[iReq].bottomLimit;
757 stat[iStat].positionColumn = request[iReq].positionColumn;
758 stat[iStat].percent = request[iReq].percent;
759 iStat++;
760 }
761 *stats = iStat;
762 }
763
764 return stat;
765}

◆ main()

int main ( int argc,
char ** argv )

Definition at line 191 of file sddsrowstats.c.

191 {
192 STAT_DEFINITION *stat;
193 long stats;
194 STAT_REQUEST *request;
195 long requests;
196 SCANNED_ARG *scanned;
197 SDDS_DATASET inData, outData;
198
199 int32_t power;
200 long i_arg, code, iStat, tmpFileUsed, iColumn, posColIndex;
201 int64_t rows, row, count;
202 long noWarnings, maxSourceColumns;
203 char *input, *output, *positionColumn, **posColumnName;
204 double **inputData, *outputData, value1, value2, topLimit, bottomLimit;
205 unsigned long pipeFlags, scanFlags, majorOrderFlag;
206 char s[100];
207 double *statWorkArray;
208 double quartilePoint[2] = {25.0, 75.0}, quartileResult[2];
209 double decilePoint[2] = {10.0, 90.0}, decileResult[2];
210 double percent;
211 short columnMajorOrder = -1;
212 int threads = 1;
213
215 argc = scanargs(&scanned, argc, argv);
216 if (argc < 2) {
217 bomb("too few arguments", USAGE);
218 }
219
220 posColumnName = NULL;
221 input = output = positionColumn = NULL;
222 stat = NULL;
223 request = NULL;
224 stats = requests = pipeFlags = 0;
225 noWarnings = 0;
226 outputData = NULL;
227 statWorkArray = NULL;
228
229 for (i_arg = 1; i_arg < argc; i_arg++) {
230 scanFlags = 0;
231 if (scanned[i_arg].arg_type == OPTION) {
232 /* process options here */
233 switch (code = match_string(scanned[i_arg].list[0], option, N_OPTIONS, 0)) {
234 case SET_MAXIMUM:
235 case SET_MINIMUM:
236 case SET_MEAN:
237 case SET_MEDIAN:
238 case SET_STANDARDDEVIATION:
239 case SET_RMS:
240 case SET_SIGMA:
241 case SET_MAD:
242 case SET_COUNT:
243 case SET_DRANGE:
244 case SET_QRANGE:
245 case SET_SMALLEST:
246 case SET_LARGEST:
247 case SET_SPREAD_ARG:
248 if (scanned[i_arg].n_items < 3) {
249 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
250 exit(EXIT_FAILURE);
251 }
252 if (!scanItemList(&scanFlags, scanned[i_arg].list, &scanned[i_arg].n_items,
253 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS |
254 SCANITEMLIST_IGNORE_VALUELESS,
255 "positionColumn", SDDS_STRING, &positionColumn, 1, POSITIONCOLUMN_GIVEN,
256 "toplimit", SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
257 "bottomlimit", SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL)) {
258 sprintf(s, "invalid -%s syntax", scanned[i_arg].list[0]);
259 SDDS_Bomb(s);
260 }
261 requests = addStatRequests(&request, requests, scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, code, scanFlags);
262 request[requests - 1].topLimit = topLimit;
263 request[requests - 1].bottomLimit = bottomLimit;
264 if (positionColumn) {
265 if (code == SET_MAXIMUM || code == SET_MINIMUM || code == SET_LARGEST || code == SET_SMALLEST)
266 SDDS_CopyString(&request[requests - 1].positionColumn, positionColumn);
267 free(positionColumn);
268 positionColumn = NULL;
269 }
270 break;
271 case SET_PERCENTILE:
272 if (scanned[i_arg].n_items < 3) {
273 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
274 exit(EXIT_FAILURE);
275 }
276 if (!scanItemList(&scanFlags, scanned[i_arg].list, &scanned[i_arg].n_items,
277 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS |
278 SCANITEMLIST_IGNORE_VALUELESS,
279 "value", SDDS_DOUBLE, &percent, 1, PERCENT_GIVEN,
280 "toplimit", SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
281 "bottomlimit", SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL) ||
282 !(scanFlags & PERCENT_GIVEN) || percent <= 0 || percent >= 100)
283 SDDS_Bomb("invalid -percentile syntax");
284 requests = addStatRequests(&request, requests, scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, code, scanFlags);
285 request[requests - 1].percent = percent;
286 request[requests - 1].topLimit = topLimit;
287 request[requests - 1].bottomLimit = bottomLimit;
288 break;
289 case SET_SUM:
290 if (scanned[i_arg].n_items < 3) {
291 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
292 exit(EXIT_FAILURE);
293 }
294 power = 1;
295 if (!scanItemList(&scanFlags, scanned[i_arg].list, &scanned[i_arg].n_items,
296 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS | SCANITEMLIST_IGNORE_VALUELESS,
297 "power", SDDS_LONG, &power, 1, 0,
298 "toplimit", SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
299 "bottomlimit", SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL))
300 SDDS_Bomb("invalid -sum syntax");
301 requests = addStatRequests(&request, requests, scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, code, scanFlags);
302 request[requests - 1].sumPower = power;
303 request[requests - 1].topLimit = topLimit;
304 request[requests - 1].bottomLimit = bottomLimit;
305 break;
306 case SET_PIPE:
307 if (!processPipeOption(scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, &pipeFlags))
308 SDDS_Bomb("invalid -pipe syntax");
309 break;
310 case SET_NOWARNINGS:
311 noWarnings = 1;
312 break;
313 case SET_MAJOR_ORDER:
314 majorOrderFlag = 0;
315 scanned[i_arg].n_items--;
316 if (scanned[i_arg].n_items > 0 &&
317 (!scanItemList(&majorOrderFlag, scanned[i_arg].list + 1, &scanned[i_arg].n_items, 0,
318 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
319 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
320 SDDS_Bomb("invalid -majorOrder syntax/values");
321 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
322 columnMajorOrder = 1;
323 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
324 columnMajorOrder = 0;
325 break;
326 case SET_THREADS:
327 if (scanned[i_arg].n_items != 2 ||
328 !sscanf(scanned[i_arg].list[1], "%d", &threads) || threads < 1)
329 SDDS_Bomb("invalid -threads syntax");
330 break;
331 default:
332 fprintf(stderr, "error: unknown option '%s' given\n", scanned[i_arg].list[0]);
333 exit(EXIT_FAILURE);
334 break;
335 }
336 } else {
337 /* argument is filename */
338 if (!input)
339 input = scanned[i_arg].list[0];
340 else if (!output)
341 output = scanned[i_arg].list[0];
342 else
343 SDDS_Bomb("too many filenames seen");
344 }
345 }
346
347 processFilenames("sddsrowstats", &input, &output, pipeFlags, noWarnings, &tmpFileUsed);
348
349 if (!requests)
350 SDDS_Bomb("no statistics requested");
351
352 if (!SDDS_InitializeInput(&inData, input))
353 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
354
355 if (!(stat = compileStatDefinitions(&inData, request, requests, &stats, noWarnings))) {
356 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
357 exit(EXIT_FAILURE);
358 }
359 if (stats < 0)
360 SDDS_Bomb("No valid statistics requests.");
361 for (iStat = maxSourceColumns = 0; iStat < stats; iStat++) {
362 if (stat[iStat].sourceColumns > maxSourceColumns)
363 maxSourceColumns = stat[iStat].sourceColumns;
364 }
365 if (!(statWorkArray = malloc(sizeof(*statWorkArray) * maxSourceColumns)))
366 SDDS_Bomb("allocation failure (statWorkArray)");
367
368 if (!setupOutputFile(&outData, output, &inData, stat, stats, columnMajorOrder)) {
369 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
370 exit(EXIT_FAILURE);
371 }
372
373 inputData = NULL;
374
375 while ((code = SDDS_ReadPage(&inData)) > 0) {
376 if (!SDDS_CopyPage(&outData, &inData)) {
377 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
378 exit(EXIT_FAILURE);
379 }
380 if ((rows = SDDS_CountRowsOfInterest(&inData))) {
381 if (!(outputData = (double *)malloc(sizeof(*outputData) * rows))) {
382 SDDS_Bomb("memory allocation failure");
383 }
384 if (!(posColumnName = (char **)malloc(sizeof(*posColumnName) * rows))) {
385 SDDS_Bomb("memory allocation failure");
386 }
387 for (iStat = 0; iStat < stats; iStat++) {
388 if (!(inputData = (double **)malloc(sizeof(*inputData) * stat[iStat].sourceColumns))) {
389 SDDS_Bomb("memory allocation failure");
390 }
391 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
392 if (!(inputData[iColumn] = SDDS_GetColumnInDoubles(&inData, stat[iStat].sourceColumn[iColumn]))) {
393 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
394 }
395 }
396 for (row = 0; row < rows; row++)
397 outputData[row] = DBL_MAX;
398 switch (stat[iStat].optionCode) {
399 case SET_MINIMUM:
400 for (row = 0; row < rows; row++) {
401 value1 = DBL_MAX;
402 posColIndex = 0;
403 posColumnName[row] = NULL;
404 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
405 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
406 continue;
407 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
408 continue;
409 if (inputData[iColumn][row] < value1) {
410 value1 = inputData[iColumn][row];
411 posColIndex = iColumn;
412 }
413 }
414 outputData[row] = value1;
415 if (stat[iStat].positionColumn)
416 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
417 }
418 break;
419 case SET_MAXIMUM:
420 for (row = 0; row < rows; row++) {
421 posColIndex = 0;
422 value1 = -DBL_MAX;
423 posColumnName[row] = NULL;
424 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
425 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
426 continue;
427 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
428 continue;
429 if (inputData[iColumn][row] > value1) {
430 posColIndex = iColumn;
431 value1 = inputData[iColumn][row];
432 }
433 }
434 outputData[row] = value1;
435 if (stat[iStat].positionColumn)
436 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
437 }
438 break;
439 case SET_MEAN:
440 for (row = 0; row < rows; row++) {
441 value1 = 0;
442 count = 0;
443 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
444 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
445 continue;
446 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
447 continue;
448 value1 += inputData[iColumn][row];
449 count++;
450 }
451 if (count)
452 outputData[row] = value1 / count;
453 }
454 break;
455 case SET_MEDIAN:
456 for (row = 0; row < rows; row++) {
457 for (iColumn = count = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
458 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
459 continue;
460 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
461 continue;
462 statWorkArray[count] = inputData[iColumn][row];
463 count++;
464 }
465 if (count)
466 compute_median(outputData + row, statWorkArray, count);
467 }
468 break;
469 case SET_STANDARDDEVIATION:
470 for (row = 0; row < rows; row++) {
471 value1 = 0;
472 value2 = 0;
473 count = 0;
474 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
475 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
476 continue;
477 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
478 continue;
479 value1 += inputData[iColumn][row];
480 value2 += inputData[iColumn][row] * inputData[iColumn][row];
481 count++;
482 }
483 if (count > 1) {
484 if ((value1 = value2 / count - sqr(value1 / count)) <= 0)
485 outputData[row] = 0;
486 else
487 outputData[row] = sqrt(value1 * count / (count - 1.0));
488 }
489 }
490 break;
491 case SET_SIGMA:
492 for (row = 0; row < rows; row++) {
493 value1 = 0;
494 value2 = 0;
495 count = 0;
496 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
497 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
498 continue;
499 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
500 continue;
501 value1 += inputData[iColumn][row];
502 value2 += inputData[iColumn][row] * inputData[iColumn][row];
503 count++;
504 }
505 if (count > 1) {
506 if ((value1 = value2 / count - sqr(value1 / count)) <= 0)
507 outputData[row] = 0;
508 else
509 outputData[row] = sqrt(value1 / (count - 1.0));
510 }
511 }
512 break;
513 case SET_RMS:
514 for (row = 0; row < rows; row++) {
515 value1 = 0;
516 count = 0;
517 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
518 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
519 continue;
520 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
521 continue;
522 value1 += sqr(inputData[iColumn][row]);
523 count++;
524 }
525 if (count)
526 outputData[row] = sqrt(value1 / count);
527 }
528 break;
529 case SET_SUM:
530 for (row = 0; row < rows; row++) {
531 value1 = 0;
532 count = 0;
533 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
534 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
535 continue;
536 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
537 continue;
538 value1 += ipow(inputData[iColumn][row], stat[iStat].sumPower);
539 count++;
540 }
541 if (count)
542 outputData[row] = value1;
543 }
544 break;
545 case SET_COUNT:
546 for (row = 0; row < rows; row++) {
547 count = 0;
548 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
549 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
550 continue;
551 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
552 continue;
553 count++;
554 }
555 outputData[row] = count;
556 }
557 break;
558 case SET_MAD:
559 for (row = 0; row < rows; row++) {
560 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
561 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
562 continue;
563 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
564 continue;
565 statWorkArray[count] = inputData[iColumn][row];
566 count++;
567 }
568 if (count)
569 computeMomentsThreaded(NULL, NULL, NULL, &outputData[row], statWorkArray, count, threads);
570 }
571 break;
572 case SET_DRANGE:
573 for (row = 0; row < rows; row++) {
574 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
575 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
576 continue;
577 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
578 continue;
579 statWorkArray[count] = inputData[iColumn][row];
580 count++;
581 }
582 if (count && compute_percentiles(decileResult, decilePoint, 2, statWorkArray, count))
583 outputData[row] = decileResult[1] - decileResult[0];
584 }
585 break;
586 case SET_QRANGE:
587 for (row = 0; row < rows; row++) {
588 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
589 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
590 continue;
591 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
592 continue;
593 statWorkArray[count] = inputData[iColumn][row];
594 count++;
595 }
596 if (count && compute_percentiles(quartileResult, quartilePoint, 2, statWorkArray, count))
597 outputData[row] = quartileResult[1] - quartileResult[0];
598 }
599 break;
600 case SET_SMALLEST:
601 for (row = 0; row < rows; row++) {
602 value1 = DBL_MAX;
603 posColIndex = 0;
604 posColumnName[row] = NULL;
605 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
606 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
607 continue;
608 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
609 continue;
610 if ((value2 = fabs(inputData[iColumn][row])) < value1) {
611 posColIndex = iColumn;
612 value1 = value2;
613 }
614 }
615 outputData[row] = value1;
616 if (stat[iStat].positionColumn)
617 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
618 }
619 break;
620 case SET_LARGEST:
621 for (row = 0; row < rows; row++) {
622 value1 = 0;
623 posColIndex = 0;
624 posColumnName[row] = NULL;
625 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
626 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
627 continue;
628 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
629 continue;
630 if ((value2 = fabs(inputData[iColumn][row])) > value1) {
631 posColIndex = iColumn;
632 value1 = value2;
633 }
634 }
635 outputData[row] = value1;
636 if (stat[iStat].positionColumn)
637 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
638 }
639 break;
640 case SET_SPREAD_ARG:
641 for (row = 0; row < rows; row++) {
642 value1 = DBL_MAX; /* min */
643 value2 = -DBL_MAX; /* max */
644 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
645 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
646 continue;
647 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
648 continue;
649 if (inputData[iColumn][row] < value1)
650 value1 = inputData[iColumn][row];
651 if (inputData[iColumn][row] > value2)
652 value2 = inputData[iColumn][row];
653 }
654 outputData[row] = value2 - value1;
655 }
656 break;
657 case SET_PERCENTILE:
658 for (row = 0; row < rows; row++) {
659 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
660 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
661 continue;
662 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
663 continue;
664 statWorkArray[count] = inputData[iColumn][row];
665 count++;
666 }
667 outputData[row] = HUGE_VAL;
668 if (count)
669 compute_percentiles(&outputData[row], &stat[iStat].percent, 1, statWorkArray, count);
670 }
671 break;
672 default:
673 SDDS_Bomb("invalid statistic code (accumulation loop)");
674 break;
675 }
676 if (!SDDS_SetColumn(&outData, SDDS_SET_BY_INDEX, outputData, rows, stat[iStat].resultIndex))
677 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
678
679 if (stat[iStat].positionColumn) {
680 if (!SDDS_SetColumn(&outData, SDDS_SET_BY_INDEX, posColumnName, rows, stat[iStat].positionColumnIndex))
681 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
682 }
683 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++)
684 free(inputData[iColumn]);
685 free(inputData);
686 inputData = NULL;
687 }
688 free(outputData);
689 outputData = NULL;
690 free(posColumnName);
691 posColumnName = NULL;
692 }
693 if (!SDDS_WritePage(&outData))
694 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
695 }
696 free_scanargs(&scanned, argc);
697 for (iStat = 0; iStat < stats; iStat++) {
698 if (stat[iStat].positionColumn)
699 free(stat[iStat].positionColumn);
700 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++)
701 free(stat[iStat].sourceColumn[iColumn]);
702 free(stat[iStat].sourceColumn);
703 }
704 free(request);
705 free(stat);
706 if (statWorkArray)
707 free(statWorkArray);
708
709 if (!SDDS_Terminate(&inData) || !SDDS_Terminate(&outData)) {
710 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
711 exit(EXIT_FAILURE);
712 }
713 if (tmpFileUsed && !replaceFileAndBackUp(input, output))
714 exit(EXIT_FAILURE);
715 return EXIT_SUCCESS;
716}
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
double ipow(const double x, const int64_t p)
Compute x raised to the power p (x^p).
Definition ipow.c:33
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long compute_percentiles(double *position, double *percent, long positions, double *x, long n)
Computes multiple percentiles of an array of doubles.
Definition median.c:84
long compute_median(double *value, double *x, long n)
Computes the median of an array of doubles.
Definition median.c:29
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
Definition moments.c:127
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.

◆ setupOutputFile()

long setupOutputFile ( SDDS_DATASET * outTable,
char * output,
SDDS_DATASET * inTable,
STAT_DEFINITION * stat,
long stats,
short columnMajorOrder )

Definition at line 767 of file sddsrowstats.c.

767 {
768 long column;
769 char s[SDDS_MAXLINE];
770
771 if (!SDDS_InitializeCopy(outData, inData, output, "w"))
772 return 0;
773 if (columnMajorOrder != -1)
774 outData->layout.data_mode.column_major = columnMajorOrder;
775 else
776 outData->layout.data_mode.column_major = inData->layout.data_mode.column_major;
777 for (column = 0; column < stats; column++) {
778 if (!SDDS_TransferColumnDefinition(outData, inData, stat[column].sourceColumn[0], stat[column].resultColumn)) {
779 sprintf(s, "Problem transferring definition of column %s to %s\n", stat[column].sourceColumn[0], stat[column].resultColumn);
780 SDDS_SetError(s);
781 return 0;
782 }
783 if ((stat[column].resultIndex = SDDS_GetColumnIndex(outData, stat[column].resultColumn)) < 0) {
784 sprintf(s, "Problem creating column %s", stat[column].resultColumn);
785 SDDS_SetError(s);
786 return 0;
787 }
788 if (stat[column].positionColumn) {
789 if (!SDDS_DefineSimpleColumn(outData, stat[column].positionColumn, NULL, SDDS_STRING)) {
790 sprintf(s, "Problem define column %s\n", stat[column].positionColumn);
791 SDDS_SetError(s);
792 return 0;
793 }
794 if ((stat[column].positionColumnIndex = SDDS_GetColumnIndex(outData, stat[column].positionColumn)) < 0) {
795 sprintf(s, "Problem creating column %s", stat[column].positionColumn);
796 SDDS_SetError(s);
797 return 0;
798 }
799 }
800 if (!SDDS_ChangeColumnInformation(outData, "description", "", SDDS_SET_BY_NAME, stat[column].resultColumn) ||
801 !SDDS_ChangeColumnInformation(outData, "symbol", "", SDDS_SET_BY_NAME, stat[column].resultColumn) ||
802 !SDDS_ChangeColumnInformation(outData, "type", "double", SDDS_SET_BY_NAME | SDDS_PASS_BY_STRING, stat[column].resultColumn)) {
803 sprintf(s, "Problem changing attributes of new column %s", stat[column].resultColumn);
804 SDDS_SetError(s);
805 return 0;
806 }
807 }
808 if (!SDDS_WriteLayout(outData))
809 return 0;
810 return 1;
811}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
Definition SDDS_info.c:364
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.

Variable Documentation

◆ option

char* option[N_OPTIONS]
Initial value:
= {
"maximum",
"minimum",
"mean",
"standarddeviation",
"rms",
"sum",
"sigma",
"count",
"pipe",
"median",
"mad",
"nowarnings",
"drange",
"qrange",
"largest",
"smallest",
"spread",
"percentile",
"majorOrder",
"threads",
}

Definition at line 84 of file sddsrowstats.c.

84 {
85 "maximum",
86 "minimum",
87 "mean",
88 "standarddeviation",
89 "rms",
90 "sum",
91 "sigma",
92 "count",
93 "pipe",
94 "median",
95 "mad",
96 "nowarnings",
97 "drange",
98 "qrange",
99 "largest",
100 "smallest",
101 "spread",
102 "percentile",
103 "majorOrder",
104 "threads",
105};

◆ USAGE

char* USAGE
static

Definition at line 139 of file sddsrowstats.c.