SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsrowstats.c
Go to the documentation of this file.
1/**
2 * @file sddsrowstats.c
3 * @brief Computes statistics for rows across multiple columns in SDDS datasets.
4 *
5 * @details
6 * The `sddsrowstats` program processes an SDDS dataset and computes specified statistics
7 * for each row across selected columns. New columns with the results are added to the
8 * output dataset. It supports a variety of statistical operations such as mean, median,
9 * standard deviation, and sum, among others. Users can also specify limits and conditions
10 * for data selection.
11 *
12 * @section Usage
13 * ```
14 * sddsrowstats [<inputfile>] [<outputfile>]
15 * [-pipe[=input][,output]]
16 * [-nowarnings]
17 * [-mean=<newName>,[,<limitOps>],<columnNameList>]
18 * [-rms=<newName>,[,<limitOps>],<columnNameList>]
19 * [-median=<newName>[,<limitOps>],<columnNameList>]
20 * [-minimum=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>]
21 * [-maximum=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>]
22 * [-standardDeviation=<newName>[,<limitOps>],<columnNameList>]
23 * [-sigma=<newName>[,<limitOps>],<columnNameList>]
24 * [-mad=<newName>[,<limitOps>],<columnNameList>]
25 * [-sum=<newName>[,<limitOps>][,power=<integer>],<columnNameList>]
26 * [-spread=<newName>[,<limitOps>],<columnNameList>]
27 * [-drange=<newName>[,<limitOps>],<columnNameList>]
28 * [-qrange=<newName>[,<limitOps>],<columnNameList>]
29 * [-smallest=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>]
30 * [-largest=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>]
31 * [-count=<newName>[,<limitOps>],<columnNameList>]
32 * [-percentile=<newName>[,<limitOps>],value=<percent>,<columnNameList]
33 * [-majorOrder=row|column]
34 * [-threads=<number>]
35 *
36 * <limitOps> is of the form [topLimit=<value>,][bottomLimit=<value>]
37 * ```
38 *
39 * @section Options
40 * | Optional | Description |
41 * |---------------------------------------|-----------------------------------------------------------------------------|
42 * | `-pipe` | Use standard input/output for data streams. |
43 * | `-nowarnings` | Suppress warning messages. |
44 * | `-mean` | Compute the mean for specified columns. |
45 * | `-rms` | Compute the root mean square for specified columns. |
46 * | `-median` | Compute the median for specified columns. |
47 * | `-minimum` | Compute the minimum and optionally store the column of occurrence. |
48 * | `-maximum` | Compute the maximum and optionally store the column of occurrence. |
49 * | `-standardDeviation` | Compute the standard deviation for specified columns. |
50 * | `-sigma` | Compute the sigma for specified columns. |
51 * | `-mad` | Compute the median absolute deviation for specified columns. |
52 * | `-sum` | Compute the sum with optional power for specified columns. |
53 * | `-spread` | Compute the maximum value minus the minimum value for specified columns. |
54 * | `-drange` | Compute the decile range for specified columns. |
55 * | `-qrange` | Compute the quartile range for specified columns. |
56 * | `-smallest` | Compute the smallest absolute value for specified columns. |
57 * | `-largest` | Compute the largest absolute value for specified columns. |
58 * | `-count` | Compute the number of values within the limits for specified columns. |
59 * | `-percentile` | Compute the specified percentile value for specified columns. |
60 * | `-majorOrder` | Specify row or column major order for the output dataset. |
61 * | `-threads` | Specify the number of threads for parallel computations. |
62 *
63 * @copyright
64 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
65 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
66 *
67 * @license
68 * This file is distributed under the terms of the Software License Agreement
69 * found in the file LICENSE included with this distribution.
70 *
71 * @authors
72 * M. Borland, R. Soliday, H. Shang
73 */
74
75#include "mdb.h"
76#include "scan.h"
77#include "SDDS.h"
78#include "SDDSutils.h"
79#include <ctype.h>
80
81/* Enumeration for option types */
82enum option_type {
83 SET_MAXIMUM,
84 SET_MINIMUM,
85 SET_MEAN,
86 SET_STANDARDDEVIATION,
87 SET_RMS,
88 SET_SUM,
89 SET_SIGMA,
90 SET_COUNT,
91 SET_PIPE,
92 SET_MEDIAN,
93 SET_MAD,
94 SET_NOWARNINGS,
95 SET_DRANGE,
96 SET_QRANGE,
97 SET_LARGEST,
98 SET_SMALLEST,
99 SET_SPREAD_ARG,
100 SET_PERCENTILE,
101 SET_MAJOR_ORDER,
102 SET_THREADS,
103 N_OPTIONS
104};
105
106char *option[N_OPTIONS] = {
107 "maximum",
108 "minimum",
109 "mean",
110 "standarddeviation",
111 "rms",
112 "sum",
113 "sigma",
114 "count",
115 "pipe",
116 "median",
117 "mad",
118 "nowarnings",
119 "drange",
120 "qrange",
121 "largest",
122 "smallest",
123 "spread",
124 "percentile",
125 "majorOrder",
126 "threads",
127};
128
129#define TOPLIMIT_GIVEN 0x0001U
130#define BOTTOMLIMIT_GIVEN 0x0002U
131#define POSITIONCOLUMN_GIVEN 0x0004U
132#define PERCENT_GIVEN 0x0008U
133
134/* this structure stores a command-line request for statistics computation */
135/* individual elements of sourceColumn may contain wildcards */
136typedef struct
137{
138 char **sourceColumn, *resultColumn, *positionColumn;
139 long sourceColumns, sumPower, optionCode, positionColumnIndex;
140 double percent;
141 unsigned long flags;
142 double topLimit, bottomLimit;
144
145/* this structure stores data necessary for accessing/creating SDDS columns and
146 * for computing a statistic
147 */
148typedef struct
149{
150 char **sourceColumn, *resultColumn, *positionColumn;
151 long sourceColumns, optionCode, resultIndex, sumPower, positionColumnIndex;
152 double percent;
153 unsigned long flags;
154 double topLimit, bottomLimit;
156
157long addStatRequests(STAT_REQUEST **statRequest, long requests, char **item, long items, long code, unsigned long flag);
158STAT_DEFINITION *compileStatDefinitions(SDDS_DATASET *inTable, STAT_REQUEST *request, long requests, long *stats, long noWarnings);
159long setupOutputFile(SDDS_DATASET *outTable, char *output, SDDS_DATASET *inTable, STAT_DEFINITION *stat, long stats, short columnMajorOrder);
160
161static char *USAGE =
162 "sddsrowstats [<input>] [<output>]\n"
163 " [-pipe[=input][,output]]\n"
164 " [-nowarnings]\n"
165 " [-mean=<newName>,[,<limitOps>],<columnNameList>]\n"
166 " [-rms=<newName>,[,<limitOps>],<columnNameList>]\n"
167 " [-median=<newName>[,<limitOps>],<columnNameList>]\n"
168 " [-minimum=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>]\n"
169 " [-maximum=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>]\n"
170 " [-standardDeviation=<newName>[,<limitOps>],<columnNameList>]\n"
171 " [-sigma=<newName>[,<limitOps>],<columnNameList>]\n"
172 " [-mad=<newName>[,<limitOps>],<columnNameList>]\n"
173 " [-sum=<newName>[,<limitOps>][,power=<integer>],<columnNameList>] \n"
174 " [-spread=<newName>[,<limitOps>],<columnNameList>]\n"
175 " [-drange=<newName>[,<limitOps>],<columnNameList>]\n"
176 " [-qrange=<newName>[,<limitOps>],<columnNameList>]\n"
177 " [-smallest=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>]\n"
178 " [-largest=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>]\n"
179 " [-count=<newName>[,<limitOps>],<columnNameList>]\n"
180 " [-percentile=<newName>[,<limitOps>],value=<percent>,<columnNameList]\n"
181 " [-majorOrder=row|column]\n"
182 " [-threads=<number>]\n"
183 "\nOptions:\n"
184 " -pipe[=input][,output]\n"
185 " Use pipe for input and/or output.\n"
186 " -nowarnings\n"
187 " Suppress warning messages.\n"
188 " -mean=<newName>[,<limitOps>],<columnNameList>\n"
189 " Compute the mean of the specified columns.\n"
190 " -rms=<newName>[,<limitOps>],<columnNameList>\n"
191 " Compute the root mean square of the specified columns.\n"
192 " -median=<newName>[,<limitOps>],<columnNameList>\n"
193 " Compute the median of the specified columns.\n"
194 " -minimum=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>\n"
195 " Compute the minimum value among the specified columns.\n"
196 " -maximum=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>\n"
197 " Compute the maximum value among the specified columns.\n"
198 " -standardDeviation=<newName>[,<limitOps>],<columnNameList>\n"
199 " Compute the standard deviation of the specified columns.\n"
200 " -sigma=<newName>[,<limitOps>],<columnNameList>\n"
201 " Compute the sigma (standard deviation) of the specified columns.\n"
202 " -mad=<newName>[,<limitOps>],<columnNameList>\n"
203 " Compute the median absolute deviation (MAD) of the specified columns.\n"
204 " -sum=<newName>[,<limitOps>][,power=<integer>],<columnNameList>\n"
205 " Compute the sum of the specified columns.\n"
206 " -spread=<newName>[,<limitOps>],<columnNameList>\n"
207 " Compute the spread (max - min) of the specified columns.\n"
208 " -drange=<newName>[,<limitOps>],<columnNameList>\n"
209 " Compute the decile range of the specified columns.\n"
210 " -qrange=<newName>[,<limitOps>],<columnNameList>\n"
211 " Compute the quartile range of the specified columns.\n"
212 " -smallest=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>\n"
213 " Compute the smallest absolute value among the specified columns.\n"
214 " -largest=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>\n"
215 " Compute the largest absolute value among the specified columns.\n"
216 " -count=<newName>[,<limitOps>],<columnNameList>\n"
217 " Count the number of valid entries in the specified columns.\n"
218 " -percentile=<newName>[,<limitOps>],value=<percent>,<columnNameList>\n"
219 " Compute the specified percentile of the given columns.\n"
220 " -majorOrder=row|column\n"
221 " Set the data ordering to row-major or column-major.\n"
222 " -threads=<number>\n"
223 " Specify the number of threads to use for computations.\n"
224 "\n<limitOps> is of the form [topLimit=<value>,][bottomLimit=<value>]\n"
225 "\nComputes statistics for each row of each input table, adding new columns to the\n"
226 "output table. Each row statistic is done using data from the columns listed in\n"
227 "<columnNameList>, which is a comma-separated list of optionally-wildcarded column\n"
228 "names. positionColumn=<name> for minimum, maximum, smallest, largest option is to store \n"
229 "the corresponding column name of the minimum, maximum, smallest, or largest in each row to \n"
230 "the new output column - <name>.\n"
231 "\nProgram by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
232
233int main(int argc, char **argv) {
234 STAT_DEFINITION *stat;
235 long stats;
236 STAT_REQUEST *request;
237 long requests;
238 SCANNED_ARG *scanned;
239 SDDS_DATASET inData, outData;
240
241 int32_t power;
242 long i_arg, code, iStat, tmpFileUsed, iColumn, posColIndex;
243 int64_t rows, row, count;
244 long noWarnings, maxSourceColumns;
245 char *input, *output, *positionColumn, **posColumnName;
246 double **inputData, *outputData, value1, value2, topLimit, bottomLimit;
247 unsigned long pipeFlags, scanFlags, majorOrderFlag;
248 char s[100];
249 double *statWorkArray;
250 double quartilePoint[2] = {25.0, 75.0}, quartileResult[2];
251 double decilePoint[2] = {10.0, 90.0}, decileResult[2];
252 double percent;
253 short columnMajorOrder = -1;
254 int threads = 1;
255
257 argc = scanargs(&scanned, argc, argv);
258 if (argc < 2) {
259 bomb("too few arguments", USAGE);
260 }
261
262 posColumnName = NULL;
263 input = output = positionColumn = NULL;
264 stat = NULL;
265 request = NULL;
266 stats = requests = pipeFlags = 0;
267 noWarnings = 0;
268 outputData = NULL;
269 statWorkArray = NULL;
270
271 for (i_arg = 1; i_arg < argc; i_arg++) {
272 scanFlags = 0;
273 if (scanned[i_arg].arg_type == OPTION) {
274 /* process options here */
275 switch (code = match_string(scanned[i_arg].list[0], option, N_OPTIONS, 0)) {
276 case SET_MAXIMUM:
277 case SET_MINIMUM:
278 case SET_MEAN:
279 case SET_MEDIAN:
280 case SET_STANDARDDEVIATION:
281 case SET_RMS:
282 case SET_SIGMA:
283 case SET_MAD:
284 case SET_COUNT:
285 case SET_DRANGE:
286 case SET_QRANGE:
287 case SET_SMALLEST:
288 case SET_LARGEST:
289 case SET_SPREAD_ARG:
290 if (scanned[i_arg].n_items < 3) {
291 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
292 exit(EXIT_FAILURE);
293 }
294 if (!scanItemList(&scanFlags, scanned[i_arg].list, &scanned[i_arg].n_items,
295 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS |
296 SCANITEMLIST_IGNORE_VALUELESS,
297 "positionColumn", SDDS_STRING, &positionColumn, 1, POSITIONCOLUMN_GIVEN,
298 "toplimit", SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
299 "bottomlimit", SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL)) {
300 sprintf(s, "invalid -%s syntax", scanned[i_arg].list[0]);
301 SDDS_Bomb(s);
302 }
303 requests = addStatRequests(&request, requests, scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, code, scanFlags);
304 request[requests - 1].topLimit = topLimit;
305 request[requests - 1].bottomLimit = bottomLimit;
306 if (positionColumn) {
307 if (code == SET_MAXIMUM || code == SET_MINIMUM || code == SET_LARGEST || code == SET_SMALLEST)
308 SDDS_CopyString(&request[requests - 1].positionColumn, positionColumn);
309 free(positionColumn);
310 positionColumn = NULL;
311 }
312 break;
313 case SET_PERCENTILE:
314 if (scanned[i_arg].n_items < 3) {
315 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
316 exit(EXIT_FAILURE);
317 }
318 if (!scanItemList(&scanFlags, scanned[i_arg].list, &scanned[i_arg].n_items,
319 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS |
320 SCANITEMLIST_IGNORE_VALUELESS,
321 "value", SDDS_DOUBLE, &percent, 1, PERCENT_GIVEN,
322 "toplimit", SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
323 "bottomlimit", SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL) ||
324 !(scanFlags & PERCENT_GIVEN) || percent <= 0 || percent >= 100)
325 SDDS_Bomb("invalid -percentile syntax");
326 requests = addStatRequests(&request, requests, scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, code, scanFlags);
327 request[requests - 1].percent = percent;
328 request[requests - 1].topLimit = topLimit;
329 request[requests - 1].bottomLimit = bottomLimit;
330 break;
331 case SET_SUM:
332 if (scanned[i_arg].n_items < 3) {
333 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
334 exit(EXIT_FAILURE);
335 }
336 power = 1;
337 if (!scanItemList(&scanFlags, scanned[i_arg].list, &scanned[i_arg].n_items,
338 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS | SCANITEMLIST_IGNORE_VALUELESS,
339 "power", SDDS_LONG, &power, 1, 0,
340 "toplimit", SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
341 "bottomlimit", SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL))
342 SDDS_Bomb("invalid -sum syntax");
343 requests = addStatRequests(&request, requests, scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, code, scanFlags);
344 request[requests - 1].sumPower = power;
345 request[requests - 1].topLimit = topLimit;
346 request[requests - 1].bottomLimit = bottomLimit;
347 break;
348 case SET_PIPE:
349 if (!processPipeOption(scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, &pipeFlags))
350 SDDS_Bomb("invalid -pipe syntax");
351 break;
352 case SET_NOWARNINGS:
353 noWarnings = 1;
354 break;
355 case SET_MAJOR_ORDER:
356 majorOrderFlag = 0;
357 scanned[i_arg].n_items--;
358 if (scanned[i_arg].n_items > 0 &&
359 (!scanItemList(&majorOrderFlag, scanned[i_arg].list + 1, &scanned[i_arg].n_items, 0,
360 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
361 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
362 SDDS_Bomb("invalid -majorOrder syntax/values");
363 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
364 columnMajorOrder = 1;
365 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
366 columnMajorOrder = 0;
367 break;
368 case SET_THREADS:
369 if (scanned[i_arg].n_items != 2 ||
370 !sscanf(scanned[i_arg].list[1], "%d", &threads) || threads < 1)
371 SDDS_Bomb("invalid -threads syntax");
372 break;
373 default:
374 fprintf(stderr, "error: unknown option '%s' given\n", scanned[i_arg].list[0]);
375 exit(EXIT_FAILURE);
376 break;
377 }
378 } else {
379 /* argument is filename */
380 if (!input)
381 input = scanned[i_arg].list[0];
382 else if (!output)
383 output = scanned[i_arg].list[0];
384 else
385 SDDS_Bomb("too many filenames seen");
386 }
387 }
388
389 processFilenames("sddsrowstats", &input, &output, pipeFlags, noWarnings, &tmpFileUsed);
390
391 if (!requests)
392 SDDS_Bomb("no statistics requested");
393
394 if (!SDDS_InitializeInput(&inData, input))
395 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
396
397 if (!(stat = compileStatDefinitions(&inData, request, requests, &stats, noWarnings))) {
398 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
399 exit(EXIT_FAILURE);
400 }
401 if (stats < 0)
402 SDDS_Bomb("No valid statistics requests.");
403 for (iStat = maxSourceColumns = 0; iStat < stats; iStat++) {
404 if (stat[iStat].sourceColumns > maxSourceColumns)
405 maxSourceColumns = stat[iStat].sourceColumns;
406 }
407 if (!(statWorkArray = malloc(sizeof(*statWorkArray) * maxSourceColumns)))
408 SDDS_Bomb("allocation failure (statWorkArray)");
409
410 if (!setupOutputFile(&outData, output, &inData, stat, stats, columnMajorOrder)) {
411 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
412 exit(EXIT_FAILURE);
413 }
414
415 inputData = NULL;
416
417 while ((code = SDDS_ReadPage(&inData)) > 0) {
418 if (!SDDS_CopyPage(&outData, &inData)) {
419 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
420 exit(EXIT_FAILURE);
421 }
422 if ((rows = SDDS_CountRowsOfInterest(&inData))) {
423 if (!(outputData = (double *)malloc(sizeof(*outputData) * rows))) {
424 SDDS_Bomb("memory allocation failure");
425 }
426 if (!(posColumnName = (char **)malloc(sizeof(*posColumnName) * rows))) {
427 SDDS_Bomb("memory allocation failure");
428 }
429 for (iStat = 0; iStat < stats; iStat++) {
430 if (!(inputData = (double **)malloc(sizeof(*inputData) * stat[iStat].sourceColumns))) {
431 SDDS_Bomb("memory allocation failure");
432 }
433 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
434 if (!(inputData[iColumn] = SDDS_GetColumnInDoubles(&inData, stat[iStat].sourceColumn[iColumn]))) {
435 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
436 }
437 }
438 for (row = 0; row < rows; row++)
439 outputData[row] = DBL_MAX;
440 switch (stat[iStat].optionCode) {
441 case SET_MINIMUM:
442 for (row = 0; row < rows; row++) {
443 value1 = DBL_MAX;
444 posColIndex = 0;
445 posColumnName[row] = NULL;
446 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
447 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
448 continue;
449 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
450 continue;
451 if (inputData[iColumn][row] < value1) {
452 value1 = inputData[iColumn][row];
453 posColIndex = iColumn;
454 }
455 }
456 outputData[row] = value1;
457 if (stat[iStat].positionColumn)
458 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
459 }
460 break;
461 case SET_MAXIMUM:
462 for (row = 0; row < rows; row++) {
463 posColIndex = 0;
464 value1 = -DBL_MAX;
465 posColumnName[row] = NULL;
466 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
467 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
468 continue;
469 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
470 continue;
471 if (inputData[iColumn][row] > value1) {
472 posColIndex = iColumn;
473 value1 = inputData[iColumn][row];
474 }
475 }
476 outputData[row] = value1;
477 if (stat[iStat].positionColumn)
478 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
479 }
480 break;
481 case SET_MEAN:
482 for (row = 0; row < rows; row++) {
483 value1 = 0;
484 count = 0;
485 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
486 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
487 continue;
488 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
489 continue;
490 value1 += inputData[iColumn][row];
491 count++;
492 }
493 if (count)
494 outputData[row] = value1 / count;
495 }
496 break;
497 case SET_MEDIAN:
498 for (row = 0; row < rows; row++) {
499 for (iColumn = count = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
500 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
501 continue;
502 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
503 continue;
504 statWorkArray[count] = inputData[iColumn][row];
505 count++;
506 }
507 if (count)
508 compute_median(outputData + row, statWorkArray, count);
509 }
510 break;
511 case SET_STANDARDDEVIATION:
512 for (row = 0; row < rows; row++) {
513 value1 = 0;
514 value2 = 0;
515 count = 0;
516 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
517 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
518 continue;
519 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
520 continue;
521 value1 += inputData[iColumn][row];
522 value2 += inputData[iColumn][row] * inputData[iColumn][row];
523 count++;
524 }
525 if (count > 1) {
526 if ((value1 = value2 / count - sqr(value1 / count)) <= 0)
527 outputData[row] = 0;
528 else
529 outputData[row] = sqrt(value1 * count / (count - 1.0));
530 }
531 }
532 break;
533 case SET_SIGMA:
534 for (row = 0; row < rows; row++) {
535 value1 = 0;
536 value2 = 0;
537 count = 0;
538 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
539 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
540 continue;
541 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
542 continue;
543 value1 += inputData[iColumn][row];
544 value2 += inputData[iColumn][row] * inputData[iColumn][row];
545 count++;
546 }
547 if (count > 1) {
548 if ((value1 = value2 / count - sqr(value1 / count)) <= 0)
549 outputData[row] = 0;
550 else
551 outputData[row] = sqrt(value1 / (count - 1.0));
552 }
553 }
554 break;
555 case SET_RMS:
556 for (row = 0; row < rows; row++) {
557 value1 = 0;
558 count = 0;
559 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
560 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
561 continue;
562 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
563 continue;
564 value1 += sqr(inputData[iColumn][row]);
565 count++;
566 }
567 if (count)
568 outputData[row] = sqrt(value1 / count);
569 }
570 break;
571 case SET_SUM:
572 for (row = 0; row < rows; row++) {
573 value1 = 0;
574 count = 0;
575 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
576 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
577 continue;
578 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
579 continue;
580 value1 += ipow(inputData[iColumn][row], stat[iStat].sumPower);
581 count++;
582 }
583 if (count)
584 outputData[row] = value1;
585 }
586 break;
587 case SET_COUNT:
588 for (row = 0; row < rows; row++) {
589 count = 0;
590 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
591 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
592 continue;
593 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
594 continue;
595 count++;
596 }
597 outputData[row] = count;
598 }
599 break;
600 case SET_MAD:
601 for (row = 0; row < rows; row++) {
602 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
603 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
604 continue;
605 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
606 continue;
607 statWorkArray[count] = inputData[iColumn][row];
608 count++;
609 }
610 if (count)
611 computeMomentsThreaded(NULL, NULL, NULL, &outputData[row], statWorkArray, count, threads);
612 }
613 break;
614 case SET_DRANGE:
615 for (row = 0; row < rows; row++) {
616 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
617 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
618 continue;
619 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
620 continue;
621 statWorkArray[count] = inputData[iColumn][row];
622 count++;
623 }
624 if (count && compute_percentiles(decileResult, decilePoint, 2, statWorkArray, count))
625 outputData[row] = decileResult[1] - decileResult[0];
626 }
627 break;
628 case SET_QRANGE:
629 for (row = 0; row < rows; row++) {
630 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
631 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
632 continue;
633 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
634 continue;
635 statWorkArray[count] = inputData[iColumn][row];
636 count++;
637 }
638 if (count && compute_percentiles(quartileResult, quartilePoint, 2, statWorkArray, count))
639 outputData[row] = quartileResult[1] - quartileResult[0];
640 }
641 break;
642 case SET_SMALLEST:
643 for (row = 0; row < rows; row++) {
644 value1 = DBL_MAX;
645 posColIndex = 0;
646 posColumnName[row] = NULL;
647 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
648 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
649 continue;
650 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
651 continue;
652 if ((value2 = fabs(inputData[iColumn][row])) < value1) {
653 posColIndex = iColumn;
654 value1 = value2;
655 }
656 }
657 outputData[row] = value1;
658 if (stat[iStat].positionColumn)
659 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
660 }
661 break;
662 case SET_LARGEST:
663 for (row = 0; row < rows; row++) {
664 value1 = 0;
665 posColIndex = 0;
666 posColumnName[row] = NULL;
667 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
668 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
669 continue;
670 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
671 continue;
672 if ((value2 = fabs(inputData[iColumn][row])) > value1) {
673 posColIndex = iColumn;
674 value1 = value2;
675 }
676 }
677 outputData[row] = value1;
678 if (stat[iStat].positionColumn)
679 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
680 }
681 break;
682 case SET_SPREAD_ARG:
683 for (row = 0; row < rows; row++) {
684 value1 = DBL_MAX; /* min */
685 value2 = -DBL_MAX; /* max */
686 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
687 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
688 continue;
689 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
690 continue;
691 if (inputData[iColumn][row] < value1)
692 value1 = inputData[iColumn][row];
693 if (inputData[iColumn][row] > value2)
694 value2 = inputData[iColumn][row];
695 }
696 outputData[row] = value2 - value1;
697 }
698 break;
699 case SET_PERCENTILE:
700 for (row = 0; row < rows; row++) {
701 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
702 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
703 continue;
704 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
705 continue;
706 statWorkArray[count] = inputData[iColumn][row];
707 count++;
708 }
709 outputData[row] = HUGE_VAL;
710 if (count)
711 compute_percentiles(&outputData[row], &stat[iStat].percent, 1, statWorkArray, count);
712 }
713 break;
714 default:
715 SDDS_Bomb("invalid statistic code (accumulation loop)");
716 break;
717 }
718 if (!SDDS_SetColumn(&outData, SDDS_SET_BY_INDEX, outputData, rows, stat[iStat].resultIndex))
719 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
720
721 if (stat[iStat].positionColumn) {
722 if (!SDDS_SetColumn(&outData, SDDS_SET_BY_INDEX, posColumnName, rows, stat[iStat].positionColumnIndex))
723 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
724 }
725 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++)
726 free(inputData[iColumn]);
727 free(inputData);
728 inputData = NULL;
729 }
730 free(outputData);
731 outputData = NULL;
732 free(posColumnName);
733 posColumnName = NULL;
734 }
735 if (!SDDS_WritePage(&outData))
736 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
737 }
738 free_scanargs(&scanned, argc);
739 for (iStat = 0; iStat < stats; iStat++) {
740 if (stat[iStat].positionColumn)
741 free(stat[iStat].positionColumn);
742 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++)
743 free(stat[iStat].sourceColumn[iColumn]);
744 free(stat[iStat].sourceColumn);
745 }
746 free(request);
747 free(stat);
748 if (statWorkArray)
749 free(statWorkArray);
750
751 if (!SDDS_Terminate(&inData) || !SDDS_Terminate(&outData)) {
752 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
753 exit(EXIT_FAILURE);
754 }
755 if (tmpFileUsed && !replaceFileAndBackUp(input, output))
756 exit(EXIT_FAILURE);
757 return EXIT_SUCCESS;
758}
759
760long addStatRequests(STAT_REQUEST **statRequest, long requests, char **item, long items, long code, unsigned long flags) {
761 long i;
762 if (!(*statRequest = SDDS_Realloc(*statRequest, sizeof(**statRequest) * (requests + 1))) ||
763 !((*statRequest)[requests].sourceColumn = (char **)malloc(sizeof(*(*statRequest)[requests].sourceColumn) * (items - 1))))
764 SDDS_Bomb("memory allocation failure");
765 for (i = 0; i < items - 1; i++)
766 SDDS_CopyString(&((*statRequest)[requests].sourceColumn[i]), item[i + 1]);
767 (*statRequest)[requests].resultColumn = item[0];
768 (*statRequest)[requests].sourceColumns = items - 1;
769 (*statRequest)[requests].optionCode = code;
770 (*statRequest)[requests].sumPower = 1;
771 (*statRequest)[requests].flags = flags;
772 (*statRequest)[requests].positionColumn = NULL;
773
774 return requests + 1;
775}
776
777STAT_DEFINITION *compileStatDefinitions(SDDS_DATASET *inData, STAT_REQUEST *request, long requests, long *stats, long noWarnings) {
778 STAT_DEFINITION *stat;
779 long iReq, iName, iStat;
780
781 if (!(stat = (STAT_DEFINITION *)malloc(sizeof(*stat) * requests)))
782 SDDS_Bomb("memory allocation failure");
783 for (iReq = iStat = 0; iReq < requests; iReq++) {
784 if ((stat[iStat].sourceColumns = expandColumnPairNames(inData, &request[iReq].sourceColumn, NULL, request[iReq].sourceColumns, NULL, 0, FIND_NUMERIC_TYPE, 0)) <= 0) {
785 if (!noWarnings) {
786 fprintf(stderr, "Warning: no match for column names (sddsrowstats):\n");
787 for (iName = 0; iName < request[iReq].sourceColumns; iName++)
788 fprintf(stderr, "%s, ", request[iReq].sourceColumn[iName]);
789 fputc('\n', stderr);
790 }
791 } else {
792 stat[iStat].sourceColumn = request[iReq].sourceColumn;
793 stat[iStat].resultColumn = request[iReq].resultColumn;
794 stat[iStat].optionCode = request[iReq].optionCode;
795 stat[iStat].sumPower = request[iReq].sumPower;
796 stat[iStat].flags = request[iReq].flags;
797 stat[iStat].topLimit = request[iReq].topLimit;
798 stat[iStat].bottomLimit = request[iReq].bottomLimit;
799 stat[iStat].positionColumn = request[iReq].positionColumn;
800 stat[iStat].percent = request[iReq].percent;
801 iStat++;
802 }
803 *stats = iStat;
804 }
805
806 return stat;
807}
808
809long setupOutputFile(SDDS_DATASET *outData, char *output, SDDS_DATASET *inData, STAT_DEFINITION *stat, long stats, short columnMajorOrder) {
810 long column;
811 char s[SDDS_MAXLINE];
812
813 if (!SDDS_InitializeCopy(outData, inData, output, "w"))
814 return 0;
815 if (columnMajorOrder != -1)
816 outData->layout.data_mode.column_major = columnMajorOrder;
817 else
818 outData->layout.data_mode.column_major = inData->layout.data_mode.column_major;
819 for (column = 0; column < stats; column++) {
820 if (!SDDS_TransferColumnDefinition(outData, inData, stat[column].sourceColumn[0], stat[column].resultColumn)) {
821 sprintf(s, "Problem transferring definition of column %s to %s\n", stat[column].sourceColumn[0], stat[column].resultColumn);
822 SDDS_SetError(s);
823 return 0;
824 }
825 if ((stat[column].resultIndex = SDDS_GetColumnIndex(outData, stat[column].resultColumn)) < 0) {
826 sprintf(s, "Problem creating column %s", stat[column].resultColumn);
827 SDDS_SetError(s);
828 return 0;
829 }
830 if (stat[column].positionColumn) {
831 if (!SDDS_DefineSimpleColumn(outData, stat[column].positionColumn, NULL, SDDS_STRING)) {
832 sprintf(s, "Problem define column %s\n", stat[column].positionColumn);
833 SDDS_SetError(s);
834 return 0;
835 }
836 if ((stat[column].positionColumnIndex = SDDS_GetColumnIndex(outData, stat[column].positionColumn)) < 0) {
837 sprintf(s, "Problem creating column %s", stat[column].positionColumn);
838 SDDS_SetError(s);
839 return 0;
840 }
841 }
842 if (!SDDS_ChangeColumnInformation(outData, "description", "", SDDS_SET_BY_NAME, stat[column].resultColumn) ||
843 !SDDS_ChangeColumnInformation(outData, "symbol", "", SDDS_SET_BY_NAME, stat[column].resultColumn) ||
844 !SDDS_ChangeColumnInformation(outData, "type", "double", SDDS_SET_BY_NAME | SDDS_PASS_BY_STRING, stat[column].resultColumn)) {
845 sprintf(s, "Problem changing attributes of new column %s", stat[column].resultColumn);
846 SDDS_SetError(s);
847 return 0;
848 }
849 }
850 if (!SDDS_WriteLayout(outData))
851 return 0;
852 return 1;
853}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
Definition SDDS_info.c:364
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
Utility functions for SDDS dataset manipulation and string array operations.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
double ipow(const double x, const int64_t p)
Compute x raised to the power p (x^p).
Definition ipow.c:33
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long compute_percentiles(double *position, double *percent, long positions, double *x, long n)
Computes multiple percentiles of an array of doubles.
Definition median.c:84
long compute_median(double *value, double *x, long n)
Computes the median of an array of doubles.
Definition median.c:29
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
Definition moments.c:127
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.