SDDSlib
Loading...
Searching...
No Matches
sddsrowstats.c
Go to the documentation of this file.
1/**
2 * @file sddsrowstats.c
3 * @brief Computes statistics for rows across multiple columns in SDDS datasets.
4 *
5 * @details
6 * The `sddsrowstats` program processes an SDDS dataset, computes specified statistics
7 * for each row across selected columns, and adds new columns with the results to the
8 * output dataset. It supports various statistical operations such as mean, median,
9 * standard deviation, sum, and more, with options to limit the data considered for
10 * each computation.
11 *
12 * @usage
13 * sddsrowstats [<input>] [<output>] [options]
14 *
15 * @options
16 * -pipe[=input][,output]
17 * -nowarnings
18 * -mean=<newName>[,<limitOps>],<columnNameList>
19 * -rms=<newName>[,<limitOps>],<columnNameList>
20 * -median=<newName>[,<limitOps>],<columnNameList>
21 * -minimum=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>
22 * -maximum=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>
23 * -standardDeviation=<newName>[,<limitOps>],<columnNameList>
24 * -sigma=<newName>[,<limitOps>],<columnNameList>
25 * -mad=<newName>[,<limitOps>],<columnNameList>
26 * -sum=<newName>[,<limitOps>][,power=<integer>],<columnNameList>
27 * -spread=<newName>[,<limitOps>],<columnNameList>
28 * -drange=<newName>[,<limitOps>],<columnNameList>
29 * -qrange=<newName>[,<limitOps>],<columnNameList>
30 * -smallest=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>
31 * -largest=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>
32 * -count=<newName>[,<limitOps>],<columnNameList>
33 * -percentile=<newName>[,<limitOps>],value=<percent>,<columnNameList>
34 * -majorOrder=row|column
35 * -threads=<number>
36 *
37 * <limitOps> is of the form [topLimit=<value>,][bottomLimit=<value>]
38 *
39 * Each statistic option requires specifying a new column name and a list of source
40 * columns, which can include wildcards.
41 *
42 * @copyright
43 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
44 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
45 *
46 * @license
47 * This file is distributed under the terms of the Software License Agreement
48 * found in the file LICENSE included with this distribution.
49 *
50 * @author M. Borland, R. Soliday, H. Shang
51 */
52
53#include "mdb.h"
54#include "scan.h"
55#include "SDDS.h"
56#include "SDDSutils.h"
57#include <ctype.h>
58
59/* Enumeration for option types */
60enum option_type {
61 SET_MAXIMUM,
62 SET_MINIMUM,
63 SET_MEAN,
64 SET_STANDARDDEVIATION,
65 SET_RMS,
66 SET_SUM,
67 SET_SIGMA,
68 SET_COUNT,
69 SET_PIPE,
70 SET_MEDIAN,
71 SET_MAD,
72 SET_NOWARNINGS,
73 SET_DRANGE,
74 SET_QRANGE,
75 SET_LARGEST,
76 SET_SMALLEST,
77 SET_SPREAD_ARG,
78 SET_PERCENTILE,
79 SET_MAJOR_ORDER,
80 SET_THREADS,
81 N_OPTIONS
82};
83
84char *option[N_OPTIONS] = {
85 "maximum",
86 "minimum",
87 "mean",
88 "standarddeviation",
89 "rms",
90 "sum",
91 "sigma",
92 "count",
93 "pipe",
94 "median",
95 "mad",
96 "nowarnings",
97 "drange",
98 "qrange",
99 "largest",
100 "smallest",
101 "spread",
102 "percentile",
103 "majorOrder",
104 "threads",
105};
106
107#define TOPLIMIT_GIVEN 0x0001U
108#define BOTTOMLIMIT_GIVEN 0x0002U
109#define POSITIONCOLUMN_GIVEN 0x0004U
110#define PERCENT_GIVEN 0x0008U
111
112/* this structure stores a command-line request for statistics computation */
113/* individual elements of sourceColumn may contain wildcards */
114typedef struct
115{
116 char **sourceColumn, *resultColumn, *positionColumn;
117 long sourceColumns, sumPower, optionCode, positionColumnIndex;
118 double percent;
119 unsigned long flags;
120 double topLimit, bottomLimit;
122
123/* this structure stores data necessary for accessing/creating SDDS columns and
124 * for computing a statistic
125 */
126typedef struct
127{
128 char **sourceColumn, *resultColumn, *positionColumn;
129 long sourceColumns, optionCode, resultIndex, sumPower, positionColumnIndex;
130 double percent;
131 unsigned long flags;
132 double topLimit, bottomLimit;
134
135long addStatRequests(STAT_REQUEST **statRequest, long requests, char **item, long items, long code, unsigned long flag);
136STAT_DEFINITION *compileStatDefinitions(SDDS_DATASET *inTable, STAT_REQUEST *request, long requests, long *stats, long noWarnings);
137long setupOutputFile(SDDS_DATASET *outTable, char *output, SDDS_DATASET *inTable, STAT_DEFINITION *stat, long stats, short columnMajorOrder);
138
139static char *USAGE =
140 "Usage: sddsrowstats [<input>] [<output>] [options]\n"
141 "\nOptions:\n"
142 " -pipe[=input][,output]\n"
143 " Use pipe for input and/or output.\n"
144 " -nowarnings\n"
145 " Suppress warning messages.\n"
146 " -mean=<newName>[,<limitOps>],<columnNameList>\n"
147 " Compute the mean of the specified columns.\n"
148 " -rms=<newName>[,<limitOps>],<columnNameList>\n"
149 " Compute the root mean square of the specified columns.\n"
150 " -median=<newName>[,<limitOps>],<columnNameList>\n"
151 " Compute the median of the specified columns.\n"
152 " -minimum=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>\n"
153 " Compute the minimum value among the specified columns.\n"
154 " -maximum=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>\n"
155 " Compute the maximum value among the specified columns.\n"
156 " -standardDeviation=<newName>[,<limitOps>],<columnNameList>\n"
157 " Compute the standard deviation of the specified columns.\n"
158 " -sigma=<newName>[,<limitOps>],<columnNameList>\n"
159 " Compute the sigma (standard deviation) of the specified columns.\n"
160 " -mad=<newName>[,<limitOps>],<columnNameList>\n"
161 " Compute the median absolute deviation (MAD) of the specified columns.\n"
162 " -sum=<newName>[,<limitOps>][,power=<integer>],<columnNameList>\n"
163 " Compute the sum of the specified columns.\n"
164 " -spread=<newName>[,<limitOps>],<columnNameList>\n"
165 " Compute the spread (max - min) of the specified columns.\n"
166 " -drange=<newName>[,<limitOps>],<columnNameList>\n"
167 " Compute the decile range of the specified columns.\n"
168 " -qrange=<newName>[,<limitOps>],<columnNameList>\n"
169 " Compute the quartile range of the specified columns.\n"
170 " -smallest=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>\n"
171 " Compute the smallest absolute value among the specified columns.\n"
172 " -largest=<newName>[,positionColumn=<name>][,<limitOps>],<columnNameList>\n"
173 " Compute the largest absolute value among the specified columns.\n"
174 " -count=<newName>[,<limitOps>],<columnNameList>\n"
175 " Count the number of valid entries in the specified columns.\n"
176 " -percentile=<newName>[,<limitOps>],value=<percent>,<columnNameList>\n"
177 " Compute the specified percentile of the given columns.\n"
178 " -majorOrder=row|column\n"
179 " Set the data ordering to row-major or column-major.\n"
180 " -threads=<number>\n"
181 " Specify the number of threads to use for computations.\n"
182 "\n<limitOps> is of the form [topLimit=<value>,][bottomLimit=<value>]\n"
183 "\nComputes statistics for each row of each input table, adding new columns to the\n"
184 "output table. Each row statistic is done using data from the columns listed in\n"
185 "<columnNameList>, which is a comma-separated list of optionally-wildcarded column\n"
186 "names. positionColumn=<name> for minimum, maximum, smallest, largest option is to store \n"
187 "the corresponding column name of the minimum, maximum, smallest, or largest in each row to \n"
188 "the new output column - <name>.\n"
189 "\nProgram by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
190
191int main(int argc, char **argv) {
192 STAT_DEFINITION *stat;
193 long stats;
194 STAT_REQUEST *request;
195 long requests;
196 SCANNED_ARG *scanned;
197 SDDS_DATASET inData, outData;
198
199 int32_t power;
200 long i_arg, code, iStat, tmpFileUsed, iColumn, posColIndex;
201 int64_t rows, row, count;
202 long noWarnings, maxSourceColumns;
203 char *input, *output, *positionColumn, **posColumnName;
204 double **inputData, *outputData, value1, value2, topLimit, bottomLimit;
205 unsigned long pipeFlags, scanFlags, majorOrderFlag;
206 char s[100];
207 double *statWorkArray;
208 double quartilePoint[2] = {25.0, 75.0}, quartileResult[2];
209 double decilePoint[2] = {10.0, 90.0}, decileResult[2];
210 double percent;
211 short columnMajorOrder = -1;
212 int threads = 1;
213
215 argc = scanargs(&scanned, argc, argv);
216 if (argc < 2) {
217 bomb("too few arguments", USAGE);
218 }
219
220 posColumnName = NULL;
221 input = output = positionColumn = NULL;
222 stat = NULL;
223 request = NULL;
224 stats = requests = pipeFlags = 0;
225 noWarnings = 0;
226 outputData = NULL;
227 statWorkArray = NULL;
228
229 for (i_arg = 1; i_arg < argc; i_arg++) {
230 scanFlags = 0;
231 if (scanned[i_arg].arg_type == OPTION) {
232 /* process options here */
233 switch (code = match_string(scanned[i_arg].list[0], option, N_OPTIONS, 0)) {
234 case SET_MAXIMUM:
235 case SET_MINIMUM:
236 case SET_MEAN:
237 case SET_MEDIAN:
238 case SET_STANDARDDEVIATION:
239 case SET_RMS:
240 case SET_SIGMA:
241 case SET_MAD:
242 case SET_COUNT:
243 case SET_DRANGE:
244 case SET_QRANGE:
245 case SET_SMALLEST:
246 case SET_LARGEST:
247 case SET_SPREAD_ARG:
248 if (scanned[i_arg].n_items < 3) {
249 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
250 exit(EXIT_FAILURE);
251 }
252 if (!scanItemList(&scanFlags, scanned[i_arg].list, &scanned[i_arg].n_items,
253 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS |
254 SCANITEMLIST_IGNORE_VALUELESS,
255 "positionColumn", SDDS_STRING, &positionColumn, 1, POSITIONCOLUMN_GIVEN,
256 "toplimit", SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
257 "bottomlimit", SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL)) {
258 sprintf(s, "invalid -%s syntax", scanned[i_arg].list[0]);
259 SDDS_Bomb(s);
260 }
261 requests = addStatRequests(&request, requests, scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, code, scanFlags);
262 request[requests - 1].topLimit = topLimit;
263 request[requests - 1].bottomLimit = bottomLimit;
264 if (positionColumn) {
265 if (code == SET_MAXIMUM || code == SET_MINIMUM || code == SET_LARGEST || code == SET_SMALLEST)
266 SDDS_CopyString(&request[requests - 1].positionColumn, positionColumn);
267 free(positionColumn);
268 positionColumn = NULL;
269 }
270 break;
271 case SET_PERCENTILE:
272 if (scanned[i_arg].n_items < 3) {
273 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
274 exit(EXIT_FAILURE);
275 }
276 if (!scanItemList(&scanFlags, scanned[i_arg].list, &scanned[i_arg].n_items,
277 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS |
278 SCANITEMLIST_IGNORE_VALUELESS,
279 "value", SDDS_DOUBLE, &percent, 1, PERCENT_GIVEN,
280 "toplimit", SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
281 "bottomlimit", SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL) ||
282 !(scanFlags & PERCENT_GIVEN) || percent <= 0 || percent >= 100)
283 SDDS_Bomb("invalid -percentile syntax");
284 requests = addStatRequests(&request, requests, scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, code, scanFlags);
285 request[requests - 1].percent = percent;
286 request[requests - 1].topLimit = topLimit;
287 request[requests - 1].bottomLimit = bottomLimit;
288 break;
289 case SET_SUM:
290 if (scanned[i_arg].n_items < 3) {
291 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
292 exit(EXIT_FAILURE);
293 }
294 power = 1;
295 if (!scanItemList(&scanFlags, scanned[i_arg].list, &scanned[i_arg].n_items,
296 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS | SCANITEMLIST_IGNORE_VALUELESS,
297 "power", SDDS_LONG, &power, 1, 0,
298 "toplimit", SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
299 "bottomlimit", SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL))
300 SDDS_Bomb("invalid -sum syntax");
301 requests = addStatRequests(&request, requests, scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, code, scanFlags);
302 request[requests - 1].sumPower = power;
303 request[requests - 1].topLimit = topLimit;
304 request[requests - 1].bottomLimit = bottomLimit;
305 break;
306 case SET_PIPE:
307 if (!processPipeOption(scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, &pipeFlags))
308 SDDS_Bomb("invalid -pipe syntax");
309 break;
310 case SET_NOWARNINGS:
311 noWarnings = 1;
312 break;
313 case SET_MAJOR_ORDER:
314 majorOrderFlag = 0;
315 scanned[i_arg].n_items--;
316 if (scanned[i_arg].n_items > 0 &&
317 (!scanItemList(&majorOrderFlag, scanned[i_arg].list + 1, &scanned[i_arg].n_items, 0,
318 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
319 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
320 SDDS_Bomb("invalid -majorOrder syntax/values");
321 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
322 columnMajorOrder = 1;
323 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
324 columnMajorOrder = 0;
325 break;
326 case SET_THREADS:
327 if (scanned[i_arg].n_items != 2 ||
328 !sscanf(scanned[i_arg].list[1], "%d", &threads) || threads < 1)
329 SDDS_Bomb("invalid -threads syntax");
330 break;
331 default:
332 fprintf(stderr, "error: unknown option '%s' given\n", scanned[i_arg].list[0]);
333 exit(EXIT_FAILURE);
334 break;
335 }
336 } else {
337 /* argument is filename */
338 if (!input)
339 input = scanned[i_arg].list[0];
340 else if (!output)
341 output = scanned[i_arg].list[0];
342 else
343 SDDS_Bomb("too many filenames seen");
344 }
345 }
346
347 processFilenames("sddsrowstats", &input, &output, pipeFlags, noWarnings, &tmpFileUsed);
348
349 if (!requests)
350 SDDS_Bomb("no statistics requested");
351
352 if (!SDDS_InitializeInput(&inData, input))
353 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
354
355 if (!(stat = compileStatDefinitions(&inData, request, requests, &stats, noWarnings))) {
356 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
357 exit(EXIT_FAILURE);
358 }
359 if (stats < 0)
360 SDDS_Bomb("No valid statistics requests.");
361 for (iStat = maxSourceColumns = 0; iStat < stats; iStat++) {
362 if (stat[iStat].sourceColumns > maxSourceColumns)
363 maxSourceColumns = stat[iStat].sourceColumns;
364 }
365 if (!(statWorkArray = malloc(sizeof(*statWorkArray) * maxSourceColumns)))
366 SDDS_Bomb("allocation failure (statWorkArray)");
367
368 if (!setupOutputFile(&outData, output, &inData, stat, stats, columnMajorOrder)) {
369 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
370 exit(EXIT_FAILURE);
371 }
372
373 inputData = NULL;
374
375 while ((code = SDDS_ReadPage(&inData)) > 0) {
376 if (!SDDS_CopyPage(&outData, &inData)) {
377 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
378 exit(EXIT_FAILURE);
379 }
380 if ((rows = SDDS_CountRowsOfInterest(&inData))) {
381 if (!(outputData = (double *)malloc(sizeof(*outputData) * rows))) {
382 SDDS_Bomb("memory allocation failure");
383 }
384 if (!(posColumnName = (char **)malloc(sizeof(*posColumnName) * rows))) {
385 SDDS_Bomb("memory allocation failure");
386 }
387 for (iStat = 0; iStat < stats; iStat++) {
388 if (!(inputData = (double **)malloc(sizeof(*inputData) * stat[iStat].sourceColumns))) {
389 SDDS_Bomb("memory allocation failure");
390 }
391 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
392 if (!(inputData[iColumn] = SDDS_GetColumnInDoubles(&inData, stat[iStat].sourceColumn[iColumn]))) {
393 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
394 }
395 }
396 for (row = 0; row < rows; row++)
397 outputData[row] = DBL_MAX;
398 switch (stat[iStat].optionCode) {
399 case SET_MINIMUM:
400 for (row = 0; row < rows; row++) {
401 value1 = DBL_MAX;
402 posColIndex = 0;
403 posColumnName[row] = NULL;
404 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
405 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
406 continue;
407 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
408 continue;
409 if (inputData[iColumn][row] < value1) {
410 value1 = inputData[iColumn][row];
411 posColIndex = iColumn;
412 }
413 }
414 outputData[row] = value1;
415 if (stat[iStat].positionColumn)
416 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
417 }
418 break;
419 case SET_MAXIMUM:
420 for (row = 0; row < rows; row++) {
421 posColIndex = 0;
422 value1 = -DBL_MAX;
423 posColumnName[row] = NULL;
424 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
425 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
426 continue;
427 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
428 continue;
429 if (inputData[iColumn][row] > value1) {
430 posColIndex = iColumn;
431 value1 = inputData[iColumn][row];
432 }
433 }
434 outputData[row] = value1;
435 if (stat[iStat].positionColumn)
436 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
437 }
438 break;
439 case SET_MEAN:
440 for (row = 0; row < rows; row++) {
441 value1 = 0;
442 count = 0;
443 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
444 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
445 continue;
446 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
447 continue;
448 value1 += inputData[iColumn][row];
449 count++;
450 }
451 if (count)
452 outputData[row] = value1 / count;
453 }
454 break;
455 case SET_MEDIAN:
456 for (row = 0; row < rows; row++) {
457 for (iColumn = count = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
458 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
459 continue;
460 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
461 continue;
462 statWorkArray[count] = inputData[iColumn][row];
463 count++;
464 }
465 if (count)
466 compute_median(outputData + row, statWorkArray, count);
467 }
468 break;
469 case SET_STANDARDDEVIATION:
470 for (row = 0; row < rows; row++) {
471 value1 = 0;
472 value2 = 0;
473 count = 0;
474 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
475 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
476 continue;
477 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
478 continue;
479 value1 += inputData[iColumn][row];
480 value2 += inputData[iColumn][row] * inputData[iColumn][row];
481 count++;
482 }
483 if (count > 1) {
484 if ((value1 = value2 / count - sqr(value1 / count)) <= 0)
485 outputData[row] = 0;
486 else
487 outputData[row] = sqrt(value1 * count / (count - 1.0));
488 }
489 }
490 break;
491 case SET_SIGMA:
492 for (row = 0; row < rows; row++) {
493 value1 = 0;
494 value2 = 0;
495 count = 0;
496 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
497 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
498 continue;
499 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
500 continue;
501 value1 += inputData[iColumn][row];
502 value2 += inputData[iColumn][row] * inputData[iColumn][row];
503 count++;
504 }
505 if (count > 1) {
506 if ((value1 = value2 / count - sqr(value1 / count)) <= 0)
507 outputData[row] = 0;
508 else
509 outputData[row] = sqrt(value1 / (count - 1.0));
510 }
511 }
512 break;
513 case SET_RMS:
514 for (row = 0; row < rows; row++) {
515 value1 = 0;
516 count = 0;
517 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
518 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
519 continue;
520 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
521 continue;
522 value1 += sqr(inputData[iColumn][row]);
523 count++;
524 }
525 if (count)
526 outputData[row] = sqrt(value1 / count);
527 }
528 break;
529 case SET_SUM:
530 for (row = 0; row < rows; row++) {
531 value1 = 0;
532 count = 0;
533 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
534 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
535 continue;
536 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
537 continue;
538 value1 += ipow(inputData[iColumn][row], stat[iStat].sumPower);
539 count++;
540 }
541 if (count)
542 outputData[row] = value1;
543 }
544 break;
545 case SET_COUNT:
546 for (row = 0; row < rows; row++) {
547 count = 0;
548 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
549 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
550 continue;
551 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
552 continue;
553 count++;
554 }
555 outputData[row] = count;
556 }
557 break;
558 case SET_MAD:
559 for (row = 0; row < rows; row++) {
560 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
561 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
562 continue;
563 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
564 continue;
565 statWorkArray[count] = inputData[iColumn][row];
566 count++;
567 }
568 if (count)
569 computeMomentsThreaded(NULL, NULL, NULL, &outputData[row], statWorkArray, count, threads);
570 }
571 break;
572 case SET_DRANGE:
573 for (row = 0; row < rows; row++) {
574 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
575 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
576 continue;
577 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
578 continue;
579 statWorkArray[count] = inputData[iColumn][row];
580 count++;
581 }
582 if (count && compute_percentiles(decileResult, decilePoint, 2, statWorkArray, count))
583 outputData[row] = decileResult[1] - decileResult[0];
584 }
585 break;
586 case SET_QRANGE:
587 for (row = 0; row < rows; row++) {
588 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
589 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
590 continue;
591 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
592 continue;
593 statWorkArray[count] = inputData[iColumn][row];
594 count++;
595 }
596 if (count && compute_percentiles(quartileResult, quartilePoint, 2, statWorkArray, count))
597 outputData[row] = quartileResult[1] - quartileResult[0];
598 }
599 break;
600 case SET_SMALLEST:
601 for (row = 0; row < rows; row++) {
602 value1 = DBL_MAX;
603 posColIndex = 0;
604 posColumnName[row] = NULL;
605 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
606 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
607 continue;
608 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
609 continue;
610 if ((value2 = fabs(inputData[iColumn][row])) < value1) {
611 posColIndex = iColumn;
612 value1 = value2;
613 }
614 }
615 outputData[row] = value1;
616 if (stat[iStat].positionColumn)
617 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
618 }
619 break;
620 case SET_LARGEST:
621 for (row = 0; row < rows; row++) {
622 value1 = 0;
623 posColIndex = 0;
624 posColumnName[row] = NULL;
625 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
626 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
627 continue;
628 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
629 continue;
630 if ((value2 = fabs(inputData[iColumn][row])) > value1) {
631 posColIndex = iColumn;
632 value1 = value2;
633 }
634 }
635 outputData[row] = value1;
636 if (stat[iStat].positionColumn)
637 posColumnName[row] = stat[iStat].sourceColumn[posColIndex];
638 }
639 break;
640 case SET_SPREAD_ARG:
641 for (row = 0; row < rows; row++) {
642 value1 = DBL_MAX; /* min */
643 value2 = -DBL_MAX; /* max */
644 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
645 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
646 continue;
647 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
648 continue;
649 if (inputData[iColumn][row] < value1)
650 value1 = inputData[iColumn][row];
651 if (inputData[iColumn][row] > value2)
652 value2 = inputData[iColumn][row];
653 }
654 outputData[row] = value2 - value1;
655 }
656 break;
657 case SET_PERCENTILE:
658 for (row = 0; row < rows; row++) {
659 for (iColumn = count = value1 = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
660 if (stat[iStat].flags & TOPLIMIT_GIVEN && inputData[iColumn][row] > stat[iStat].topLimit)
661 continue;
662 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputData[iColumn][row] < stat[iStat].bottomLimit)
663 continue;
664 statWorkArray[count] = inputData[iColumn][row];
665 count++;
666 }
667 outputData[row] = HUGE_VAL;
668 if (count)
669 compute_percentiles(&outputData[row], &stat[iStat].percent, 1, statWorkArray, count);
670 }
671 break;
672 default:
673 SDDS_Bomb("invalid statistic code (accumulation loop)");
674 break;
675 }
676 if (!SDDS_SetColumn(&outData, SDDS_SET_BY_INDEX, outputData, rows, stat[iStat].resultIndex))
677 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
678
679 if (stat[iStat].positionColumn) {
680 if (!SDDS_SetColumn(&outData, SDDS_SET_BY_INDEX, posColumnName, rows, stat[iStat].positionColumnIndex))
681 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
682 }
683 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++)
684 free(inputData[iColumn]);
685 free(inputData);
686 inputData = NULL;
687 }
688 free(outputData);
689 outputData = NULL;
690 free(posColumnName);
691 posColumnName = NULL;
692 }
693 if (!SDDS_WritePage(&outData))
694 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
695 }
696 free_scanargs(&scanned, argc);
697 for (iStat = 0; iStat < stats; iStat++) {
698 if (stat[iStat].positionColumn)
699 free(stat[iStat].positionColumn);
700 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++)
701 free(stat[iStat].sourceColumn[iColumn]);
702 free(stat[iStat].sourceColumn);
703 }
704 free(request);
705 free(stat);
706 if (statWorkArray)
707 free(statWorkArray);
708
709 if (!SDDS_Terminate(&inData) || !SDDS_Terminate(&outData)) {
710 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
711 exit(EXIT_FAILURE);
712 }
713 if (tmpFileUsed && !replaceFileAndBackUp(input, output))
714 exit(EXIT_FAILURE);
715 return EXIT_SUCCESS;
716}
717
718long addStatRequests(STAT_REQUEST **statRequest, long requests, char **item, long items, long code, unsigned long flags) {
719 long i;
720 if (!(*statRequest = SDDS_Realloc(*statRequest, sizeof(**statRequest) * (requests + 1))) ||
721 !((*statRequest)[requests].sourceColumn = (char **)malloc(sizeof(*(*statRequest)[requests].sourceColumn) * (items - 1))))
722 SDDS_Bomb("memory allocation failure");
723 for (i = 0; i < items - 1; i++)
724 SDDS_CopyString(&((*statRequest)[requests].sourceColumn[i]), item[i + 1]);
725 (*statRequest)[requests].resultColumn = item[0];
726 (*statRequest)[requests].sourceColumns = items - 1;
727 (*statRequest)[requests].optionCode = code;
728 (*statRequest)[requests].sumPower = 1;
729 (*statRequest)[requests].flags = flags;
730 (*statRequest)[requests].positionColumn = NULL;
731
732 return requests + 1;
733}
734
735STAT_DEFINITION *compileStatDefinitions(SDDS_DATASET *inData, STAT_REQUEST *request, long requests, long *stats, long noWarnings) {
736 STAT_DEFINITION *stat;
737 long iReq, iName, iStat;
738
739 if (!(stat = (STAT_DEFINITION *)malloc(sizeof(*stat) * requests)))
740 SDDS_Bomb("memory allocation failure");
741 for (iReq = iStat = 0; iReq < requests; iReq++) {
742 if ((stat[iStat].sourceColumns = expandColumnPairNames(inData, &request[iReq].sourceColumn, NULL, request[iReq].sourceColumns, NULL, 0, FIND_NUMERIC_TYPE, 0)) <= 0) {
743 if (!noWarnings) {
744 fprintf(stderr, "Warning: no match for column names (sddsrowstats):\n");
745 for (iName = 0; iName < request[iReq].sourceColumns; iName++)
746 fprintf(stderr, "%s, ", request[iReq].sourceColumn[iName]);
747 fputc('\n', stderr);
748 }
749 } else {
750 stat[iStat].sourceColumn = request[iReq].sourceColumn;
751 stat[iStat].resultColumn = request[iReq].resultColumn;
752 stat[iStat].optionCode = request[iReq].optionCode;
753 stat[iStat].sumPower = request[iReq].sumPower;
754 stat[iStat].flags = request[iReq].flags;
755 stat[iStat].topLimit = request[iReq].topLimit;
756 stat[iStat].bottomLimit = request[iReq].bottomLimit;
757 stat[iStat].positionColumn = request[iReq].positionColumn;
758 stat[iStat].percent = request[iReq].percent;
759 iStat++;
760 }
761 *stats = iStat;
762 }
763
764 return stat;
765}
766
767long setupOutputFile(SDDS_DATASET *outData, char *output, SDDS_DATASET *inData, STAT_DEFINITION *stat, long stats, short columnMajorOrder) {
768 long column;
769 char s[SDDS_MAXLINE];
770
771 if (!SDDS_InitializeCopy(outData, inData, output, "w"))
772 return 0;
773 if (columnMajorOrder != -1)
774 outData->layout.data_mode.column_major = columnMajorOrder;
775 else
776 outData->layout.data_mode.column_major = inData->layout.data_mode.column_major;
777 for (column = 0; column < stats; column++) {
778 if (!SDDS_TransferColumnDefinition(outData, inData, stat[column].sourceColumn[0], stat[column].resultColumn)) {
779 sprintf(s, "Problem transferring definition of column %s to %s\n", stat[column].sourceColumn[0], stat[column].resultColumn);
780 SDDS_SetError(s);
781 return 0;
782 }
783 if ((stat[column].resultIndex = SDDS_GetColumnIndex(outData, stat[column].resultColumn)) < 0) {
784 sprintf(s, "Problem creating column %s", stat[column].resultColumn);
785 SDDS_SetError(s);
786 return 0;
787 }
788 if (stat[column].positionColumn) {
789 if (!SDDS_DefineSimpleColumn(outData, stat[column].positionColumn, NULL, SDDS_STRING)) {
790 sprintf(s, "Problem define column %s\n", stat[column].positionColumn);
791 SDDS_SetError(s);
792 return 0;
793 }
794 if ((stat[column].positionColumnIndex = SDDS_GetColumnIndex(outData, stat[column].positionColumn)) < 0) {
795 sprintf(s, "Problem creating column %s", stat[column].positionColumn);
796 SDDS_SetError(s);
797 return 0;
798 }
799 }
800 if (!SDDS_ChangeColumnInformation(outData, "description", "", SDDS_SET_BY_NAME, stat[column].resultColumn) ||
801 !SDDS_ChangeColumnInformation(outData, "symbol", "", SDDS_SET_BY_NAME, stat[column].resultColumn) ||
802 !SDDS_ChangeColumnInformation(outData, "type", "double", SDDS_SET_BY_NAME | SDDS_PASS_BY_STRING, stat[column].resultColumn)) {
803 sprintf(s, "Problem changing attributes of new column %s", stat[column].resultColumn);
804 SDDS_SetError(s);
805 return 0;
806 }
807 }
808 if (!SDDS_WriteLayout(outData))
809 return 0;
810 return 1;
811}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
Definition SDDS_info.c:364
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
double ipow(const double x, const int64_t p)
Compute x raised to the power p (x^p).
Definition ipow.c:33
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long compute_percentiles(double *position, double *percent, long positions, double *x, long n)
Computes multiple percentiles of an array of doubles.
Definition median.c:84
long compute_median(double *value, double *x, long n)
Computes the median of an array of doubles.
Definition median.c:29
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
Definition moments.c:127
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.