SDDSlib
Loading...
Searching...
No Matches
sddsrunstats.c
1/*************************************************************************\
2 * Copyright (c) 2002 The University of Chicago, as Operator of Argonne
3 * National Laboratory.
4 * Copyright (c) 2002 The Regents of the University of California, as
5 * Operator of Los Alamos National Laboratory.
6 * This file is distributed subject to a Software License Agreement found
7 * in the file LICENSE that is included with this distribution.
8\*************************************************************************/
9
10/* program: sddsrunstats
11 *
12 $Log: not supported by cvs2svn $
13 Revision 1.17 2006/12/14 22:22:00 soliday
14 Updated a bunch of programs because SDDS_SaveLayout is now called by
15 SDDS_WriteLayout and it is no longer required to be called directly.
16 Also the AutoCheckMode is turned off by default now so I removed calls to
17 SDDS_SetAutoCheckMode that would attempt to turn it off. It is now up to
18 the programmer to turn it on in new programs until debugging is completed
19 and then remove the call to SDDS_SetAutoCheckMode.
20
21 Revision 1.16 2006/10/19 17:55:40 soliday
22 Updated to work with linux-x86_64.
23
24 Revision 1.15 2005/11/04 22:46:17 soliday
25 Updated code to be compiled by a 64 bit processor.
26
27 Revision 1.14 2004/03/16 23:44:24 borland
28 Fixed parsing bug that caused problems when column named looked like an abbreviated tag name
29
30 Revision 1.13 2004/03/16 23:25:38 borland
31 Added option to scanItemList to prevent problems when a column is specified
32 that happens to look like a qualifier name.
33
34 Revision 1.12 2003/06/15 17:32:33 borland
35 If number of points is 0, now does stats on the whole page.
36
37 Revision 1.11 2002/08/14 17:12:53 soliday
38 Added Open License
39
40 Revision 1.10 2001/05/18 21:35:50 borland
41 Fixed memory leak.
42
43 Revision 1.9 2001/01/10 19:35:46 soliday
44 Standardized usage message.
45
46 Revision 1.8 1999/05/25 19:14:44 soliday
47 Removed compiler warning on linux.
48
49 Revision 1.7 1999/01/06 19:54:55 borland
50 Fixed the version number in the usage message.
51
52 Revision 1.6 1998/12/16 21:26:06 borland
53 Brought up to date with new version of SDDS_TransferAllParameters.
54 Now correctly transfers through parameters, but overwrites them if it
55 needs to do so.
56
57 Revision 1.5 1998/08/14 17:13:24 borland
58 Improved the way the number of output rows is computed. Now allows
59 statistics on partial segments, if -partialOk option is given.
60
61 Revision 1.4 1998/01/10 18:26:13 borland
62 Now works properly in overlap mode with -window option.
63
64 Revision 1.3 1997/10/30 20:21:28 borland
65 Added option to block the points using a window on a specified column.
66 So, for example, one can now average points over specified time intervals.
67
68 Revision 1.2 1996/11/13 18:17:13 borland
69 Now actually does *running* statistics in addition to blocked statistics.
70
71 Revision 1.1 1996/11/13 03:44:32 borland
72 First version.
73
74 *
75 */
76#include "mdb.h"
77#include "scan.h"
78#include "SDDS.h"
79#include "SDDSutils.h"
80#include <ctype.h>
81
82/* if statistics are added, they must be added before the SET_POINTS
83 item in this list and the following options array
84*/
85#define SET_MAXIMUM 0
86#define SET_MINIMUM 1
87#define SET_MEAN 2
88#define SET_STANDARDDEVIATION 3
89#define SET_RMS 4
90#define SET_SUM 5
91#define SET_SIGMA 6
92#define SET_SAMPLE 7
93#define SET_MEDIAN 8
94#define SET_SLOPE 9
95/* Putting this here reminds us that option[0-9] and statSuffix[0-9] arrays have to line up */
96#define N_STATS 10
97#define SET_POINTS 10
98#define SET_NOOVERLAP 11
99#define SET_PIPE 12
100#define SET_WINDOW 13
101#define SET_PARTIALOK 14
102#define SET_MAJOR_ORDER 15
103#define N_OPTIONS 16
104
105char *option[N_OPTIONS] = {
106 "maximum",
107 "minimum",
108 "mean",
109 "standarddeviation",
110 "rms",
111 "sum",
112 "sigma",
113 "sample",
114 "median",
115 "slope",
116 "points",
117 "nooverlap",
118 "pipe",
119 "window",
120 "partialok",
121 "majorOrder",
122};
123
124char *statSuffix[N_STATS] = {
125 "Max",
126 "Min",
127 "Mean",
128 "StDev",
129 "RMS",
130 "Sum",
131 "Sigma",
132 "",
133 "Median",
134 "Slope"
135};
136
137#define TOPLIMIT_GIVEN 0x0001U
138#define BOTTOMLIMIT_GIVEN 0x0002U
139#define INDEPENDENT_GIVEN 0x0004U
140
141/* this structure stores a command-line request for statistics computation */
142/* individual elements of sourceColumn may contain wildcards */
143typedef struct
144{
145 char **sourceColumn, *independentColumn;
146 long sourceColumns, sumPower, optionCode;
147 unsigned long flags;
148 double topLimit, bottomLimit;
150
151/* this structure stores data necessary for accessing/creating SDDS columns and
152 * for computing a statistic
153 */
154typedef struct
155{
156 char **sourceColumn, **resultColumn, *independentColumn;
157 long sourceColumns, optionCode, *resultIndex, sumPower;
158 unsigned long flags;
159 double topLimit, bottomLimit;
161
162long addStatRequests(STAT_REQUEST **statRequest, long requests, char **item, long items, long code, unsigned long flag);
163STAT_DEFINITION *compileStatDefinitions(SDDS_DATASET *inTable, STAT_REQUEST *request, long requests);
164long setupOutputFile(SDDS_DATASET *outTable, char *output, SDDS_DATASET *inTable, STAT_DEFINITION *stat, long stats, short columnMajorOrder);
165
166static char *USAGE = "sddsrunstats [<input>] [<output>] [-pipe[=input][,output]]\n\
167[{-points=<integer> | -window=column=<column>,width=<value>}] [-noOverlap]\n\
168[-partialOk]\n\
169[-mean=[<limitOps>],<columnNameList>]\n\
170[-median=[<limitOps>],<columnNameList>]\n\
171[-minimum=[<limitOps>],<columnNameList>]\n\
172[-maximum=[<limitOps>],<columnNameList>]\n\
173[-standardDeviation=[<limitOps>],<columnNameList>]\n\
174[-sigma=[<limitOps>],<columnNameList>]\n\
175[-sum=[<limitOps>][,power=<integer>],<columnNameList>] \n\
176[-sample=[<limitOps>],<columnNameList>]]\n\
177[-slope=independent=<columnName>,<columnNameList>]\n\n\
178 <limitOps> is of the form [topLimit=<value>,][bottomLimit=<value>] [-majorOrder=row|column] \n\n\
179Computes running statistics of columns of data. The <columnNameList> may contain\n\
180wildcards, in which case an additional output column is produced for every matching\n\
181column. By default, statistics are done with a sliding window, so the values are\n\
182running statistics; for blocked statistics, use -noOverlap. For statistics on\n\
183the entire page, use -points=0.\n\
184The -partialOk option tells sddsrunstats to do computations even\n\
185if the number of available rows is less than the number of points\n\
186specified; by default, such data is simply ignored.\n\
187Program by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
188
189int main(int argc, char **argv) {
190 STAT_DEFINITION *stat;
191 long stats;
192 STAT_REQUEST *request;
193 long requests;
194 int64_t count;
195 SCANNED_ARG *scanned;
196 SDDS_DATASET inData, outData;
197 char *independent;
198 int32_t power;
199 int64_t pointsToStat;
200 long pointsToStat0, overlap;
201 int64_t rows, outputRowsMax, outputRows, outputRow;
202 long iArg, code, iStat, iColumn;
203 int64_t startRow, rowOffset;
204 char *input, *output, *windowColumn;
205 double *inputData, *outputData, topLimit, bottomLimit, *inputDataOffset, result, sum1, sum2, *newData;
206 double windowWidth, *windowData, slope, intercept, variance;
207 long windowIndex;
208 unsigned long pipeFlags, scanFlags, majorOrderFlag;
209 char s[100];
210 long lastRegion, region, windowRef, partialOk;
211 short columnMajorOrder = -1;
212
214 argc = scanargs(&scanned, argc, argv);
215 if (argc < 2) {
216 bomb("too few arguments", USAGE);
217 }
218 newData = NULL;
219 result = 0;
220 input = output = NULL;
221 stat = NULL;
222 request = NULL;
223 stats = requests = pipeFlags = 0;
224 pointsToStat = -1;
225 partialOk = 0;
226 overlap = 1;
227 windowColumn = NULL;
228 windowData = NULL;
229
230 for (iArg = 1; iArg < argc; iArg++) {
231 scanFlags = 0;
232 if (scanned[iArg].arg_type == OPTION) {
233 /* process options here */
234 switch (code = match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
235 case SET_MAJOR_ORDER:
236 majorOrderFlag = 0;
237 scanned[iArg].n_items--;
238 if (scanned[iArg].n_items > 0 &&
239 (!scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
240 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
241 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
242 SDDS_Bomb("invalid -majorOrder syntax/values");
243 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
244 columnMajorOrder = 1;
245 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
246 columnMajorOrder = 0;
247 break;
248 case SET_POINTS:
249 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%" SCNd64, &pointsToStat) != 1 ||
250 (pointsToStat <= 2 && pointsToStat != 0))
251 SDDS_Bomb("invalid -points syntax");
252 break;
253 case SET_NOOVERLAP:
254 overlap = 0;
255 break;
256 case SET_MAXIMUM:
257 case SET_MINIMUM:
258 case SET_MEAN:
259 case SET_STANDARDDEVIATION:
260 case SET_RMS:
261 case SET_SIGMA:
262 case SET_SAMPLE:
263 case SET_MEDIAN:
264 if (scanned[iArg].n_items < 2) {
265 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
266 exit(1);
267 }
268 if (!scanItemList(&scanFlags, scanned[iArg].list, &scanned[iArg].n_items,
269 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS | SCANITEMLIST_IGNORE_VALUELESS,
270 "toplimit", SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
271 "bottomlimit", SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL)) {
272 sprintf(s, "invalid -%s syntax", scanned[iArg].list[0]);
273 SDDS_Bomb(s);
274 }
275 requests = addStatRequests(&request, requests, scanned[iArg].list + 1, scanned[iArg].n_items - 1, code, scanFlags);
276 request[requests - 1].topLimit = topLimit;
277 request[requests - 1].bottomLimit = bottomLimit;
278 break;
279 case SET_SUM:
280 if (scanned[iArg].n_items < 2) {
281 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
282 exit(1);
283 }
284 power = 1;
285 if (!scanItemList(&scanFlags, scanned[iArg].list, &scanned[iArg].n_items,
286 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS | SCANITEMLIST_IGNORE_VALUELESS,
287 "power", SDDS_LONG, &power, 1, 0,
288 "toplimit", SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
289 "bottomlimit", SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL))
290 SDDS_Bomb("invalid -sum syntax");
291 requests = addStatRequests(&request, requests, scanned[iArg].list + 1, scanned[iArg].n_items - 1, code, scanFlags);
292 request[requests - 1].sumPower = power;
293 request[requests - 1].topLimit = topLimit;
294 request[requests - 1].bottomLimit = bottomLimit;
295 break;
296 case SET_SLOPE:
297 if (scanned[iArg].n_items < 2) {
298 fprintf(stderr, "error: invalid -%s syntax\n", option[code]);
299 exit(1);
300 }
301 if (!scanItemList(&scanFlags, scanned[iArg].list, &scanned[iArg].n_items,
302 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS | SCANITEMLIST_IGNORE_VALUELESS,
303 "independent", SDDS_STRING, &independent, 1, INDEPENDENT_GIVEN,
304 NULL) ||
305 !(scanFlags&INDEPENDENT_GIVEN))
306 SDDS_Bomb("invalid -slope syntax");
307 requests = addStatRequests(&request, requests, scanned[iArg].list + 1, scanned[iArg].n_items - 1, code, scanFlags);
308 request[requests - 1].independentColumn = independent;
309 request[requests - 1].topLimit = request[requests - 1].bottomLimit = 0;
310 break;
311 case SET_PIPE:
312 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
313 SDDS_Bomb("invalid -pipe syntax");
314 break;
315 case SET_WINDOW:
316 windowWidth = -1;
317 windowColumn = NULL;
318 scanned[iArg].n_items -= 1;
319 if (!scanItemList(&scanFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
320 "column", SDDS_STRING, &windowColumn, 1, 0,
321 "width", SDDS_DOUBLE, &windowWidth, 1, 0, NULL) ||
322 !windowColumn ||
323 !strlen(windowColumn) ||
324 windowWidth <= 0)
325 SDDS_Bomb("invalid -window syntax/values");
326 break;
327 case SET_PARTIALOK:
328 partialOk = 1;
329 break;
330 default:
331 fprintf(stderr, "error: unknown option '%s' given\n", scanned[iArg].list[0]);
332 exit(1);
333 break;
334 }
335 } else {
336 /* argument is filename */
337 if (!input)
338 input = scanned[iArg].list[0];
339 else if (!output)
340 output = scanned[iArg].list[0];
341 else
342 SDDS_Bomb("too many filenames seen");
343 }
344 }
345
346 if (pointsToStat < 0 && !windowColumn) {
347 pointsToStat = 10;
348 }
349 processFilenames("sddsrunstats", &input, &output, pipeFlags, 0, NULL);
350
351 if (!requests)
352 SDDS_Bomb("no statistics requested");
353
354 if (!SDDS_InitializeInput(&inData, input))
355 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
356
357 if (!(stat = compileStatDefinitions(&inData, request, requests)))
358 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
359 stats = requests;
360#ifdef DEBUG
361 fprintf(stderr, "%ld stats\n", stats);
362 for (iStat = 0; iStat < stats; iStat++) {
363 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
364 fprintf(stderr, "iStat=%ld iColumn=%ld source=%s result=%s\n", iStat, iColumn, stat[iStat].sourceColumn[iColumn], stat[iStat].resultColumn[iColumn]);
365 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN)
366 fprintf(stderr, " bottom = %e\n", stat[iStat].bottomLimit);
367 if (stat[iStat].flags & TOPLIMIT_GIVEN)
368 fprintf(stderr, " top = %e\n", stat[iStat].topLimit);
369 }
370 }
371#endif
372
373 if (!setupOutputFile(&outData, output, &inData, stat, stats, columnMajorOrder))
374 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
375
376 if (windowColumn) {
377 if ((windowIndex = SDDS_GetColumnIndex(&inData, windowColumn)) < 0)
378 SDDS_Bomb("Window column not present");
379 if (!SDDS_NUMERIC_TYPE(SDDS_GetColumnType(&inData, windowIndex)))
380 SDDS_Bomb("Window column is not numeric");
381 }
382
383 outputData = NULL;
384 outputRowsMax = 0;
385 pointsToStat0 = pointsToStat;
386 while ((code = SDDS_ReadPage(&inData)) > 0) {
387 rows = SDDS_CountRowsOfInterest(&inData);
388 pointsToStat = pointsToStat0;
389 if (pointsToStat == 0)
390 pointsToStat = rows;
391 if (windowColumn && !(windowData = SDDS_GetColumnInDoubles(&inData, windowColumn))) {
392 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
393 }
394 if (!windowColumn) {
395 if (rows < pointsToStat) {
396 if (partialOk)
397 pointsToStat = rows;
398 else
399 continue;
400 }
401 if (overlap) {
402 outputRows = rows - pointsToStat + 1;
403 } else
404 outputRows = rows / pointsToStat;
405 } else
406 outputRows = rows;
407
408 if (!SDDS_StartPage(&outData, outputRows) ||
409 !SDDS_CopyParameters(&outData, &inData) ||
410 !SDDS_CopyArrays(&outData, &inData))
411 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
412 if (outputRows > outputRowsMax &&
413 !(outputData = SDDS_Realloc(outputData, sizeof(*outputData) * (outputRowsMax = outputRows))))
414 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
415 for (iStat = 0; iStat < stats; iStat++) {
416 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
417 double *indepData;
418 lastRegion = 0;
419 windowRef = 0;
420 indepData = NULL;
421 if (!(inputData = SDDS_GetColumnInDoubles(&inData, stat[iStat].sourceColumn[iColumn])))
422 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
423 if (stat[iStat].independentColumn &&
424 !(indepData = SDDS_GetColumnInDoubles(&inData, stat[iStat].independentColumn)))
425 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
426 for (outputRow = startRow = 0; outputRow < outputRows; outputRow++, startRow += (overlap ? 1 : pointsToStat)) {
427 if (windowColumn) {
428 short windowFound = 0;
429 if (overlap) {
430 windowRef += 1;
431 lastRegion = 0;
432 }
433 for (pointsToStat = 1; pointsToStat < outputRows - startRow; pointsToStat++) {
434 region = (windowData[startRow + pointsToStat] - windowData[windowRef]) / windowWidth;
435 if (region != lastRegion) {
436 lastRegion = region;
437 windowFound = 1;
438 break;
439 }
440 }
441 if (!windowFound && pointsToStat < 2)
442 break;
443 if (startRow + pointsToStat > rows) {
444 pointsToStat = rows - startRow - 1;
445 if (pointsToStat <= 0)
446 break;
447 }
448#ifdef DEBUG
449 fprintf(stderr, "row=%" PRId64 " pointsToStat=%" PRId64 " delta=%.9lf (%.9lf -> %.9lf)\n", startRow, pointsToStat, windowData[startRow + pointsToStat - 1] - windowData[startRow], windowData[startRow], windowData[startRow + pointsToStat - 1]);
450#endif
451 }
452 inputDataOffset = inputData + startRow;
453 switch (stat[iStat].optionCode) {
454 case SET_MAXIMUM:
455 result = -DBL_MAX;
456 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
457 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
458 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
459 continue;
460 if (inputDataOffset[rowOffset] > result)
461 result = inputDataOffset[rowOffset];
462 }
463 break;
464 case SET_MINIMUM:
465 result = DBL_MAX;
466 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
467 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
468 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
469 continue;
470 if (inputDataOffset[rowOffset] < result)
471 result = inputDataOffset[rowOffset];
472 }
473 break;
474 case SET_MEAN:
475 result = 0;
476 count = 0;
477 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
478 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
479 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
480 continue;
481 result += inputDataOffset[rowOffset];
482 count++;
483 }
484 if (count)
485 result /= count;
486 else
487 result = DBL_MAX;
488 break;
489 case SET_MEDIAN:
490 result = 0;
491 count = 0;
492 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
493 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
494 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
495 continue;
496 newData = SDDS_Realloc(newData, sizeof(*newData) * (count + 1));
497 newData[count] = inputDataOffset[rowOffset];
498 count++;
499 }
500 if (count) {
501 if (!compute_median(&result, newData, count))
502 result = DBL_MAX;
503 free(newData);
504 newData = NULL;
505 } else
506 result = DBL_MAX;
507 break;
508 case SET_STANDARDDEVIATION:
509 case SET_SIGMA:
510 sum1 = sum2 = count = 0;
511 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
512 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
513 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
514 continue;
515 sum1 += inputDataOffset[rowOffset];
516 sum2 += inputDataOffset[rowOffset] * inputDataOffset[rowOffset];
517 count++;
518 }
519 if (count > 1) {
520 if ((result = sum2 / count - sqr(sum1 / count)) <= 0)
521 result = 0;
522 else
523 result = sqrt(result * count / (count - 1.0));
524 if (stat[iStat].optionCode == SET_SIGMA)
525 result /= sqrt(count);
526 }
527 break;
528 case SET_RMS:
529 sum2 = count = 0;
530 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
531 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
532 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
533 continue;
534 sum2 += inputDataOffset[rowOffset] * inputDataOffset[rowOffset];
535 count++;
536 }
537 if (count > 0)
538 result = sqrt(sum2 / count);
539 else
540 result = DBL_MAX;
541 break;
542 case SET_SUM:
543 sum1 = count = 0;
544 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
545 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
546 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
547 continue;
548 sum1 += ipow(inputDataOffset[rowOffset], stat[iStat].sumPower);
549 count++;
550 }
551 if (count > 0)
552 result = sum1;
553 else
554 result = DBL_MAX;
555 break;
556 case SET_SLOPE:
557 if (!unweightedLinearFit(indepData+startRow, inputDataOffset, pointsToStat, &slope, &intercept,
558 &variance)) {
559 result = DBL_MAX;
560 } else
561 result = slope;
562 break;
563 case SET_SAMPLE:
564 result = DBL_MAX;
565 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
566 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
567 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
568 continue;
569 result = inputDataOffset[rowOffset];
570 break;
571 }
572 break;
573 default:
574 fprintf(stderr, "Unknown statistics code %ld in sddsrunave\n", stat[iStat].optionCode);
575 exit(1);
576 break;
577 }
578 outputData[outputRow] = result;
579 }
580 if (!SDDS_SetColumnFromDoubles(&outData, SDDS_SET_BY_INDEX, outputData, outputRow, stat[iStat].resultIndex[iColumn]))
581 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
582 free(inputData);
583 }
584 }
585 if (windowColumn)
586 free(windowData);
587 if (!SDDS_WritePage(&outData))
588 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
589 }
590 if (SDDS_Terminate(&inData) != 1) {
591 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
592 return (1);
593 }
594 if (SDDS_Terminate(&outData) != 1) {
595 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
596 return (1);
597 }
598 if (outputData)
599 free(outputData);
600 return (0);
601}
602
603long addStatRequests(STAT_REQUEST **statRequest, long requests, char **item, long items, long code, unsigned long flags) {
604 long i;
605 if (!(*statRequest = SDDS_Realloc(*statRequest, sizeof(**statRequest) * (requests + 1))) ||
606 !((*statRequest)[requests].sourceColumn = (char **)malloc(sizeof(*(*statRequest)[requests].sourceColumn) * items)))
607 SDDS_Bomb("memory allocation failure");
608 for (i = 0; i < items; i++) {
609 (*statRequest)[requests].sourceColumn[i] = item[i];
610 }
611 (*statRequest)[requests].sourceColumns = items;
612 (*statRequest)[requests].optionCode = code;
613 (*statRequest)[requests].sumPower = 1;
614 (*statRequest)[requests].flags = flags;
615 (*statRequest)[requests].independentColumn = NULL;
616 return requests + 1;
617}
618
619STAT_DEFINITION *compileStatDefinitions(SDDS_DATASET *inData, STAT_REQUEST *request, long requests) {
620 STAT_DEFINITION *stat;
621 long iReq, iName;
622 char s[SDDS_MAXLINE];
623
624 if (!(stat = (STAT_DEFINITION *)malloc(sizeof(*stat) * requests)))
625 SDDS_Bomb("memory allocation failure");
626 for (iReq = 0; iReq < requests; iReq++) {
627 if ((stat[iReq].sourceColumns = expandColumnPairNames(inData, &request[iReq].sourceColumn, NULL, request[iReq].sourceColumns, NULL, 0, FIND_NUMERIC_TYPE, 0)) <= 0) {
628 fprintf(stderr, "Error: no match for column names (sddsrunstats):\n");
629 for (iName = 0; iName < request[iReq].sourceColumns; iName++)
630 fprintf(stderr, "%s, ", request[iReq].sourceColumn[iReq]);
631 fputc('\n', stderr);
632 exit(1);
633 }
634 stat[iReq].sourceColumn = request[iReq].sourceColumn;
635 if (!(stat[iReq].resultColumn = malloc(sizeof(*stat[iReq].resultColumn) * stat[iReq].sourceColumns)) ||
636 !(stat[iReq].resultIndex = malloc(sizeof(*stat[iReq].resultIndex) * stat[iReq].sourceColumns))) {
637 SDDS_Bomb("memory allocation failure");
638 }
639 for (iName = 0; iName < stat[iReq].sourceColumns; iName++) {
640 sprintf(s, "%s%s", stat[iReq].sourceColumn[iName], statSuffix[request[iReq].optionCode]);
641 SDDS_CopyString(stat[iReq].resultColumn + iName, s);
642 }
643 stat[iReq].optionCode = request[iReq].optionCode;
644 stat[iReq].sumPower = request[iReq].sumPower;
645 stat[iReq].flags = request[iReq].flags;
646 stat[iReq].topLimit = request[iReq].topLimit;
647 stat[iReq].bottomLimit = request[iReq].bottomLimit;
648 stat[iReq].independentColumn = request[iReq].independentColumn;
649 }
650
651 return stat;
652}
653
654long setupOutputFile(SDDS_DATASET *outData, char *output, SDDS_DATASET *inData, STAT_DEFINITION *stat, long stats, short columnMajorOrder) {
655 long column, iStat;
656 char s[SDDS_MAXLINE];
657
658 if (!SDDS_InitializeOutput(outData, SDDS_BINARY, 1, NULL, NULL, output))
659 return 0;
660 if (columnMajorOrder != -1)
661 outData->layout.data_mode.column_major = columnMajorOrder;
662 else
663 outData->layout.data_mode.column_major = inData->layout.data_mode.column_major;
664 for (iStat = 0; iStat < stats; iStat++) {
665 for (column = 0; column < stat[iStat].sourceColumns; column++) {
666 if (!SDDS_TransferColumnDefinition(outData, inData, stat[iStat].sourceColumn[column], stat[iStat].resultColumn[column])) {
667 sprintf(s, "Problem transferring definition of column %s to %s\n", stat[iStat].sourceColumn[column], stat[iStat].resultColumn[column]);
668 SDDS_SetError(s);
669 return 0;
670 }
671 if ((stat[iStat].resultIndex[column] = SDDS_GetColumnIndex(outData, stat[iStat].resultColumn[column])) < 0) {
672 sprintf(s, "Problem creating column %s", stat[iStat].resultColumn[column]);
673 SDDS_SetError(s);
674 return 0;
675 }
676 if (!SDDS_ChangeColumnInformation(outData, "description", NULL, SDDS_SET_BY_NAME, stat[iStat].resultColumn[column]) ||
677 !SDDS_ChangeColumnInformation(outData, "symbol", NULL, SDDS_SET_BY_NAME, stat[iStat].resultColumn[column]) ||
678 !SDDS_ChangeColumnInformation(outData, "type", "double", SDDS_SET_BY_NAME | SDDS_PASS_BY_STRING, stat[iStat].resultColumn[column])) {
679 sprintf(s, "Problem changing attributes of new column %s", stat[iStat].resultColumn[column]);
680 SDDS_SetError(s);
681 return 0;
682 }
683 }
684 }
685 if (!SDDS_TransferAllParameterDefinitions(outData, inData, SDDS_TRANSFER_KEEPOLD) ||
686 !SDDS_TransferAllArrayDefinitions(outData, inData, 0) ||
687 !SDDS_WriteLayout(outData))
688 return 0;
689 return 1;
690}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_CopyArrays(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:334
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
Definition SDDS_info.c:364
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllArrayDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all array definitions from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
#define SDDS_NUMERIC_TYPE(type)
Checks if the given type identifier corresponds to any numeric type.
Definition SDDStypes.h:138
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
double ipow(const double x, const int64_t p)
Compute x raised to the power p (x^p).
Definition ipow.c:33
long unweightedLinearFit(double *xData, double *yData, long nData, double *slope, double *intercept, double *variance)
Performs an unweighted linear fit on the provided data.
Definition linfit.c:37
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long compute_median(double *value, double *x, long n)
Computes the median of an array of doubles.
Definition median.c:29
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.