88#define SET_STANDARDDEVIATION 3
98#define SET_NOOVERLAP 11
101#define SET_PARTIALOK 14
102#define SET_MAJOR_ORDER 15
105char *option[N_OPTIONS] = {
124char *statSuffix[N_STATS] = {
137#define TOPLIMIT_GIVEN 0x0001U
138#define BOTTOMLIMIT_GIVEN 0x0002U
139#define INDEPENDENT_GIVEN 0x0004U
145 char **sourceColumn, *independentColumn;
146 long sourceColumns, sumPower, optionCode;
148 double topLimit, bottomLimit;
156 char **sourceColumn, **resultColumn, *independentColumn;
157 long sourceColumns, optionCode, *resultIndex, sumPower;
159 double topLimit, bottomLimit;
162long addStatRequests(
STAT_REQUEST **statRequest,
long requests,
char **item,
long items,
long code,
unsigned long flag);
166static char *USAGE =
"sddsrunstats [<input>] [<output>] [-pipe[=input][,output]]\n\
167[{-points=<integer> | -window=column=<column>,width=<value>}] [-noOverlap]\n\
169[-mean=[<limitOps>],<columnNameList>]\n\
170[-median=[<limitOps>],<columnNameList>]\n\
171[-minimum=[<limitOps>],<columnNameList>]\n\
172[-maximum=[<limitOps>],<columnNameList>]\n\
173[-standardDeviation=[<limitOps>],<columnNameList>]\n\
174[-sigma=[<limitOps>],<columnNameList>]\n\
175[-sum=[<limitOps>][,power=<integer>],<columnNameList>] \n\
176[-sample=[<limitOps>],<columnNameList>]]\n\
177[-slope=independent=<columnName>,<columnNameList>]\n\n\
178 <limitOps> is of the form [topLimit=<value>,][bottomLimit=<value>] [-majorOrder=row|column] \n\n\
179Computes running statistics of columns of data. The <columnNameList> may contain\n\
180wildcards, in which case an additional output column is produced for every matching\n\
181column. By default, statistics are done with a sliding window, so the values are\n\
182running statistics; for blocked statistics, use -noOverlap. For statistics on\n\
183the entire page, use -points=0.\n\
184The -partialOk option tells sddsrunstats to do computations even\n\
185if the number of available rows is less than the number of points\n\
186specified; by default, such data is simply ignored.\n\
187Program by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")\n";
189int main(
int argc,
char **argv) {
195 SCANNED_ARG *scanned;
199 int64_t pointsToStat;
200 long pointsToStat0, overlap;
201 int64_t rows, outputRowsMax, outputRows, outputRow;
202 long iArg, code, iStat, iColumn;
203 int64_t startRow, rowOffset;
204 char *input, *output, *windowColumn;
205 double *inputData, *outputData, topLimit, bottomLimit, *inputDataOffset, result, sum1, sum2, *newData;
206 double windowWidth, *windowData, slope, intercept, variance;
208 unsigned long pipeFlags, scanFlags, majorOrderFlag;
210 long lastRegion, region, windowRef, partialOk;
211 short columnMajorOrder = -1;
214 argc =
scanargs(&scanned, argc, argv);
216 bomb(
"too few arguments", USAGE);
220 input = output = NULL;
223 stats = requests = pipeFlags = 0;
230 for (iArg = 1; iArg < argc; iArg++) {
232 if (scanned[iArg].arg_type == OPTION) {
234 switch (code =
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
235 case SET_MAJOR_ORDER:
237 scanned[iArg].n_items--;
238 if (scanned[iArg].n_items > 0 &&
239 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
240 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
241 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
242 SDDS_Bomb(
"invalid -majorOrder syntax/values");
243 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
244 columnMajorOrder = 1;
245 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
246 columnMajorOrder = 0;
249 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1],
"%" SCNd64, &pointsToStat) != 1 ||
250 (pointsToStat <= 2 && pointsToStat != 0))
259 case SET_STANDARDDEVIATION:
264 if (scanned[iArg].n_items < 2) {
265 fprintf(stderr,
"error: invalid -%s syntax\n", option[code]);
268 if (!
scanItemList(&scanFlags, scanned[iArg].list, &scanned[iArg].n_items,
269 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS | SCANITEMLIST_IGNORE_VALUELESS,
270 "toplimit",
SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
271 "bottomlimit",
SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL)) {
272 sprintf(s,
"invalid -%s syntax", scanned[iArg].list[0]);
275 requests = addStatRequests(&request, requests, scanned[iArg].list + 1, scanned[iArg].n_items - 1, code, scanFlags);
276 request[requests - 1].topLimit = topLimit;
277 request[requests - 1].bottomLimit = bottomLimit;
280 if (scanned[iArg].n_items < 2) {
281 fprintf(stderr,
"error: invalid -%s syntax\n", option[code]);
285 if (!
scanItemList(&scanFlags, scanned[iArg].list, &scanned[iArg].n_items,
286 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS | SCANITEMLIST_IGNORE_VALUELESS,
288 "toplimit",
SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
289 "bottomlimit",
SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL))
291 requests = addStatRequests(&request, requests, scanned[iArg].list + 1, scanned[iArg].n_items - 1, code, scanFlags);
292 request[requests - 1].sumPower = power;
293 request[requests - 1].topLimit = topLimit;
294 request[requests - 1].bottomLimit = bottomLimit;
297 if (scanned[iArg].n_items < 2) {
298 fprintf(stderr,
"error: invalid -%s syntax\n", option[code]);
301 if (!
scanItemList(&scanFlags, scanned[iArg].list, &scanned[iArg].n_items,
302 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS | SCANITEMLIST_IGNORE_VALUELESS,
303 "independent",
SDDS_STRING, &independent, 1, INDEPENDENT_GIVEN,
305 !(scanFlags&INDEPENDENT_GIVEN))
307 requests = addStatRequests(&request, requests, scanned[iArg].list + 1, scanned[iArg].n_items - 1, code, scanFlags);
308 request[requests - 1].independentColumn = independent;
309 request[requests - 1].topLimit = request[requests - 1].bottomLimit = 0;
312 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
318 scanned[iArg].n_items -= 1;
319 if (!
scanItemList(&scanFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
323 !strlen(windowColumn) ||
325 SDDS_Bomb(
"invalid -window syntax/values");
331 fprintf(stderr,
"error: unknown option '%s' given\n", scanned[iArg].list[0]);
338 input = scanned[iArg].list[0];
340 output = scanned[iArg].list[0];
346 if (pointsToStat < 0 && !windowColumn) {
357 if (!(stat = compileStatDefinitions(&inData, request, requests)))
361 fprintf(stderr,
"%ld stats\n", stats);
362 for (iStat = 0; iStat < stats; iStat++) {
363 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
364 fprintf(stderr,
"iStat=%ld iColumn=%ld source=%s result=%s\n", iStat, iColumn, stat[iStat].sourceColumn[iColumn], stat[iStat].resultColumn[iColumn]);
365 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN)
366 fprintf(stderr,
" bottom = %e\n", stat[iStat].bottomLimit);
367 if (stat[iStat].flags & TOPLIMIT_GIVEN)
368 fprintf(stderr,
" top = %e\n", stat[iStat].topLimit);
373 if (!setupOutputFile(&outData, output, &inData, stat, stats, columnMajorOrder))
380 SDDS_Bomb(
"Window column is not numeric");
385 pointsToStat0 = pointsToStat;
388 pointsToStat = pointsToStat0;
389 if (pointsToStat == 0)
395 if (rows < pointsToStat) {
402 outputRows = rows - pointsToStat + 1;
404 outputRows = rows / pointsToStat;
412 if (outputRows > outputRowsMax &&
413 !(outputData =
SDDS_Realloc(outputData,
sizeof(*outputData) * (outputRowsMax = outputRows))))
415 for (iStat = 0; iStat < stats; iStat++) {
416 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
423 if (stat[iStat].independentColumn &&
426 for (outputRow = startRow = 0; outputRow < outputRows; outputRow++, startRow += (overlap ? 1 : pointsToStat)) {
428 short windowFound = 0;
433 for (pointsToStat = 1; pointsToStat < outputRows - startRow; pointsToStat++) {
434 region = (windowData[startRow + pointsToStat] - windowData[windowRef]) / windowWidth;
435 if (region != lastRegion) {
441 if (!windowFound && pointsToStat < 2)
443 if (startRow + pointsToStat > rows) {
444 pointsToStat = rows - startRow - 1;
445 if (pointsToStat <= 0)
449 fprintf(stderr,
"row=%" PRId64
" pointsToStat=%" PRId64
" delta=%.9lf (%.9lf -> %.9lf)\n", startRow, pointsToStat, windowData[startRow + pointsToStat - 1] - windowData[startRow], windowData[startRow], windowData[startRow + pointsToStat - 1]);
452 inputDataOffset = inputData + startRow;
453 switch (stat[iStat].optionCode) {
456 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
457 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
458 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
460 if (inputDataOffset[rowOffset] > result)
461 result = inputDataOffset[rowOffset];
466 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
467 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
468 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
470 if (inputDataOffset[rowOffset] < result)
471 result = inputDataOffset[rowOffset];
477 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
478 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
479 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
481 result += inputDataOffset[rowOffset];
492 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
493 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
494 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
496 newData =
SDDS_Realloc(newData,
sizeof(*newData) * (count + 1));
497 newData[count] = inputDataOffset[rowOffset];
508 case SET_STANDARDDEVIATION:
510 sum1 = sum2 = count = 0;
511 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
512 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
513 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
515 sum1 += inputDataOffset[rowOffset];
516 sum2 += inputDataOffset[rowOffset] * inputDataOffset[rowOffset];
520 if ((result = sum2 / count - sqr(sum1 / count)) <= 0)
523 result = sqrt(result * count / (count - 1.0));
524 if (stat[iStat].optionCode == SET_SIGMA)
525 result /= sqrt(count);
530 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
531 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
532 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
534 sum2 += inputDataOffset[rowOffset] * inputDataOffset[rowOffset];
538 result = sqrt(sum2 / count);
544 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
545 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
546 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
548 sum1 +=
ipow(inputDataOffset[rowOffset], stat[iStat].sumPower);
557 if (!
unweightedLinearFit(indepData+startRow, inputDataOffset, pointsToStat, &slope, &intercept,
565 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
566 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
567 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
569 result = inputDataOffset[rowOffset];
574 fprintf(stderr,
"Unknown statistics code %ld in sddsrunave\n", stat[iStat].optionCode);
578 outputData[outputRow] = result;
603long addStatRequests(
STAT_REQUEST **statRequest,
long requests,
char **item,
long items,
long code,
unsigned long flags) {
605 if (!(*statRequest =
SDDS_Realloc(*statRequest,
sizeof(**statRequest) * (requests + 1))) ||
606 !((*statRequest)[requests].sourceColumn = (
char **)malloc(
sizeof(*(*statRequest)[requests].sourceColumn) * items)))
608 for (i = 0; i < items; i++) {
609 (*statRequest)[requests].sourceColumn[i] = item[i];
611 (*statRequest)[requests].sourceColumns = items;
612 (*statRequest)[requests].optionCode = code;
613 (*statRequest)[requests].sumPower = 1;
614 (*statRequest)[requests].flags = flags;
615 (*statRequest)[requests].independentColumn = NULL;
622 char s[SDDS_MAXLINE];
626 for (iReq = 0; iReq < requests; iReq++) {
627 if ((stat[iReq].sourceColumns = expandColumnPairNames(inData, &request[iReq].sourceColumn, NULL, request[iReq].sourceColumns, NULL, 0, FIND_NUMERIC_TYPE, 0)) <= 0) {
628 fprintf(stderr,
"Error: no match for column names (sddsrunstats):\n");
629 for (iName = 0; iName < request[iReq].sourceColumns; iName++)
630 fprintf(stderr,
"%s, ", request[iReq].sourceColumn[iReq]);
634 stat[iReq].sourceColumn = request[iReq].sourceColumn;
635 if (!(stat[iReq].resultColumn = malloc(
sizeof(*stat[iReq].resultColumn) * stat[iReq].sourceColumns)) ||
636 !(stat[iReq].resultIndex = malloc(
sizeof(*stat[iReq].resultIndex) * stat[iReq].sourceColumns))) {
639 for (iName = 0; iName < stat[iReq].sourceColumns; iName++) {
640 sprintf(s,
"%s%s", stat[iReq].sourceColumn[iName], statSuffix[request[iReq].optionCode]);
643 stat[iReq].optionCode = request[iReq].optionCode;
644 stat[iReq].sumPower = request[iReq].sumPower;
645 stat[iReq].flags = request[iReq].flags;
646 stat[iReq].topLimit = request[iReq].topLimit;
647 stat[iReq].bottomLimit = request[iReq].bottomLimit;
648 stat[iReq].independentColumn = request[iReq].independentColumn;
656 char s[SDDS_MAXLINE];
660 if (columnMajorOrder != -1)
661 outData->layout.data_mode.column_major = columnMajorOrder;
663 outData->layout.data_mode.column_major = inData->layout.data_mode.column_major;
664 for (iStat = 0; iStat < stats; iStat++) {
665 for (column = 0; column < stat[iStat].sourceColumns; column++) {
667 sprintf(s,
"Problem transferring definition of column %s to %s\n", stat[iStat].sourceColumn[column], stat[iStat].resultColumn[column]);
671 if ((stat[iStat].resultIndex[column] =
SDDS_GetColumnIndex(outData, stat[iStat].resultColumn[column])) < 0) {
672 sprintf(s,
"Problem creating column %s", stat[iStat].resultColumn[column]);
679 sprintf(s,
"Problem changing attributes of new column %s", stat[iStat].resultColumn[column]);
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_CopyArrays(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllArrayDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all array definitions from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
#define SDDS_NUMERIC_TYPE(type)
Checks if the given type identifier corresponds to any numeric type.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
double ipow(const double x, const int64_t p)
Compute x raised to the power p (x^p).
long unweightedLinearFit(double *xData, double *yData, long nData, double *slope, double *intercept, double *variance)
Performs an unweighted linear fit on the provided data.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.