85 SET_STANDARDDEVIATION,
101char *option[N_OPTIONS] = {
122char *statSuffix[N_STATS] = {
135#define TOPLIMIT_GIVEN 0x0001U
136#define BOTTOMLIMIT_GIVEN 0x0002U
137#define INDEPENDENT_GIVEN 0x0004U
143 char **sourceColumn, *independentColumn;
144 long sourceColumns, sumPower, optionCode;
146 double topLimit, bottomLimit;
154 char **sourceColumn, **resultColumn, *independentColumn;
155 long sourceColumns, optionCode, *resultIndex, sumPower;
157 double topLimit, bottomLimit;
160long addStatRequests(
STAT_REQUEST **statRequest,
long requests,
char **item,
long items,
long code,
unsigned long flag);
165 "sddsrunstats [<input>] [<output>] [-pipe[=input][,output]]\n"
166 " [{-points=<integer> | -window=column=<column>,width=<value>}]\n"
169 " [-mean=[<limitOps>],<columnNameList>]\n"
170 " [-median=[<limitOps>],<columnNameList>]\n"
171 " [-minimum=[<limitOps>],<columnNameList>]\n"
172 " [-maximum=[<limitOps>],<columnNameList>]\n"
173 " [-standardDeviation=[<limitOps>],<columnNameList>]\n"
174 " [-sigma=[<limitOps>],<columnNameList>]\n"
175 " [-sum=[<limitOps>][,power=<integer>],<columnNameList>]\n"
176 " [-sample=[<limitOps>],<columnNameList>]\n"
177 " [-slope=independent=<columnName>,<columnNameList>]\n"
179 " <limitOps> is of the form [topLimit=<value>,][bottomLimit=<value>] [-majorOrder=row|column]\n\n"
180 "Computes running statistics of columns of data. The <columnNameList> may contain\n"
181 "wildcards, in which case an additional output column is produced for every matching\n"
182 "column. By default, statistics are done with a sliding window, so the values are\n"
183 "running statistics; for blocked statistics, use -noOverlap. For statistics on\n"
184 "the entire page, use -points=0.\n"
185 "The -partialOk option tells sddsrunstats to do computations even\n"
186 "if the number of available rows is less than the number of points\n"
187 "specified; by default, such data is simply ignored.\n"
188 "Program by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")\n";
190int main(
int argc,
char **argv) {
196 SCANNED_ARG *scanned;
200 int64_t pointsToStat;
201 long pointsToStat0, overlap;
202 int64_t rows, outputRowsMax, outputRows, outputRow;
203 long iArg, code, iStat, iColumn;
204 int64_t startRow, rowOffset;
205 char *input, *output, *windowColumn;
206 double *inputData, *outputData, topLimit, bottomLimit, *inputDataOffset, result, sum1, sum2, *newData;
207 double windowWidth, *windowData, slope, intercept, variance;
209 unsigned long pipeFlags, scanFlags, majorOrderFlag;
211 long lastRegion, region, windowRef, partialOk;
212 short columnMajorOrder = -1;
215 argc =
scanargs(&scanned, argc, argv);
217 bomb(
"too few arguments", USAGE);
221 input = output = NULL;
224 stats = requests = pipeFlags = 0;
231 for (iArg = 1; iArg < argc; iArg++) {
233 if (scanned[iArg].arg_type == OPTION) {
235 switch (code =
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
236 case SET_MAJOR_ORDER:
238 scanned[iArg].n_items--;
239 if (scanned[iArg].n_items > 0 &&
240 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
241 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
242 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
243 SDDS_Bomb(
"invalid -majorOrder syntax/values");
244 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
245 columnMajorOrder = 1;
246 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
247 columnMajorOrder = 0;
250 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1],
"%" SCNd64, &pointsToStat) != 1 ||
251 (pointsToStat <= 2 && pointsToStat != 0))
260 case SET_STANDARDDEVIATION:
265 if (scanned[iArg].n_items < 2) {
266 fprintf(stderr,
"error: invalid -%s syntax\n", option[code]);
269 if (!
scanItemList(&scanFlags, scanned[iArg].list, &scanned[iArg].n_items,
270 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS | SCANITEMLIST_IGNORE_VALUELESS,
271 "toplimit",
SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
272 "bottomlimit",
SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL)) {
273 sprintf(s,
"invalid -%s syntax", scanned[iArg].list[0]);
276 requests = addStatRequests(&request, requests, scanned[iArg].list + 1, scanned[iArg].n_items - 1, code, scanFlags);
277 request[requests - 1].topLimit = topLimit;
278 request[requests - 1].bottomLimit = bottomLimit;
281 if (scanned[iArg].n_items < 2) {
282 fprintf(stderr,
"error: invalid -%s syntax\n", option[code]);
286 if (!
scanItemList(&scanFlags, scanned[iArg].list, &scanned[iArg].n_items,
287 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS | SCANITEMLIST_IGNORE_VALUELESS,
289 "toplimit",
SDDS_DOUBLE, &topLimit, 1, TOPLIMIT_GIVEN,
290 "bottomlimit",
SDDS_DOUBLE, &bottomLimit, 1, BOTTOMLIMIT_GIVEN, NULL))
292 requests = addStatRequests(&request, requests, scanned[iArg].list + 1, scanned[iArg].n_items - 1, code, scanFlags);
293 request[requests - 1].sumPower = power;
294 request[requests - 1].topLimit = topLimit;
295 request[requests - 1].bottomLimit = bottomLimit;
298 if (scanned[iArg].n_items < 2) {
299 fprintf(stderr,
"error: invalid -%s syntax\n", option[code]);
302 if (!
scanItemList(&scanFlags, scanned[iArg].list, &scanned[iArg].n_items,
303 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS | SCANITEMLIST_IGNORE_VALUELESS,
304 "independent",
SDDS_STRING, &independent, 1, INDEPENDENT_GIVEN,
306 !(scanFlags & INDEPENDENT_GIVEN))
308 requests = addStatRequests(&request, requests, scanned[iArg].list + 1, scanned[iArg].n_items - 1, code, scanFlags);
309 request[requests - 1].independentColumn = independent;
310 request[requests - 1].topLimit = request[requests - 1].bottomLimit = 0;
313 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
319 scanned[iArg].n_items -= 1;
320 if (!
scanItemList(&scanFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
324 !strlen(windowColumn) ||
326 SDDS_Bomb(
"invalid -window syntax/values");
332 fprintf(stderr,
"error: unknown option '%s' given\n", scanned[iArg].list[0]);
339 input = scanned[iArg].list[0];
341 output = scanned[iArg].list[0];
347 if (pointsToStat < 0 && !windowColumn) {
358 if (!(stat = compileStatDefinitions(&inData, request, requests)))
362 fprintf(stderr,
"%ld stats\n", stats);
363 for (iStat = 0; iStat < stats; iStat++) {
364 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
365 fprintf(stderr,
"iStat=%ld iColumn=%ld source=%s result=%s\n", iStat, iColumn, stat[iStat].sourceColumn[iColumn], stat[iStat].resultColumn[iColumn]);
366 if (stat[iStat].flags & BOTTOMLIMIT_GIVEN)
367 fprintf(stderr,
" bottom = %e\n", stat[iStat].bottomLimit);
368 if (stat[iStat].flags & TOPLIMIT_GIVEN)
369 fprintf(stderr,
" top = %e\n", stat[iStat].topLimit);
374 if (!setupOutputFile(&outData, output, &inData, stat, stats, columnMajorOrder))
381 SDDS_Bomb(
"Window column is not numeric");
386 pointsToStat0 = pointsToStat;
389 pointsToStat = pointsToStat0;
390 if (pointsToStat == 0)
396 if (rows < pointsToStat) {
403 outputRows = rows - pointsToStat + 1;
405 outputRows = rows / pointsToStat;
413 if (outputRows > outputRowsMax &&
414 !(outputData =
SDDS_Realloc(outputData,
sizeof(*outputData) * (outputRowsMax = outputRows))))
416 for (iStat = 0; iStat < stats; iStat++) {
417 for (iColumn = 0; iColumn < stat[iStat].sourceColumns; iColumn++) {
424 if (stat[iStat].independentColumn &&
427 for (outputRow = startRow = 0; outputRow < outputRows; outputRow++, startRow += (overlap ? 1 : pointsToStat)) {
429 short windowFound = 0;
434 for (pointsToStat = 1; pointsToStat < outputRows - startRow; pointsToStat++) {
435 region = (windowData[startRow + pointsToStat] - windowData[windowRef]) / windowWidth;
436 if (region != lastRegion) {
442 if (!windowFound && pointsToStat < 2)
444 if (startRow + pointsToStat > rows) {
445 pointsToStat = rows - startRow - 1;
446 if (pointsToStat <= 0)
450 fprintf(stderr,
"row=%" PRId64
" pointsToStat=%" PRId64
" delta=%.9lf (%.9lf -> %.9lf)\n", startRow, pointsToStat, windowData[startRow + pointsToStat - 1] - windowData[startRow], windowData[startRow], windowData[startRow + pointsToStat - 1]);
453 inputDataOffset = inputData + startRow;
454 switch (stat[iStat].optionCode) {
457 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
458 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
459 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
461 if (inputDataOffset[rowOffset] > result)
462 result = inputDataOffset[rowOffset];
467 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
468 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
469 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
471 if (inputDataOffset[rowOffset] < result)
472 result = inputDataOffset[rowOffset];
478 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
479 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
480 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
482 result += inputDataOffset[rowOffset];
493 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
494 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
495 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
497 newData =
SDDS_Realloc(newData,
sizeof(*newData) * (count + 1));
498 newData[count] = inputDataOffset[rowOffset];
509 case SET_STANDARDDEVIATION:
511 sum1 = sum2 = count = 0;
512 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
513 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
514 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
516 sum1 += inputDataOffset[rowOffset];
517 sum2 += inputDataOffset[rowOffset] * inputDataOffset[rowOffset];
521 if ((result = sum2 / count - sqr(sum1 / count)) <= 0)
524 result = sqrt(result * count / (count - 1.0));
525 if (stat[iStat].optionCode == SET_SIGMA)
526 result /= sqrt(count);
531 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
532 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
533 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
535 sum2 += inputDataOffset[rowOffset] * inputDataOffset[rowOffset];
539 result = sqrt(sum2 / count);
545 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
546 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
547 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
549 sum1 +=
ipow(inputDataOffset[rowOffset], stat[iStat].sumPower);
558 if (!
unweightedLinearFit(indepData + startRow, inputDataOffset, pointsToStat, &slope, &intercept,
566 for (rowOffset = 0; rowOffset < pointsToStat; rowOffset++) {
567 if ((stat[iStat].flags & TOPLIMIT_GIVEN && inputDataOffset[rowOffset] > stat[iStat].topLimit) ||
568 (stat[iStat].flags & BOTTOMLIMIT_GIVEN && inputDataOffset[rowOffset] < stat[iStat].bottomLimit))
570 result = inputDataOffset[rowOffset];
575 fprintf(stderr,
"Unknown statistics code %ld in sddsrunave\n", stat[iStat].optionCode);
579 outputData[outputRow] = result;
604long addStatRequests(
STAT_REQUEST **statRequest,
long requests,
char **item,
long items,
long code,
unsigned long flags) {
606 if (!(*statRequest =
SDDS_Realloc(*statRequest,
sizeof(**statRequest) * (requests + 1))) ||
607 !((*statRequest)[requests].sourceColumn = (
char **)malloc(
sizeof(*(*statRequest)[requests].sourceColumn) * items)))
609 for (i = 0; i < items; i++) {
610 (*statRequest)[requests].sourceColumn[i] = item[i];
612 (*statRequest)[requests].sourceColumns = items;
613 (*statRequest)[requests].optionCode = code;
614 (*statRequest)[requests].sumPower = 1;
615 (*statRequest)[requests].flags = flags;
616 (*statRequest)[requests].independentColumn = NULL;
623 char s[SDDS_MAXLINE];
627 for (iReq = 0; iReq < requests; iReq++) {
628 if ((stat[iReq].sourceColumns = expandColumnPairNames(inData, &request[iReq].sourceColumn, NULL, request[iReq].sourceColumns, NULL, 0, FIND_NUMERIC_TYPE, 0)) <= 0) {
629 fprintf(stderr,
"Error: no match for column names (sddsrunstats):\n");
630 for (iName = 0; iName < request[iReq].sourceColumns; iName++)
631 fprintf(stderr,
"%s, ", request[iReq].sourceColumn[iReq]);
635 stat[iReq].sourceColumn = request[iReq].sourceColumn;
636 if (!(stat[iReq].resultColumn = malloc(
sizeof(*stat[iReq].resultColumn) * stat[iReq].sourceColumns)) ||
637 !(stat[iReq].resultIndex = malloc(
sizeof(*stat[iReq].resultIndex) * stat[iReq].sourceColumns))) {
640 for (iName = 0; iName < stat[iReq].sourceColumns; iName++) {
641 sprintf(s,
"%s%s", stat[iReq].sourceColumn[iName], statSuffix[request[iReq].optionCode]);
644 stat[iReq].optionCode = request[iReq].optionCode;
645 stat[iReq].sumPower = request[iReq].sumPower;
646 stat[iReq].flags = request[iReq].flags;
647 stat[iReq].topLimit = request[iReq].topLimit;
648 stat[iReq].bottomLimit = request[iReq].bottomLimit;
649 stat[iReq].independentColumn = request[iReq].independentColumn;
657 char s[SDDS_MAXLINE];
661 if (columnMajorOrder != -1)
662 outData->layout.data_mode.column_major = columnMajorOrder;
664 outData->layout.data_mode.column_major = inData->layout.data_mode.column_major;
665 for (iStat = 0; iStat < stats; iStat++) {
666 for (column = 0; column < stat[iStat].sourceColumns; column++) {
668 sprintf(s,
"Problem transferring definition of column %s to %s\n", stat[iStat].sourceColumn[column], stat[iStat].resultColumn[column]);
672 if ((stat[iStat].resultIndex[column] =
SDDS_GetColumnIndex(outData, stat[iStat].resultColumn[column])) < 0) {
673 sprintf(s,
"Problem creating column %s", stat[iStat].resultColumn[column]);
680 sprintf(s,
"Problem changing attributes of new column %s", stat[iStat].resultColumn[column]);
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_CopyArrays(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllArrayDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all array definitions from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
#define SDDS_NUMERIC_TYPE(type)
Checks if the given type identifier corresponds to any numeric type.
Utility functions for SDDS dataset manipulation and string array operations.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
double ipow(const double x, const int64_t p)
Compute x raised to the power p (x^p).
long unweightedLinearFit(double *xData, double *yData, long nData, double *slope, double *intercept, double *variance)
Performs an unweighted linear fit on the provided data.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.