76char *option[N_OPTIONS] = {
/* Help text for sddsoutlier: the command synopsis followed by one entry per
 * command-line switch. The final line appends the compile date/time and the
 * SVN_VERSION string. The whole macro is a single string literal built from
 * adjacent literals joined by backslash line continuations. */
97#define USAGE "Usage:\n" \
98 "sddsoutlier [<inputfile>] [<outputfile>] [OPTIONS]\n\n" \
100 " -pipe=[input][,output]\n" \
101 " Use standard input and/or output as data streams.\n" \
103 " Enable verbose output, displaying processing information.\n" \
105 " Suppress warning messages.\n" \
106 " -columns=<list-of-names>\n" \
107 " Specify a comma-separated list of column names to process.\n" \
108 " -excludeColumns=<list-of-names>\n" \
109 " Specify a comma-separated list of column names to exclude from processing.\n" \
110 " -stDevLimit=<value>\n" \
111 " Point is an outlier if it is more than <value> standard deviations from the mean.\n" \
112 " -absLimit=<value>\n" \
113 " Point is an outlier if it has an absolute value greater than <value>.\n" \
114 " -absDeviationLimit=<value>[,neighbor=<number>]\n" \
115 " Point is an outlier if its absolute deviation from the mean exceeds <value>.\n" \
116 " If neighbor is provided, the mean is computed with the neighbors instead of the whole data.\n" \
117 " -minimumLimit=<value>\n" \
118 " Point is an outlier if it is less than <value>.\n" \
119 " -maximumLimit=<value>\n" \
120 " Point is an outlier if it is greater than <value>.\n" \
121 " -chanceLimit=<minimumChance>\n" \
122 " Point is an outlier if it has a probability less than <minimumChance> of occurring (Gaussian statistics).\n" \
123 " -percentileLimit=lower=<lowerPercent>,upper=<upperPercent>\n" \
124 " Point is an outlier if it is below the <lowerPercent> percentile or above the <upperPercent> percentile.\n" \
125 " -unpopular=bins=<number>\n" \
126 " Remove points that are not in the most populated bin based on a histogram with <number> bins.\n" \
128 " Invert the outlier selection criteria.\n" \
129 " -majorOrder=row|column\n" \
130 " Specify output file data ordering as row or column major.\n" \
132 " Mark identified outliers without removing them.\n" \
133 " -replaceOnly={lastValue|nextValue|interpolatedValue|value=<number>}\n" \
134 " Replace outliers with specified values or strategies.\n" \
135 " -passes=<number>\n" \
136 " Define the number of passes for outlier detection.\n\n" \
137 "Program by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n"
/* Bit flags OR-ed into the `flags` member of the outlier control structure.
 * Each value occupies a distinct bit so they can be combined and tested
 * independently. */
/* Set whenever any detection/replacement criterion was given on the command
 * line; main() falls back to a 2-sigma standard-deviation limit when it is
 * absent. */
139#define OUTLIER_CONTROL_INVOKED 0x00001U
/* Standard-deviation criterion; main() sets OUTLIER_STDEV_GIVEN and
 * OUTLIER_STDEVLIMIT_GIVEN together. */
140#define OUTLIER_STDEV_GIVEN 0x00002U
141#define OUTLIER_FRACTION_GIVEN 0x00004U
142#define OUTLIER_STDEVLIMIT_GIVEN 0x00008U
/* Histogram-based "unpopular bin" criterion. */
143#define OUTLIER_UNPOPULAR_BINS 0x00010U
/* Print progress/diagnostic messages to stderr. */
144#define OUTLIER_VERBOSE_GIVEN 0x00020U
145#define OUTLIER_ABSLIMIT_GIVEN 0x00040U
146#define OUTLIER_ABSDEVLIMIT_GIVEN 0x00080U
/* Invert the keep/reject decision for every row. */
147#define OUTLIER_INVERT_GIVEN 0x00100U
/* Record the decision in the isOutlier array instead of acting on the rows;
 * main() rejects combining this with any replace flag. */
148#define OUTLIER_MARKONLY 0x00200U
149#define OUTLIER_CHANCELIMIT_GIVEN 0x00400U
150#define OUTLIER_MAXLIMIT_GIVEN 0x00800U
151#define OUTLIER_MINLIMIT_GIVEN 0x01000U
/* Replacement strategies selected by the replace option's qualifier
 * (lastvalue, nextvalue, interpolatedvalue, value=<number>). */
152#define OUTLIER_REPLACELAST 0x02000U
153#define OUTLIER_REPLACENEXT 0x04000U
154#define OUTLIER_REPLACEINTERP 0x08000U
155#define OUTLIER_REPLACEVALUE 0x10000U
/* Mask matching any of the four replacement strategies. */
156#define OUTLIER_REPLACEFLAGS (OUTLIER_REPLACELAST | OUTLIER_REPLACENEXT | OUTLIER_REPLACEINTERP | OUTLIER_REPLACEVALUE)
/* Set when the lower / upper percentile qualifier was supplied. */
157#define OUTLIER_PERCENTILE_LOWER 0x20000U
158#define OUTLIER_PERCENTILE_UPPER 0x40000U
/* Mask matching either percentile qualifier. */
159#define OUTLIER_PERCENTILE_FLAGS (OUTLIER_PERCENTILE_LOWER | OUTLIER_PERCENTILE_UPPER)
163 double stDevLimit, fractionLimit, absoluteLimit, absDevLimit;
164 double chanceLimit, replacementValue, maximumLimit, minimumLimit;
165 double percentilePoint[2];
167 int32_t unpopularBins;
/* Applies the criteria selected in outlierControl to `columns` named columns
 * of a page holding `rows` rows. isOutlier is a per-row flag array that the
 * function reads when seeding its keep mask and writes when mark-only mode is
 * active. main() assigns the result to its row count and treats 0 as "no rows
 * left". */
172int64_t removeOutliers(
SDDS_DATASET *SDDSin, int64_t rows,
char **column,
long columns,
OUTLIER_CONTROL *outlierControl, int32_t *isOutlier);
/* Computes the mean and standard deviation of `data` over `rows` entries,
 * using the `keep` flag array; results are written through `mean` and
 * `stDev`. Callers use the return value as a success test (nonzero = usable
 * result). */
173long meanStDevForFlaggedData(
double *mean,
double *stDev,
double *data, int32_t *keep, int64_t rows);
/* Program entry point: parses the command line into an outlier control
 * structure, resolves the columns to examine, and calls removeOutliers() on
 * each page, reporting row counts in verbose mode.
 * NOTE(review): this listing omits a number of lines (several case labels,
 * else branches and error calls), so the comments below describe only what the
 * visible statements establish. */
175int main(
int argc,
char **argv) {
177 char **column, **excludeColumn;
178 long columns, excludeColumns;
179 char *input, *output;
180 SCANNED_ARG *scanned;
182 long readCode, dataLimitGiven, tmpfileUsed;
184 long noWarnings, isOutlierIndex;
187 unsigned long pipeFlags, tmpFlags, majorOrderFlag, dummyFlags;
/* -1 means "no -majorOrder given"; 0 = row major, 1 = column major. */
188 short columnMajorOrder = -1;
/* Tokenize argv into scanned[]; argc is replaced by scanargs' return value. */
191 argc =
scanargs(&scanned, argc, argv);
/* Defaults: no file names, no column lists, a single pass, no neighbor
 * averaging, and no control flags. */
196 output = input = NULL;
197 columns = excludeColumns = dataLimitGiven = 0;
198 column = excludeColumn = NULL;
200 outlierControl.flags = 0;
201 outlierControl.passes = 1;
202 outlierControl.neighbors = 0;
203 pipeFlags = tmpfileUsed = noWarnings = isOutlierIndex = 0;
205 for (iArg = 1; iArg < argc; iArg++) {
206 if (scanned[iArg].arg_type == OPTION) {
/* Dispatch on the option keyword (first list item) against the option table. */
208 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
209 case SET_MAJOR_ORDER:
/* Drop the option name from the count, then parse the row/column qualifier. */
211 scanned[iArg].n_items--;
212 if (scanned[iArg].n_items > 0 &&
213 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
214 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
215 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
216 SDDS_Bomb(
"invalid -majorOrder syntax/values");
217 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
218 columnMajorOrder = 1;
219 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
220 columnMajorOrder = 0;
/* Column list: only one such option is accepted. The array stores pointers
 * into the scanned argument list (names are not copied). */
224 SDDS_Bomb(
"only one -columns option may be given");
225 if (scanned[iArg].n_items < 2)
227 column =
tmalloc(
sizeof(*column) * (columns = scanned[iArg].n_items - 1));
228 for (i = 0; i < columns; i++)
229 column[i] = scanned[iArg].list[i + 1];
/* Exclusion list: same handling as the column list above. */
233 SDDS_Bomb(
"only one -excludecolumns option may be given");
234 if (scanned[iArg].n_items < 2)
235 SDDS_Bomb(
"invalid -excludecolumns syntax");
236 excludeColumn =
tmalloc(
sizeof(*excludeColumn) * (excludeColumns = scanned[iArg].n_items - 1));
237 for (i = 0; i < excludeColumns; i++)
238 excludeColumn[i] = scanned[iArg].list[i + 1];
/* Standard-deviation limit: exactly one value, which must be positive. */
240 case SET_STDDEV_LIMIT:
241 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1],
"%lf", &outlierControl.stDevLimit) != 1 ||
242 outlierControl.stDevLimit <= 0)
244 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_STDEV_GIVEN | OUTLIER_STDEVLIMIT_GIVEN;
/* Absolute-value limit: exactly one value, which must be positive. */
247 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1],
"%lf", &outlierControl.absoluteLimit) != 1 ||
248 outlierControl.absoluteLimit <= 0)
250 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_ABSLIMIT_GIVEN;
/* Absolute-deviation limit: a positive value, optionally followed by a
 * "neighbors" qualifier. */
252 case SET_ABSDEV_LIMIT:
253 if (scanned[iArg].n_items < 2)
254 SDDS_Bomb(
"invalid -absDeviationLimit syntax");
255 if (scanned[iArg].n_items == 2) {
256 if (sscanf(scanned[iArg].list[1],
"%lf", &outlierControl.absDevLimit) != 1 || outlierControl.absDevLimit <= 0)
257 SDDS_Bomb(
"invalid -absDeviationLimit syntax");
/* More than one item (presumably the else branch; the else line is not
 * visible): parse the value, then the remaining qualifier. */
259 if (sscanf(scanned[iArg].list[1],
"%lf", &outlierControl.absDevLimit) != 1 || outlierControl.absDevLimit <= 0)
260 SDDS_Bomb(
"invalid -absDeviationLimit syntax");
/* Temporarily step past the option name and the value so scanItemList sees
 * only the qualifiers; the adjustment is undone below. */
261 scanned[iArg].list += 2;
262 scanned[iArg].n_items -= 2;
/* NOTE(review): the usage text spells this qualifier "neighbor" while the
 * keyword parsed here is "neighbors" — confirm scanItemList's matching
 * accepts the shorter spelling. */
263 if (scanned[iArg].n_items > 0 &&
264 (!
scanItemList(&dummyFlags, scanned[iArg].list, &scanned[iArg].n_items, 0,
"neighbors",
SDDS_LONG, &(outlierControl.neighbors), 1, 0, NULL)))
265 SDDS_Bomb(
"invalid -absDeviationLimit syntax/value");
/* Force an odd neighbor count by bumping even values up by one. */
266 if (outlierControl.neighbors % 2 == 0)
267 outlierControl.neighbors += 1;
/* Restore the list pointer and item count changed above. */
269 scanned[iArg].list -= 2;
270 scanned[iArg].n_items += 2;
272 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_ABSDEVLIMIT_GIVEN;
275 outlierControl.flags |= OUTLIER_VERBOSE_GIVEN;
278 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
285 outlierControl.flags |= OUTLIER_INVERT_GIVEN;
288 outlierControl.flags |= OUTLIER_MARKONLY;
/* Chance limit: exactly one value, which must be positive. */
290 case SET_CHANCELIMIT:
291 if (scanned[iArg].n_items != 2 ||
292 sscanf(scanned[iArg].list[1],
"%lf", &outlierControl.chanceLimit) != 1 ||
293 outlierControl.chanceLimit <= 0)
294 SDDS_Bomb(
"invalid -chanceLimit syntax");
295 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_CHANCELIMIT_GIVEN;
/* Number of passes: exactly one integer value, at least 1. */
298 if (scanned[iArg].n_items != 2 ||
299 sscanf(scanned[iArg].list[1],
"%ld", &outlierControl.passes) != 1 ||
300 outlierControl.passes < 1)
/* Maximum limit: any numeric value is accepted (no sign restriction). */
304 outlierControl.flags |= OUTLIER_MAXLIMIT_GIVEN | OUTLIER_CONTROL_INVOKED;
305 if (scanned[iArg].n_items != 2 ||
306 sscanf(scanned[iArg].list[1],
"%lf", &outlierControl.maximumLimit) != 1)
307 SDDS_Bomb(
"invalid -maximumLimit syntax");
/* Minimum limit: any numeric value is accepted (no sign restriction). */
310 outlierControl.flags |= OUTLIER_MINLIMIT_GIVEN | OUTLIER_CONTROL_INVOKED;
311 if (scanned[iArg].n_items != 2 ||
312 sscanf(scanned[iArg].list[1],
"%lf", &outlierControl.minimumLimit) != 1)
313 SDDS_Bomb(
"invalid -minimumLimit syntax");
/* Replacement mode: the qualifier maps onto one of the OUTLIER_REPLACE*
 * flags; "value" additionally stores the replacement number. */
316 if (scanned[iArg].n_items != 2)
318 scanned[iArg].n_items -= 1;
319 if (!
scanItemList(&tmpFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
320 "lastvalue", -1, NULL, 0, OUTLIER_REPLACELAST,
321 "nextvalue", -1, NULL, 0, OUTLIER_REPLACENEXT,
322 "interpolatedvalue", -1, NULL, 0, OUTLIER_REPLACEINTERP,
323 "value",
SDDS_DOUBLE, &outlierControl.replacementValue, 1, OUTLIER_REPLACEVALUE, NULL))
324 SDDS_Bomb(
"invalid -replace syntax/values");
325 outlierControl.flags |= tmpFlags | OUTLIER_CONTROL_INVOKED;
/* Percentile limit: both lower and upper must be given, and lower must be
 * strictly below upper. */
327 case SET_PERCENTILE_LIMIT:
328 if (scanned[iArg].n_items < 3)
329 SDDS_Bomb(
"invalid -percentileLimit syntax");
330 scanned[iArg].n_items -= 1;
331 if (!
scanItemList(&tmpFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
332 "lower",
SDDS_DOUBLE, &outlierControl.percentilePoint[0], 1, OUTLIER_PERCENTILE_LOWER,
333 "upper",
SDDS_DOUBLE, &outlierControl.percentilePoint[1], 1, OUTLIER_PERCENTILE_UPPER, NULL) ||
334 !(tmpFlags & OUTLIER_PERCENTILE_LOWER) || !(tmpFlags & OUTLIER_PERCENTILE_UPPER) ||
335 outlierControl.percentilePoint[0] >= outlierControl.percentilePoint[1])
336 SDDS_Bomb(
"invalid -percentileLimit syntax");
337 outlierControl.flags |= tmpFlags | OUTLIER_CONTROL_INVOKED;
/* Unpopular-bin criterion: the "bins" qualifier is required and must be at
 * least 2. */
340 if (scanned[iArg].n_items < 2)
342 scanned[iArg].n_items -= 1;
343 if (!
scanItemList(&tmpFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
344 "bins",
SDDS_LONG, &(outlierControl.unpopularBins), 1, OUTLIER_UNPOPULAR_BINS, NULL) ||
345 !(tmpFlags & OUTLIER_UNPOPULAR_BINS) || outlierControl.unpopularBins < 2)
347 outlierControl.flags |= tmpFlags | OUTLIER_CONTROL_INVOKED;
350 fprintf(stderr,
"Error: Unknown or ambiguous option: %s\n", scanned[iArg].list[0]);
/* Non-option arguments supply the input and output file names. */
356 input = scanned[iArg].list[0];
358 output = scanned[iArg].list[0];
/* Replacing values and mark-only mode are mutually exclusive. */
363 if (outlierControl.flags & OUTLIER_REPLACEFLAGS && outlierControl.flags & OUTLIER_MARKONLY)
364 SDDS_Bomb(
"Cannot use -replaceOnly and -markOnly simultaneously.");
366 processFilenames(
"sddsoutlier", &input, &output, pipeFlags, noWarnings, &tmpfileUsed);
/* With no criterion given, default to a standard-deviation limit of 2. */
368 if (!(outlierControl.flags & OUTLIER_CONTROL_INVOKED)) {
369 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_STDEV_GIVEN | OUTLIER_STDEVLIMIT_GIVEN;
370 outlierControl.stDevLimit = 2;
/* An explicit major-order choice is applied to the output layout; the second
 * assignment copies the input's ordering (presumably the else branch — the
 * else line is not visible in this listing). */
377 if (columnMajorOrder != -1)
378 SDDSout.layout.data_mode.column_major = columnMajorOrder;
380 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
382 if (outlierControl.flags & OUTLIER_MARKONLY &&
/* Resolve the requested and excluded names against the output dataset into
 * the final column list; stop if nothing is selected. */
391 if ((columns = expandColumnPairNames(&SDDSout, &column, NULL, columns, excludeColumn, excludeColumns, FIND_NUMERIC_TYPE, 0)) <= 0) {
393 SDDS_Bomb(
"No columns selected for outlier control.");
406 if (outlierControl.flags & OUTLIER_MARKONLY) {
407 if (isOutlierIndex >= 0) {
409 SDDS_Bomb(
"Unable to retrieve 'IsOutlier' column from input file despite its existence.");
/* Resize the per-row outlier flag array to hold `rows` entries. */
412 isOutlier =
SDDS_Realloc(isOutlier,
sizeof(*isOutlier) * rows);
415 for (i = 0; i < rows; i++)
419 if (outlierControl.flags & OUTLIER_VERBOSE_GIVEN)
420 fprintf(stderr,
"%" PRId64
" rows in page %ld\n", rows, readCode);
/* removeOutliers' result becomes the new row count; zero triggers the
 * "skipping page" message below. */
421 if ((rows = removeOutliers(&SDDSout, rows, column, columns, &outlierControl, isOutlier)) == 0) {
423 fprintf(stderr,
" No rows left after outlier control--skipping page.\n");
426 if (outlierControl.flags & OUTLIER_VERBOSE_GIVEN)
427 fprintf(stderr,
"%" PRId64
" rows left after outlier control\n", rows);
/* Diagnostic comparing the expected row count with the dataset's
 * rows-of-interest count (the guarding condition is not visible here). */
429 fprintf(stderr,
"Problem with row selection:\n %" PRId64
" expected, %" PRId64
" counted\n", rows,
SDDS_CountRowsOfInterest(&SDDSout));
/* Applies every criterion enabled in outlierControl->flags to each of the
 * `columns` named columns, for outlierControl->passes passes per column,
 * maintaining a per-row keep mask. Depending on the flags it then replaces
 * rejected values, inverts the mask, and/or writes the mask back to isOutlier.
 *
 *   dataset        dataset holding the page being processed
 *   rows           number of rows in the page
 *   column/columns names and count of the columns to examine
 *   outlierControl limits and flag bits selected on the command line
 *   isOutlier      per-row flags; read to seed the keep mask and rewritten in
 *                  mark-only mode
 *
 * NOTE(review): many statements (loop bodies, data retrieval, the return) are
 * not visible in this listing; main() treats the result as the number of rows
 * remaining. */
443int64_t removeOutliers(
SDDS_DATASET *dataset, int64_t rows,
char **column,
long columns,
OUTLIER_CONTROL *outlierControl, int32_t *isOutlier) {
445 int64_t irow, kept, killed, j, k, summed;
446 double *data, sum1, stDev, mean;
/* Static: the pointer value persists between calls. */
447 static int32_t *keep = NULL;
448 double lastGoodValue = 0;
449 int64_t irow0, irow1;
460 for (irow = 0; irow < rows; irow++)
/* Seed the keep mask as the logical negation of the incoming isOutlier flags. */
464 for (irow = kept = 0; irow < rows; irow++)
465 if ((keep[irow] = !isOutlier[irow]))
469 for (icol = 0; icol < columns; icol++) {
475 for (ipass = 0; ipass < outlierControl->passes; ipass++) {
/* Unpopular-bin test: histogram the column into unpopularBins bins spanning
 * [lo, hi] (lo/hi are assigned on a line not visible here), locate the
 * extreme-count bins, and compute each row's bin index ih. */
476 if (outlierControl->flags & OUTLIER_UNPOPULAR_BINS && rows > 1) {
477 double *hist, lo, hi, delta;
479 hist =
tmalloc(
sizeof(*hist) * outlierControl->unpopularBins);
481 make_histogram(hist, outlierControl->unpopularBins, lo, hi, data, rows, 1);
482 delta = (hi - lo) / outlierControl->unpopularBins;
483 index_min_max(&imin, &imax, hist, outlierControl->unpopularBins);
484 for (irow = killed = 0; irow < rows; irow++) {
485 ih = (data[irow] - lo) / delta;
493 if (killed && (outlierControl->flags & OUTLIER_VERBOSE_GIVEN))
494 fprintf(stderr,
"%" PRId64
" additional rows killed by column %s unpopular control\n", killed, column[icol]);
/* Percentile test: still-kept rows whose value lies below the lower or above
 * the upper computed percentile are selected. */
497 if (outlierControl->flags & OUTLIER_PERCENTILE_FLAGS) {
498 double percentileResult[2];
500 if (
compute_percentiles(percentileResult, outlierControl->percentilePoint, 2, data, rows)) {
501 for (irow = killed = 0; irow < rows; irow++) {
502 if ((data[irow] < percentileResult[0] || data[irow] > percentileResult[1]) && keep[irow]) {
509 if (killed && (outlierControl->flags & OUTLIER_VERBOSE_GIVEN))
510 fprintf(stderr,
"%" PRId64
" additional rows killed by column %s percentile outlier control\n", killed, column[icol]);
/* Minimum test: still-kept rows strictly below minimumLimit. */
513 if (outlierControl->flags & OUTLIER_MINLIMIT_GIVEN) {
515 for (irow = killed = 0; irow < rows; irow++) {
516 if (keep[irow] && data[irow] < outlierControl->minimumLimit) {
522 if (killed && (outlierControl->flags & OUTLIER_VERBOSE_GIVEN))
523 fprintf(stderr,
"%" PRId64
" additional rows killed by column %s minimum value outlier control\n", killed, column[icol]);
/* Maximum test: still-kept rows strictly above maximumLimit. */
526 if (outlierControl->flags & OUTLIER_MAXLIMIT_GIVEN) {
528 for (irow = killed = 0; irow < rows; irow++) {
529 if (keep[irow] && data[irow] > outlierControl->maximumLimit) {
535 if (killed && (outlierControl->flags & OUTLIER_VERBOSE_GIVEN))
536 fprintf(stderr,
"%" PRId64
" additional rows killed by column %s maximum value outlier control\n", killed, column[icol]);
/* Absolute-value test: still-kept rows whose magnitude exceeds absoluteLimit. */
539 if (outlierControl->flags & OUTLIER_ABSLIMIT_GIVEN) {
541 for (irow = killed = 0; irow < rows; irow++) {
542 if (keep[irow] && fabs(data[irow]) > outlierControl->absoluteLimit) {
548 if (killed && (outlierControl->flags & OUTLIER_VERBOSE_GIVEN))
549 fprintf(stderr,
"%" PRId64
" additional rows killed by column %s absolute value outlier control\n", killed, column[icol]);
/* Absolute-deviation test: compare |value - mean| with absDevLimit, where the
 * mean is either local (neighbors > 0) or taken over the column. */
552 if (outlierControl->flags & OUTLIER_ABSDEVLIMIT_GIVEN) {
554 if (outlierControl->neighbors > 0) {
555 for (irow = killed = 0; irow < rows; irow++) {
/* Window of `neighbors` points centered on irow; indices that fall before the
 * first or after the last row are remapped to in-range rows via k. */
559 for (j = irow - outlierControl->neighbors / 2; j <= irow + outlierControl->neighbors / 2; j++) {
561 k = irow + outlierControl->neighbors / 2 - j;
562 else if (j > rows - 1)
563 k = irow - outlierControl->neighbors / 2 - (j - rows + 1);
/* NOTE(review): this accumulates fabs(data[k]), i.e. the window mean of
 * absolute values rather than of the values themselves — confirm that is
 * intended. */
566 mean += fabs(data[k]);
568 mean = mean / outlierControl->neighbors;
569 if (keep[irow] && fabs(data[irow] - mean) > outlierControl->absDevLimit) {
/* No neighbor window: use a single mean, sum1 / summed, for the whole column. */
576 for (irow = sum1 = summed = 0; irow < rows; irow++) {
584 mean = sum1 / summed;
585 for (irow = killed = 0; irow < rows; irow++)
586 if (keep[irow] && fabs(data[irow] - mean) > outlierControl->absDevLimit) {
592 if (killed && (outlierControl->flags & OUTLIER_VERBOSE_GIVEN))
593 fprintf(stderr,
"%" PRId64
" additional rows killed by column %s absolute deviation outlier control\n", killed, column[icol]);
/* Standard-deviation test: runs only when rows are still kept, the statistics
 * helper succeeds, and the standard deviation is nonzero. */
596 if (outlierControl->flags & OUTLIER_STDEV_GIVEN && kept && meanStDevForFlaggedData(&mean, &stDev, data, keep, rows) && stDev) {
598 for (irow = killed = 0; irow < rows; irow++)
599 if (keep[irow] && fabs(data[irow] - mean) > outlierControl->stDevLimit * stDev) {
604 if (killed && (outlierControl->flags & OUTLIER_VERBOSE_GIVEN))
605 fprintf(stderr,
"%" PRId64
" additional rows killed by column %s standard deviation outlier control\n", killed, column[icol]);
/* Chance-limit test, guarded by the same mean/stDev preconditions as above. */
608 if (outlierControl->flags & OUTLIER_CHANCELIMIT_GIVEN) {
610 if (kept && meanStDevForFlaggedData(&mean, &stDev, data, keep, rows) && stDev) {
612 double gProb, probOfSeeing;
614 for (irow = killed = 0; irow < rows; irow++) {
/* 1 - (1 - gProb)^lastKept: chance of at least one occurrence in lastKept
 * trials given a per-trial probability gProb. gProb and lastKept are assigned
 * on lines not visible here (presumably a Gaussian tail probability and the
 * kept-row count — confirm). */
620 probOfSeeing = 1 -
ipow(1 - gProb, lastKept);
621 if (probOfSeeing < outlierControl->chanceLimit) {
627 if (killed && (outlierControl->flags & OUTLIER_VERBOSE_GIVEN))
628 fprintf(stderr,
"%" PRId64
" additional rows killed by column %s chance limit outlier control\n", killed, column[icol]);
/* In a replacement mode with inversion requested, flip the mask before the
 * replacement code below runs. */
633 if (outlierControl->flags & OUTLIER_REPLACEFLAGS && (outlierControl->flags & OUTLIER_INVERT_GIVEN)) {
634 for (irow = 0; irow < rows; irow++)
635 keep[irow] = !keep[irow];
/* Replace-with-last: a first forward scan picks an initial lastGoodValue, a
 * second forward scan overwrites values with it and updates it as it goes
 * (the per-row conditions are not visible in this listing). */
639 if (outlierControl->flags & OUTLIER_REPLACELAST) {
640 for (irow = 0; irow < rows; irow++) {
642 lastGoodValue = data[irow];
646 for (irow = 0; irow < rows; irow++) {
649 data[irow] = lastGoodValue;
651 lastGoodValue = data[irow];
/* Replace-with-next: the same two scans, walking from the last row backwards. */
656 }
else if (outlierControl->flags & OUTLIER_REPLACENEXT) {
657 for (irow = rows - 1; irow >= 0; irow--) {
659 lastGoodValue = data[irow];
663 for (irow = rows - 1; irow >= 0; irow--) {
665 data[irow] = lastGoodValue;
668 lastGoodValue = data[irow];
/* Replace-with-interpolation: irow0 is the preceding row and irow1 the next
 * kept row; values in between are filled by linear interpolation. */
673 }
else if (outlierControl->flags & OUTLIER_REPLACEINTERP) {
676 for (irow = 0; irow < rows; irow++) {
678 if ((irow0 = irow - 1) >= 0) {
679 if ((irow1 = irow + 1) < rows) {
680 while (irow1 < rows && !keep[irow1])
683 if (irow1 < rows && keep[irow1]) {
687 for (; irow < irow1; irow++)
688 data[irow] = data[irow0] + (data[irow1] - data[irow0]) / (1.0 * irow1 - irow0) * (irow - irow0);
/* No later kept row: carry data[irow0] through to the end of the column. */
692 for (; irow < rows; irow++)
693 data[irow] = data[irow0];
/* No preceding row: search forward for irow1 and copy its value backwards. */
698 for (irow1 = irow + 1; irow1 < rows; irow1++) {
703 for (; irow < irow1; irow++)
704 data[irow] = data[irow1];
710 for (irow = 0; irow < rows; irow++)
/* Replace-with-constant: store the user-supplied replacementValue. */
715 }
else if (outlierControl->flags & OUTLIER_REPLACEVALUE) {
716 for (irow = 0; irow < rows; irow++) {
718 data[irow] = outlierControl->replacementValue;
/* Invert the keep mask when inversion was requested. */
729 if (outlierControl->flags & OUTLIER_INVERT_GIVEN) {
730 for (irow = 0; irow < rows; irow++)
731 keep[irow] = !keep[irow];
733 if (outlierControl->flags & OUTLIER_VERBOSE_GIVEN)
734 fprintf(stderr,
"%" PRId64
" rows left after inversion\n", kept);
/* Mark-only mode: publish the result as isOutlier = !keep for every row. */
737 if (isOutlier && (outlierControl->flags & OUTLIER_MARKONLY)) {
738 for (irow = 0; irow < rows; irow++)
739 isOutlier[irow] = !keep[irow];
/* Computes the mean and standard deviation of `data` in two passes over
 * `rows` entries and stores them through `mean` and `stDev`.
 * NOTE(review): the loop bodies' use of `keep`, any guard on `summed`, and the
 * return statement are not visible in this listing; callers test the return
 * value for nonzero before using the results. */
750long meanStDevForFlaggedData(
double *mean,
double *stDev,
double *data, int32_t *keep, int64_t rows) {
751 int64_t irow, summed;
752 double sum1, sum2, value;
/* Pass 1: accumulate the sum (sum1) and the count of contributing rows. */
755 for (irow = sum1 = summed = 0; irow < rows; irow++) {
763 *mean = sum1 / summed;
/* Pass 2: accumulate squared deviations from the mean. */
764 for (irow = sum2 = 0; irow < rows; irow++) {
766 value = data[irow] - *mean;
767 sum2 += value * value;
/* Sample standard deviation: divides by (summed - 1), not summed. */
770 *stDev = sqrt(sum2 / (summed - 1));
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_SetColumnFromLongs(SDDS_DATASET *SDDS_dataset, int32_t mode, int32_t *data, int64_t rows,...)
Sets the values for a single data column using long integer numbers.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_SHORT
Identifier for the signed short integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
int index_min_max(int64_t *imin, int64_t *imax, double *list, int64_t n)
Finds the indices of the minimum and maximum values in a list of doubles.
int find_min_max(double *min, double *max, double *list, int64_t n)
Finds the minimum and maximum values in a list of doubles.
double ipow(const double x, const int64_t p)
Compute x raised to the power p (x^p).
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
double normSigLevel(double z0, long tails)
Computes the probability that a standard normal variable exceeds a given value.