224 {
225 int iArg;
226 char **column, **excludeColumn;
227 long columns, excludeColumns;
228 char *input, *output;
229 SCANNED_ARG *scanned;
231 long readCode, dataLimitGiven, tmpfileUsed;
232 int64_t i, rows;
233 long noWarnings, isOutlierIndex;
234 int32_t *isOutlier;
236 unsigned long pipeFlags, tmpFlags, majorOrderFlag, dummyFlags;
237 short columnMajorOrder = -1;
238
240 argc =
scanargs(&scanned, argc, argv);
241 if (argc < 3) {
243 }
244
245 output = input = NULL;
246 columns = excludeColumns = dataLimitGiven = 0;
247 column = excludeColumn = NULL;
248
249 outlierControl.flags = 0;
250 outlierControl.passes = 1;
251 outlierControl.neighbors = 0;
252 pipeFlags = tmpfileUsed = noWarnings = isOutlierIndex = 0;
253
254 for (iArg = 1; iArg < argc; iArg++) {
255 if (scanned[iArg].arg_type == OPTION) {
256
257 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
258 case SET_MAJOR_ORDER:
259 majorOrderFlag = 0;
260 scanned[iArg].n_items--;
261 if (scanned[iArg].n_items > 0 &&
262 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
263 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
264 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
265 SDDS_Bomb(
"invalid -majorOrder syntax/values");
266 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
267 columnMajorOrder = 1;
268 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
269 columnMajorOrder = 0;
270 break;
271 case SET_COLUMNS:
272 if (columns)
273 SDDS_Bomb(
"only one -columns option may be given");
274 if (scanned[iArg].n_items < 2)
276 column =
tmalloc(
sizeof(*column) * (columns = scanned[iArg].n_items - 1));
277 for (i = 0; i < columns; i++)
278 column[i] = scanned[iArg].list[i + 1];
279 break;
280 case SET_EXCLUDE:
281 if (excludeColumns)
282 SDDS_Bomb(
"only one -excludecolumns option may be given");
283 if (scanned[iArg].n_items < 2)
284 SDDS_Bomb(
"invalid -excludecolumns syntax");
285 excludeColumn =
tmalloc(
sizeof(*excludeColumn) * (excludeColumns = scanned[iArg].n_items - 1));
286 for (i = 0; i < excludeColumns; i++)
287 excludeColumn[i] = scanned[iArg].list[i + 1];
288 break;
289 case SET_STDDEV_LIMIT:
290 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%lf", &outlierControl.stDevLimit) != 1 ||
291 outlierControl.stDevLimit <= 0)
293 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_STDEV_GIVEN | OUTLIER_STDEVLIMIT_GIVEN;
294 break;
295 case SET_ABS_LIMIT:
296 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%lf", &outlierControl.absoluteLimit) != 1 ||
297 outlierControl.absoluteLimit <= 0)
299 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_ABSLIMIT_GIVEN;
300 break;
301 case SET_ABSDEV_LIMIT:
302 if (scanned[iArg].n_items < 2)
303 SDDS_Bomb(
"invalid -absDeviationLimit syntax");
304 if (scanned[iArg].n_items == 2) {
305 if (sscanf(scanned[iArg].list[1], "%lf", &outlierControl.absDevLimit) != 1 || outlierControl.absDevLimit <= 0)
306 SDDS_Bomb(
"invalid -absDeviationLimit syntax");
307 } else {
308 if (sscanf(scanned[iArg].list[1], "%lf", &outlierControl.absDevLimit) != 1 || outlierControl.absDevLimit <= 0)
309 SDDS_Bomb(
"invalid -absDeviationLimit syntax");
310 scanned[iArg].list += 2;
311 scanned[iArg].n_items -= 2;
312 if (scanned[iArg].n_items > 0 &&
313 (!
scanItemList(&dummyFlags, scanned[iArg].list, &scanned[iArg].n_items, 0,
"neighbors",
SDDS_LONG, &(outlierControl.neighbors), 1, 0, NULL)))
314 SDDS_Bomb(
"invalid -absDeviationLimit syntax/value");
315 if (outlierControl.neighbors % 2 == 0)
316 outlierControl.neighbors += 1;
317
318 scanned[iArg].list -= 2;
319 scanned[iArg].n_items += 2;
320 }
321 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_ABSDEVLIMIT_GIVEN;
322 break;
323 case SET_VERBOSE:
324 outlierControl.flags |= OUTLIER_VERBOSE_GIVEN;
325 break;
326 case SET_PIPE:
327 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
329 break;
330 case SET_NOWARNINGS:
331 noWarnings = 1;
332 break;
333 case SET_INVERT:
334 outlierControl.flags |= OUTLIER_INVERT_GIVEN;
335 break;
336 case SET_MARKONLY:
337 outlierControl.flags |= OUTLIER_MARKONLY;
338 break;
339 case SET_CHANCELIMIT:
340 if (scanned[iArg].n_items != 2 ||
341 sscanf(scanned[iArg].list[1], "%lf", &outlierControl.chanceLimit) != 1 ||
342 outlierControl.chanceLimit <= 0)
343 SDDS_Bomb(
"invalid -chanceLimit syntax");
344 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_CHANCELIMIT_GIVEN;
345 break;
346 case SET_PASSES:
347 if (scanned[iArg].n_items != 2 ||
348 sscanf(scanned[iArg].list[1], "%ld", &outlierControl.passes) != 1 ||
349 outlierControl.passes < 1)
351 break;
352 case SET_MAXLIMIT:
353 outlierControl.flags |= OUTLIER_MAXLIMIT_GIVEN | OUTLIER_CONTROL_INVOKED;
354 if (scanned[iArg].n_items != 2 ||
355 sscanf(scanned[iArg].list[1], "%lf", &outlierControl.maximumLimit) != 1)
356 SDDS_Bomb(
"invalid -maximumLimit syntax");
357 break;
358 case SET_MINLIMIT:
359 outlierControl.flags |= OUTLIER_MINLIMIT_GIVEN | OUTLIER_CONTROL_INVOKED;
360 if (scanned[iArg].n_items != 2 ||
361 sscanf(scanned[iArg].list[1], "%lf", &outlierControl.minimumLimit) != 1)
362 SDDS_Bomb(
"invalid -minimumLimit syntax");
363 break;
364 case SET_REPLACE:
365 if (scanned[iArg].n_items != 2)
367 scanned[iArg].n_items -= 1;
368 if (!
scanItemList(&tmpFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
369 "lastvalue", -1, NULL, 0, OUTLIER_REPLACELAST,
370 "nextvalue", -1, NULL, 0, OUTLIER_REPLACENEXT,
371 "interpolatedvalue", -1, NULL, 0, OUTLIER_REPLACEINTERP,
372 "value",
SDDS_DOUBLE, &outlierControl.replacementValue, 1, OUTLIER_REPLACEVALUE, NULL))
373 SDDS_Bomb(
"invalid -replace syntax/values");
374 outlierControl.flags |= tmpFlags | OUTLIER_CONTROL_INVOKED;
375 break;
376 case SET_PERCENTILE_LIMIT:
377 if (scanned[iArg].n_items < 3)
378 SDDS_Bomb(
"invalid -percentileLimit syntax");
379 scanned[iArg].n_items -= 1;
380 if (!
scanItemList(&tmpFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
381 "lower",
SDDS_DOUBLE, &outlierControl.percentilePoint[0], 1, OUTLIER_PERCENTILE_LOWER,
382 "upper",
SDDS_DOUBLE, &outlierControl.percentilePoint[1], 1, OUTLIER_PERCENTILE_UPPER, NULL) ||
383 !(tmpFlags & OUTLIER_PERCENTILE_LOWER) || !(tmpFlags & OUTLIER_PERCENTILE_UPPER) ||
384 outlierControl.percentilePoint[0] >= outlierControl.percentilePoint[1])
385 SDDS_Bomb(
"invalid -percentileLimit syntax");
386 outlierControl.flags |= tmpFlags | OUTLIER_CONTROL_INVOKED;
387 break;
388 case SET_UNPOPULAR:
389 if (scanned[iArg].n_items < 2)
391 scanned[iArg].n_items -= 1;
392 if (!
scanItemList(&tmpFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
393 "bins",
SDDS_LONG, &(outlierControl.unpopularBins), 1, OUTLIER_UNPOPULAR_BINS, NULL) ||
394 !(tmpFlags & OUTLIER_UNPOPULAR_BINS) || outlierControl.unpopularBins < 2)
396 outlierControl.flags |= tmpFlags | OUTLIER_CONTROL_INVOKED;
397 break;
398 default:
399 fprintf(stderr, "Error: Unknown or ambiguous option: %s\n", scanned[iArg].list[0]);
400 exit(EXIT_FAILURE);
401 break;
402 }
403 } else {
404 if (!input)
405 input = scanned[iArg].list[0];
406 else if (!output)
407 output = scanned[iArg].list[0];
408 else
410 }
411 }
412 if (outlierControl.flags & OUTLIER_REPLACEFLAGS && outlierControl.flags & OUTLIER_MARKONLY)
413 SDDS_Bomb(
"Cannot use -replaceOnly and -markOnly simultaneously.");
414
415 processFilenames(
"sddsoutlier", &input, &output, pipeFlags, noWarnings, &tmpfileUsed);
416
417 if (!(outlierControl.flags & OUTLIER_CONTROL_INVOKED)) {
418 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_STDEV_GIVEN | OUTLIER_STDEVLIMIT_GIVEN;
419 outlierControl.stDevLimit = 2;
420 }
421
425
426 if (columnMajorOrder != -1)
427 SDDSout.layout.data_mode.column_major = columnMajorOrder;
428 else
429 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
430
431 if (outlierControl.flags & OUTLIER_MARKONLY &&
435 }
436
439
440 if ((columns = expandColumnPairNames(&SDDSout, &column, NULL, columns, excludeColumn, excludeColumns, FIND_NUMERIC_TYPE, 0)) <= 0) {
442 SDDS_Bomb(
"No columns selected for outlier control.");
443 }
444
445 isOutlier = NULL;
449
453 continue;
454 }
455 if (outlierControl.flags & OUTLIER_MARKONLY) {
456 if (isOutlierIndex >= 0) {
458 SDDS_Bomb(
"Unable to retrieve 'IsOutlier' column from input file despite its existence.");
459 } else {
460 long i;
461 isOutlier =
SDDS_Realloc(isOutlier,
sizeof(*isOutlier) * rows);
462 if (!isOutlier)
464 for (i = 0; i < rows; i++)
465 isOutlier[i] = 0;
466 }
467 }
468 if (outlierControl.flags & OUTLIER_VERBOSE_GIVEN)
469 fprintf(stderr, "%" PRId64 " rows in page %ld\n", rows, readCode);
470 if ((rows = removeOutliers(&SDDSout, rows, column, columns, &outlierControl, isOutlier)) == 0) {
471 if (!noWarnings)
472 fprintf(stderr, " No rows left after outlier control--skipping page.\n");
473 continue;
474 }
475 if (outlierControl.flags & OUTLIER_VERBOSE_GIVEN)
476 fprintf(stderr, "%" PRId64 " rows left after outlier control\n", rows);
478 fprintf(stderr,
"Problem with row selection:\n %" PRId64
" expected, %" PRId64
" counted\n", rows,
SDDS_CountRowsOfInterest(&SDDSout));
479 exit(EXIT_FAILURE);
480 }
484 }
488 exit(EXIT_FAILURE);
489 return EXIT_SUCCESS;
490}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetColumnFromLongs(SDDS_DATASET *SDDS_dataset, int32_t mode, int32_t *data, int64_t rows,...)
Sets the values for a single data column from an array of 32-bit integers (the SDDS "long" type).
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_SHORT
Identifier for the signed short integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.