175 {
176 int iArg;
177 char **column, **excludeColumn;
178 long columns, excludeColumns;
179 char *input, *output;
180 SCANNED_ARG *scanned;
182 long readCode, dataLimitGiven, tmpfileUsed;
183 int64_t i, rows;
184 long noWarnings, isOutlierIndex;
185 int32_t *isOutlier;
187 unsigned long pipeFlags, tmpFlags, majorOrderFlag, dummyFlags;
188 short columnMajorOrder = -1;
189
191 argc =
scanargs(&scanned, argc, argv);
192 if (argc < 3) {
194 }
195
196 output = input = NULL;
197 columns = excludeColumns = dataLimitGiven = 0;
198 column = excludeColumn = NULL;
199
200 outlierControl.flags = 0;
201 outlierControl.passes = 1;
202 outlierControl.neighbors = 0;
203 pipeFlags = tmpfileUsed = noWarnings = isOutlierIndex = 0;
204
205 for (iArg = 1; iArg < argc; iArg++) {
206 if (scanned[iArg].arg_type == OPTION) {
207
208 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
209 case SET_MAJOR_ORDER:
210 majorOrderFlag = 0;
211 scanned[iArg].n_items--;
212 if (scanned[iArg].n_items > 0 &&
213 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
214 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
215 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
216 SDDS_Bomb(
"invalid -majorOrder syntax/values");
217 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
218 columnMajorOrder = 1;
219 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
220 columnMajorOrder = 0;
221 break;
222 case SET_COLUMNS:
223 if (columns)
224 SDDS_Bomb(
"only one -columns option may be given");
225 if (scanned[iArg].n_items < 2)
227 column =
tmalloc(
sizeof(*column) * (columns = scanned[iArg].n_items - 1));
228 for (i = 0; i < columns; i++)
229 column[i] = scanned[iArg].list[i + 1];
230 break;
231 case SET_EXCLUDE:
232 if (excludeColumns)
233 SDDS_Bomb(
"only one -excludecolumns option may be given");
234 if (scanned[iArg].n_items < 2)
235 SDDS_Bomb(
"invalid -excludecolumns syntax");
236 excludeColumn =
tmalloc(
sizeof(*excludeColumn) * (excludeColumns = scanned[iArg].n_items - 1));
237 for (i = 0; i < excludeColumns; i++)
238 excludeColumn[i] = scanned[iArg].list[i + 1];
239 break;
240 case SET_STDDEV_LIMIT:
241 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%lf", &outlierControl.stDevLimit) != 1 ||
242 outlierControl.stDevLimit <= 0)
244 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_STDEV_GIVEN | OUTLIER_STDEVLIMIT_GIVEN;
245 break;
246 case SET_ABS_LIMIT:
247 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%lf", &outlierControl.absoluteLimit) != 1 ||
248 outlierControl.absoluteLimit <= 0)
250 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_ABSLIMIT_GIVEN;
251 break;
252 case SET_ABSDEV_LIMIT:
253 if (scanned[iArg].n_items < 2)
254 SDDS_Bomb(
"invalid -absDeviationLimit syntax");
255 if (scanned[iArg].n_items == 2) {
256 if (sscanf(scanned[iArg].list[1], "%lf", &outlierControl.absDevLimit) != 1 || outlierControl.absDevLimit <= 0)
257 SDDS_Bomb(
"invalid -absDeviationLimit syntax");
258 } else {
259 if (sscanf(scanned[iArg].list[1], "%lf", &outlierControl.absDevLimit) != 1 || outlierControl.absDevLimit <= 0)
260 SDDS_Bomb(
"invalid -absDeviationLimit syntax");
261 scanned[iArg].list += 2;
262 scanned[iArg].n_items -= 2;
263 if (scanned[iArg].n_items > 0 &&
264 (!
scanItemList(&dummyFlags, scanned[iArg].list, &scanned[iArg].n_items, 0,
"neighbors",
SDDS_LONG, &(outlierControl.neighbors), 1, 0, NULL)))
265 SDDS_Bomb(
"invalid -absDeviationLimit syntax/value");
266 if (outlierControl.neighbors % 2 == 0)
267 outlierControl.neighbors += 1;
268
269 scanned[iArg].list -= 2;
270 scanned[iArg].n_items += 2;
271 }
272 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_ABSDEVLIMIT_GIVEN;
273 break;
274 case SET_VERBOSE:
275 outlierControl.flags |= OUTLIER_VERBOSE_GIVEN;
276 break;
277 case SET_PIPE:
278 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
280 break;
281 case SET_NOWARNINGS:
282 noWarnings = 1;
283 break;
284 case SET_INVERT:
285 outlierControl.flags |= OUTLIER_INVERT_GIVEN;
286 break;
287 case SET_MARKONLY:
288 outlierControl.flags |= OUTLIER_MARKONLY;
289 break;
290 case SET_CHANCELIMIT:
291 if (scanned[iArg].n_items != 2 ||
292 sscanf(scanned[iArg].list[1], "%lf", &outlierControl.chanceLimit) != 1 ||
293 outlierControl.chanceLimit <= 0)
294 SDDS_Bomb(
"invalid -chanceLimit syntax");
295 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_CHANCELIMIT_GIVEN;
296 break;
297 case SET_PASSES:
298 if (scanned[iArg].n_items != 2 ||
299 sscanf(scanned[iArg].list[1], "%ld", &outlierControl.passes) != 1 ||
300 outlierControl.passes < 1)
302 break;
303 case SET_MAXLIMIT:
304 outlierControl.flags |= OUTLIER_MAXLIMIT_GIVEN | OUTLIER_CONTROL_INVOKED;
305 if (scanned[iArg].n_items != 2 ||
306 sscanf(scanned[iArg].list[1], "%lf", &outlierControl.maximumLimit) != 1)
307 SDDS_Bomb(
"invalid -maximumLimit syntax");
308 break;
309 case SET_MINLIMIT:
310 outlierControl.flags |= OUTLIER_MINLIMIT_GIVEN | OUTLIER_CONTROL_INVOKED;
311 if (scanned[iArg].n_items != 2 ||
312 sscanf(scanned[iArg].list[1], "%lf", &outlierControl.minimumLimit) != 1)
313 SDDS_Bomb(
"invalid -minimumLimit syntax");
314 break;
315 case SET_REPLACE:
316 if (scanned[iArg].n_items != 2)
318 scanned[iArg].n_items -= 1;
319 if (!
scanItemList(&tmpFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
320 "lastvalue", -1, NULL, 0, OUTLIER_REPLACELAST,
321 "nextvalue", -1, NULL, 0, OUTLIER_REPLACENEXT,
322 "interpolatedvalue", -1, NULL, 0, OUTLIER_REPLACEINTERP,
323 "value",
SDDS_DOUBLE, &outlierControl.replacementValue, 1, OUTLIER_REPLACEVALUE, NULL))
324 SDDS_Bomb(
"invalid -replace syntax/values");
325 outlierControl.flags |= tmpFlags | OUTLIER_CONTROL_INVOKED;
326 break;
327 case SET_PERCENTILE_LIMIT:
328 if (scanned[iArg].n_items < 3)
329 SDDS_Bomb(
"invalid -percentileLimit syntax");
330 scanned[iArg].n_items -= 1;
331 if (!
scanItemList(&tmpFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
332 "lower",
SDDS_DOUBLE, &outlierControl.percentilePoint[0], 1, OUTLIER_PERCENTILE_LOWER,
333 "upper",
SDDS_DOUBLE, &outlierControl.percentilePoint[1], 1, OUTLIER_PERCENTILE_UPPER, NULL) ||
334 !(tmpFlags & OUTLIER_PERCENTILE_LOWER) || !(tmpFlags & OUTLIER_PERCENTILE_UPPER) ||
335 outlierControl.percentilePoint[0] >= outlierControl.percentilePoint[1])
336 SDDS_Bomb(
"invalid -percentileLimit syntax");
337 outlierControl.flags |= tmpFlags | OUTLIER_CONTROL_INVOKED;
338 break;
339 case SET_UNPOPULAR:
340 if (scanned[iArg].n_items < 2)
342 scanned[iArg].n_items -= 1;
343 if (!
scanItemList(&tmpFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
344 "bins",
SDDS_LONG, &(outlierControl.unpopularBins), 1, OUTLIER_UNPOPULAR_BINS, NULL) ||
345 !(tmpFlags & OUTLIER_UNPOPULAR_BINS) || outlierControl.unpopularBins < 2)
347 outlierControl.flags |= tmpFlags | OUTLIER_CONTROL_INVOKED;
348 break;
349 default:
350 fprintf(stderr, "Error: Unknown or ambiguous option: %s\n", scanned[iArg].list[0]);
351 exit(EXIT_FAILURE);
352 break;
353 }
354 } else {
355 if (!input)
356 input = scanned[iArg].list[0];
357 else if (!output)
358 output = scanned[iArg].list[0];
359 else
361 }
362 }
363 if (outlierControl.flags & OUTLIER_REPLACEFLAGS && outlierControl.flags & OUTLIER_MARKONLY)
364 SDDS_Bomb(
"Cannot use -replaceOnly and -markOnly simultaneously.");
365
366 processFilenames(
"sddsoutlier", &input, &output, pipeFlags, noWarnings, &tmpfileUsed);
367
368 if (!(outlierControl.flags & OUTLIER_CONTROL_INVOKED)) {
369 outlierControl.flags |= OUTLIER_CONTROL_INVOKED | OUTLIER_STDEV_GIVEN | OUTLIER_STDEVLIMIT_GIVEN;
370 outlierControl.stDevLimit = 2;
371 }
372
376
377 if (columnMajorOrder != -1)
378 SDDSout.layout.data_mode.column_major = columnMajorOrder;
379 else
380 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
381
382 if (outlierControl.flags & OUTLIER_MARKONLY &&
386 }
387
390
391 if ((columns = expandColumnPairNames(&SDDSout, &column, NULL, columns, excludeColumn, excludeColumns, FIND_NUMERIC_TYPE, 0)) <= 0) {
393 SDDS_Bomb(
"No columns selected for outlier control.");
394 }
395
396 isOutlier = NULL;
400
404 continue;
405 }
406 if (outlierControl.flags & OUTLIER_MARKONLY) {
407 if (isOutlierIndex >= 0) {
409 SDDS_Bomb(
"Unable to retrieve 'IsOutlier' column from input file despite its existence.");
410 } else {
411 long i;
412 isOutlier =
SDDS_Realloc(isOutlier,
sizeof(*isOutlier) * rows);
413 if (!isOutlier)
415 for (i = 0; i < rows; i++)
416 isOutlier[i] = 0;
417 }
418 }
419 if (outlierControl.flags & OUTLIER_VERBOSE_GIVEN)
420 fprintf(stderr, "%" PRId64 " rows in page %ld\n", rows, readCode);
421 if ((rows = removeOutliers(&SDDSout, rows, column, columns, &outlierControl, isOutlier)) == 0) {
422 if (!noWarnings)
423 fprintf(stderr, " No rows left after outlier control--skipping page.\n");
424 continue;
425 }
426 if (outlierControl.flags & OUTLIER_VERBOSE_GIVEN)
427 fprintf(stderr, "%" PRId64 " rows left after outlier control\n", rows);
429 fprintf(stderr,
"Problem with row selection:\n %" PRId64
" expected, %" PRId64
" counted\n", rows,
SDDS_CountRowsOfInterest(&SDDSout));
430 exit(EXIT_FAILURE);
431 }
435 }
439 exit(EXIT_FAILURE);
440 return EXIT_SUCCESS;
441}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetColumnFromLongs(SDDS_DATASET *SDDS_dataset, int32_t mode, int32_t *data, int64_t rows,...)
Sets the values for a single data column from an array of 32-bit integers (int32_t, the SDDS_LONG type).
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_SHORT
Identifier for the signed short integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.