SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddssmooth.c File Reference

Detailed Description

Smooths data columns in SDDS-format files using various techniques.

This program smooths data columns in SDDS-format files using techniques such as nearest-neighbor averaging, Gaussian convolution, median filtering, and Savitzky-Golay filtering. It also supports despiking and the creation of new or difference columns. Users can specify a variety of options for input/output customization and processing methods.

Usage

sddssmooth [<inputfile>] [<outputfile>]
[-pipe=[input][,output]]
-columns=<name>[,...]
[-points=<oddInteger>]
[-passes=<integer>]
[-gaussian=<sigmaValueIn#Rows>]
[-despike[=neighbors=<integer>,passes=<integer>,averageOf=<integer>,threshold=<value>]]
[-SavitzkyGolay=<left>,<right>,<order>[,<derivativeOrder>]]
[-medianFilter=windowSize=<integer>]
[-newColumns]
[-differenceColumns]
[-nowarnings]
[-majorOrder=row|column]

Options

Required Description
-columns Specifies column names (wildcards allowed).
Optional Description
-pipe SDDS pipe input/output.
-points Sets points for nearest-neighbor averaging (odd integer, default: 3).
-passes Number of smoothing passes (default: 1, 0 disables smoothing).
-gaussian Gaussian kernel smoothing with specified sigma.
-despike Despike using neighbors, passes, averageOf, and threshold.
-SavitzkyGolay Savitzky-Golay filter with specified window and order.
-medianFilter Median filtering with specified window size (odd integer, default: 3).
-newColumns Create new columns with smoothed data.
-differenceColumns Create difference columns (original minus smoothed).
-nowarnings Suppress warnings.
-majorOrder Specify row or column major order for data.

Incompatibilities

  • -SavitzkyGolay is incompatible with:
    • -passes (nearest-neighbor smoothing is not done if -SavitzkyGolay is specified).
  • -despike must be applied before other smoothing options.
  • -medianFilter requires an odd integer window size.
  • -points requires an odd integer.
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Author
M. Borland, C. Saunders, R. Soliday, H. Shang

Definition in file sddssmooth.c.

#include "mdb.h"
#include "SDDS.h"
#include "scan.h"
#include <ctype.h>

Go to the source code of this file.

Functions

long resolveColumnNames (SDDS_DATASET *SDDSin, char ***column, int32_t *columns)
 
void gaussianConvolution (double *data, int64_t rows, double sigma)
 
int main (int argc, char **argv)
 

Function Documentation

◆ gaussianConvolution()

void gaussianConvolution ( double * data,
int64_t rows,
double sigma )

Definition at line 444 of file sddssmooth.c.

444 {
445 double *data1, *expFactor;
446 int64_t i, j, j1, j2, nsRows, nsPerSide;
447
448 nsPerSide = 6 * sigma;
449 nsRows = 2 * nsPerSide + 1;
450 expFactor = tmalloc(sizeof(*expFactor) * nsRows);
451 for (j = -nsPerSide; j <= nsPerSide; j++)
452 expFactor[j + nsPerSide] = exp(-sqr(j / sigma) / 2.0) / (sigma * sqrt(2 * PI));
453
454 data1 = calloc(rows, sizeof(*data1));
455 for (i = 0; i < rows; i++) {
456 j1 = i - nsPerSide;
457 j2 = i + nsPerSide;
458 if (j1 < 0)
459 j1 = 0;
460 if (j2 >= rows)
461 j2 = rows;
462 for (j = j1; j <= j2; j++)
463 data1[i] += data[j] * expFactor[j - i + nsPerSide];
464 }
465 memcpy(data, data1, sizeof(*data) * rows);
466 free(data1);
467}
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59

◆ main()

int main ( int argc,
char ** argv )

Definition at line 156 of file sddssmooth.c.

156 {
157 int iArg;
158 char **inputColumn, **outputColumn, **difColumn;
159 int32_t columns;
160 long despike, median, smooth;
161 int32_t despikeNeighbors, despikePasses, despikeAverageOf;
162 char *input, *output;
163 long i, readCode;
164 int64_t j, rows;
165 int32_t smoothPoints, smoothPasses;
166 long noWarnings, medianWindowSize = 3;
167 unsigned long pipeFlags, flags, despikeFlags, majorOrderFlag, dummyFlags;
168 SCANNED_ARG *scanned;
169 SDDS_DATASET SDDSin, SDDSout;
170 double *data, despikeThreshold;
171 int32_t SGLeft, SGRight, SGOrder, SGDerivOrder;
172 short columnMajorOrder = -1;
173 double gaussianSigma = 0;
174
176 argc = scanargs(&scanned, argc, argv);
177 if (argc < 3 || argc > (3 + N_OPTIONS))
178 bomb(NULL, USAGE);
179
180 output = input = NULL;
181 inputColumn = outputColumn = NULL;
182 columns = 0;
183 pipeFlags = 0;
184 smoothPoints = 3;
185 smoothPasses = 1;
186 flags = 0;
187 despike = 0;
188 median = 0;
189 smooth = 0;
190 noWarnings = 0;
191 SGOrder = -1;
192 SGDerivOrder = 0;
193
194 for (iArg = 1; iArg < argc; iArg++) {
195 if (scanned[iArg].arg_type == OPTION) {
196 /* process options here */
197 switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
198 case CLO_PASSES:
199 smooth = 1;
200 if (scanned[iArg].n_items != 2 ||
201 sscanf(scanned[iArg].list[1], "%" SCNd32, &smoothPasses) != 1 ||
202 smoothPasses < 0)
203 SDDS_Bomb("invalid -passes syntax/value");
204 break;
205 case CLO_GAUSSIAN:
206 gaussianSigma = 0;
207 if (scanned[iArg].n_items != 2 ||
208 sscanf(scanned[iArg].list[1], "%lf", &gaussianSigma) != 1 ||
209 gaussianSigma <= 0)
210 SDDS_Bomb("invalid -gaussian syntax/value");
211 break;
212 case CLO_POINTS:
213 if (scanned[iArg].n_items != 2 ||
214 sscanf(scanned[iArg].list[1], "%" SCNd32, &smoothPoints) != 1 ||
215 smoothPoints < 1 ||
216 smoothPoints % 2 == 0)
217 SDDS_Bomb("invalid -points syntax/value");
218 break;
219 case CLO_COLUMNS:
220 if (scanned[iArg].n_items < 2)
221 SDDS_Bomb("invalid -columns syntax");
222 inputColumn = tmalloc(sizeof(*inputColumn) * (columns = scanned[iArg].n_items - 1));
223 for (i = 0; i < columns; i++)
224 inputColumn[i] = scanned[iArg].list[i + 1];
225 break;
226 case CLO_PIPE:
227 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
228 SDDS_Bomb("invalid -pipe syntax");
229 break;
230 case CLO_NEWCOLUMNS:
231 flags |= FL_NEWCOLUMNS;
232 break;
233 case CLO_DIFFERENCECOLUMNS:
234 flags |= FL_DIFCOLUMNS;
235 break;
236 case CLO_DESPIKE:
237 scanned[iArg].n_items--;
238 despikeNeighbors = 4;
239 despikePasses = 1;
240 despikeThreshold = 0;
241 despikeAverageOf = 2;
242 if (scanned[iArg].n_items > 0 &&
243 (!scanItemList(&despikeFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
244 "neighbors", SDDS_LONG, &despikeNeighbors, 1, 0,
245 "passes", SDDS_LONG, &despikePasses, 1, 0,
246 "averageof", SDDS_LONG, &despikeAverageOf, 1, DESPIKE_AVERAGEOF,
247 "threshold", SDDS_DOUBLE, &despikeThreshold, 1, 0, NULL) ||
248 despikeNeighbors < 2 || despikePasses < 1 || despikeAverageOf < 2 || despikeThreshold < 0)) {
249 fprintf(stderr, "sddssmooth: Invalid -despike syntax/values: neighbors=%" PRId32 ", passes=%" PRId32 ", averageOf=%" PRId32 ", threshold=%e\n", despikeNeighbors, despikePasses, despikeAverageOf, despikeThreshold);
250 exit(EXIT_FAILURE);
251 }
252 if (!(despikeFlags & DESPIKE_AVERAGEOF))
253 despikeAverageOf = despikeNeighbors;
254 if (despikeAverageOf > despikeNeighbors)
255 SDDS_Bomb("invalid -despike syntax/values: averageOf>neighbors");
256 despike = 1;
257 break;
258 case CLO_MEDIAN_FILTER:
259 scanned[iArg].n_items--;
260 medianWindowSize = 0;
261 if (scanned[iArg].n_items > 0 &&
262 (!scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
263 "windowSize", SDDS_LONG, &medianWindowSize, 1, 0, NULL) ||
264 medianWindowSize <= 0)) {
265 fprintf(stderr, "sddssmooth: Invalid -medianFilter syntax/values: windowSize=%ld\n", medianWindowSize);
266 exit(EXIT_FAILURE);
267 }
268 median = 1;
269 break;
270 case CLO_NOWARNINGS:
271 noWarnings = 1;
272 break;
273 case CLO_SAVITZKYGOLAY:
274 if ((scanned[iArg].n_items != 4 && scanned[iArg].n_items != 5) ||
275 sscanf(scanned[iArg].list[1], "%" SCNd32, &SGLeft) != 1 ||
276 sscanf(scanned[iArg].list[2], "%" SCNd32, &SGRight) != 1 ||
277 sscanf(scanned[iArg].list[3], "%" SCNd32, &SGOrder) != 1 ||
278 (scanned[iArg].n_items == 5 && sscanf(scanned[iArg].list[4], "%" SCNd32, &SGDerivOrder) != 1) ||
279 SGLeft < 0 ||
280 SGRight < 0 ||
281 (SGLeft + SGRight) < SGOrder ||
282 SGOrder < 0 ||
283 SGDerivOrder < 0)
284 SDDS_Bomb("invalid -SavitzkyGolay syntax/values");
285 break;
286 case CLO_MAJOR_ORDER:
287 majorOrderFlag = 0;
288 scanned[iArg].n_items--;
289 if (scanned[iArg].n_items > 0 &&
290 (!scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
291 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
292 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
293 SDDS_Bomb("invalid -majorOrder syntax/values");
294 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
295 columnMajorOrder = 1;
296 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
297 columnMajorOrder = 0;
298 break;
299 default:
300 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
301 exit(EXIT_FAILURE);
302 break;
303 }
304 } else {
305 if (!input)
306 input = scanned[iArg].list[0];
307 else if (!output)
308 output = scanned[iArg].list[0];
309 else
310 SDDS_Bomb("too many filenames seen");
311 }
312 }
313
314 processFilenames("sddssmooth", &input, &output, pipeFlags, 0, NULL);
315 /*do not change the previous setting; before it does smooth by default before adding median */
316 if (!median)
317 smooth = 1;
318
319 if (!despike && !smoothPasses && !median && !noWarnings)
320 fprintf(stderr, "warning: smoothing parameters won't result in any change in data (sddssmooth)\n");
321
322 if (!columns)
323 SDDS_Bomb("supply the names of columns to smooth with the -columns option");
324
325 if (!SDDS_InitializeInput(&SDDSin, input))
326 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
327
328 if (!resolveColumnNames(&SDDSin, &inputColumn, &columns))
329 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
330 if (!columns)
331 SDDS_Bomb("no columns selected for smoothing");
332
333 if (!SDDS_InitializeCopy(&SDDSout, &SDDSin, output, "w"))
334 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
335
336 outputColumn = tmalloc(sizeof(*outputColumn) * columns);
337
338 if (flags & FL_NEWCOLUMNS) {
339 for (i = 0; i < columns; i++) {
340 if (SGDerivOrder <= 0) {
341 outputColumn[i] = tmalloc(sizeof(**outputColumn) * (strlen(inputColumn[i]) + 1 + strlen("Smoothed")));
342 sprintf(outputColumn[i], "%sSmoothed", inputColumn[i]);
343 } else {
344 outputColumn[i] = tmalloc(sizeof(**outputColumn) * (strlen(inputColumn[i]) + 1 + strlen("SmoothedDeriv")) + 5);
345 sprintf(outputColumn[i], "%sSmoothedDeriv%" PRId32, inputColumn[i], SGDerivOrder);
346 }
347 if (!SDDS_TransferColumnDefinition(&SDDSout, &SDDSin, inputColumn[i], outputColumn[i]))
348 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
349 }
350 } else
351 for (i = 0; i < columns; i++)
352 outputColumn[i] = inputColumn[i];
353
354 difColumn = NULL;
355 if (flags & FL_DIFCOLUMNS) {
356 difColumn = tmalloc(sizeof(*difColumn) * columns);
357 for (i = 0; i < columns; i++) {
358 difColumn[i] = tmalloc(sizeof(**difColumn) * (strlen(inputColumn[i]) + 1 + strlen("Unsmooth")));
359 sprintf(difColumn[i], "%sUnsmooth", inputColumn[i]);
360 if (!SDDS_TransferColumnDefinition(&SDDSout, &SDDSin, inputColumn[i], difColumn[i]))
361 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
362 }
363 }
364
365 if ((SDDS_GetParameterIndex(&SDDSout, "SmoothPoints") < 0 &&
366 SDDS_DefineParameter1(&SDDSout, "SmoothPoints", NULL, NULL, NULL, NULL, SDDS_LONG, &smoothPoints) < 0) ||
367 (SDDS_GetParameterIndex(&SDDSout, "SmoothPasses") < 0 &&
368 SDDS_DefineParameter1(&SDDSout, "SmoothPasses", NULL, NULL, NULL, NULL, SDDS_LONG, &smoothPasses) < 0))
369 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
370 if (columnMajorOrder != -1)
371 SDDSout.layout.data_mode.column_major = columnMajorOrder;
372 else
373 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
374
375 if (!SDDS_WriteLayout(&SDDSout))
376 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
377
378 while ((readCode = SDDS_ReadPage(&SDDSin)) > 0) {
379 if (!SDDS_CopyPage(&SDDSout, &SDDSin))
380 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
381 if ((rows = SDDS_CountRowsOfInterest(&SDDSin))) {
382 for (i = 0; i < columns; i++) {
383 if (!(data = SDDS_GetColumnInDoubles(&SDDSin, inputColumn[i])))
384 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
385 if (despike)
386 despikeData(data, rows, despikeNeighbors, despikePasses, despikeAverageOf, despikeThreshold, 0);
387 if (gaussianSigma > 0)
388 gaussianConvolution(data, rows, gaussianSigma);
389 if (median) {
390 double *mData = NULL;
391 mData = malloc(sizeof(*mData) * rows);
392 median_filter(data, mData, rows, medianWindowSize);
393 memcpy(data, mData, sizeof(*mData) * rows);
394 free(mData);
395 }
396 if (SGOrder >= 0) {
397 long pass = 0;
398 for (pass = 0; pass < smoothPasses; pass++)
399 SavitzkyGolaySmooth(data, rows, SGOrder, SGLeft, SGRight, SGDerivOrder);
400 } else if (smooth && smoothPasses)
401 smoothData(data, rows, smoothPoints, smoothPasses);
402
403 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, data, rows, outputColumn[i]))
404 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
405 if (flags & FL_DIFCOLUMNS) {
406 double *data0;
407 if (!(data0 = SDDS_GetColumnInDoubles(&SDDSin, inputColumn[i])))
408 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
409 for (j = 0; j < rows; j++)
410 data0[j] -= data[j];
411 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, data0, rows, difColumn[i]))
412 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
413 free(data0);
414 }
415 free(data);
416 }
417 }
418 if (!SDDS_WritePage(&SDDSout))
419 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
420 }
421 if (!SDDS_Terminate(&SDDSin) || !SDDS_Terminate(&SDDSout)) {
422 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
423 exit(EXIT_FAILURE);
424 }
425 return EXIT_SUCCESS;
426}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_DefineParameter1(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, void *fixed_value)
Defines a data parameter with a fixed numerical value.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_GetParameterIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named parameter in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
void median_filter(double *x, double *m, long n, long W)
Applies a median filter to an input signal.
long SavitzkyGolaySmooth(double *data, long rows, long order, long nLeft, long nRight, long derivativeOrder)
Applies Savitzky-Golay smoothing or differentiation to a data array.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
void smoothData(double *data, long rows, long smoothPoints, long smoothPasses)
Smooth a data array using a moving average.
Definition smooth.c:35
long despikeData(double *data, long rows, long neighbors, long passes, long averageOf, double threshold, long countLimit)
Remove spikes from a data array by comparing each point to its neighbors.
Definition smooth.c:86

◆ resolveColumnNames()

long resolveColumnNames ( SDDS_DATASET * SDDSin,
char *** column,
int32_t * columns )

Definition at line 428 of file sddssmooth.c.

428 {
429 long i;
430
431 SDDS_SetColumnFlags(SDDSin, 0);
432 for (i = 0; i < *columns; i++) {
433 if (!SDDS_SetColumnsOfInterest(SDDSin, SDDS_MATCH_STRING, (*column)[i], SDDS_OR))
434 return 0;
435 }
436 free(*column);
437 if (!(*column = SDDS_GetColumnNames(SDDSin, columns)) || *columns == 0) {
438 SDDS_SetError("no columns found");
439 return 0;
440 }
441 return 1;
442}
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
int32_t SDDS_SetColumnFlags(SDDS_DATASET *SDDS_dataset, int32_t column_flag_value)
Sets the acceptance flags for all columns in the current data table of a data set.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.