33 CLO_DIFFERENCECOLUMNS,
43char *option[N_OPTIONS] = {
59 "Usage: sddssmooth [options] [inputfile] [outputfile]\n"
61 " -pipe=[input][,output] The standard SDDS Toolkit pipe option.\n"
62 " -columns=<name>[,...] Specifies the names of the column(s) to smooth. The names may include wildcards.\n"
63 " -points=<oddInteger> Specifies the number of points to average to create a smoothed value for each point.\n"
64 " Must be an odd integer. Default is 3.\n"
65 " -passes=<integer> Specifies the number of nearest-neighbor-averaging smoothing passes to make over each column of data.\n"
66 " Default is 1. If 0, no such smoothing is done. In the limit of an infinite number of passes,\n"
67 " every point will tend toward the average value of the original data.\n"
68 " If -despike is also given, then despiking occurs first.\n"
69 " -gaussian=<sigmaValueIn#Rows> Smooths with a Gaussian kernel using the given sigma. Sigma is expressed in terms of the number of rows.\n"
70 " -despike[=neighbors=<integer>,passes=<integer>,averageOf=<integer>,threshold=<value>]\n"
71 " Specifies smoothing by despiking. By default, 4 nearest-neighbors are used and 1 pass is done.\n"
72 " If this option is not given, no despiking is done.\n"
73 " -SavitzkyGolay=<left>,<right>,<order>[,<derivativeOrder>]\n"
74 " Specifies smoothing by using a Savitzky-Golay filter, which involves fitting a polynomial of specified order through left + right + 1 points.\n"
75 " Optionally, takes the derivativeOrder-th derivative of the data.\n"
76 " If this option is given, nearest-neighbor-averaging smoothing is not done.\n"
77 " If -despike is also given, then despiking occurs first.\n"
78 " -medianFilter=windowSize=<integer> Specifies median-filter-based smoothing with the given window size (must be an odd integer, default is 3).\n"
79 " It smooths the original data by finding the median of a data point among the nearest left (W-1)/2 points,\n"
80 " the data point itself, and the nearest right (W-1)/2 points.\n"
81 " -newColumns Specifies that the smoothed data will be placed in new columns, rather than replacing\n"
82 " the data in each column with the smoothed result. The new columns are named columnNameSmoothed,\n"
83 " where columnName is the original name of a column.\n"
84 " -differenceColumns Specifies that additional columns be created in the output file, containing the difference between\n"
85 " the original data and the smoothed data. The new columns are named columnNameUnsmooth,\n"
86 " where columnName is the original name of the column.\n"
87 " -nowarnings Suppresses warning messages.\n"
88 " -majorOrder=row|column Specifies the major order for data processing: row or column.\n"
89 "\nProgram by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")\n";
91long resolveColumnNames(
SDDS_DATASET *SDDSin,
char ***column, int32_t *columns);
92void gaussianConvolution(
double *data, int64_t rows,
double sigma);
94#define FL_NEWCOLUMNS 0x0001UL
95#define FL_DIFCOLUMNS 0x0002UL
97#define DESPIKE_AVERAGEOF 0x0001U
99int main(
int argc,
char **argv) {
101 char **inputColumn, **outputColumn, **difColumn;
103 long despike, median, smooth;
104 int32_t despikeNeighbors, despikePasses, despikeAverageOf;
105 char *input, *output;
108 int32_t smoothPoints, smoothPasses;
109 long noWarnings, medianWindowSize = 3;
110 unsigned long pipeFlags, flags, despikeFlags, majorOrderFlag, dummyFlags;
111 SCANNED_ARG *scanned;
113 double *data, despikeThreshold;
114 int32_t SGLeft, SGRight, SGOrder, SGDerivOrder;
115 short columnMajorOrder = -1;
116 double gaussianSigma = 0;
119 argc =
scanargs(&scanned, argc, argv);
120 if (argc < 3 || argc > (3 + N_OPTIONS))
123 output = input = NULL;
124 inputColumn = outputColumn = NULL;
137 for (iArg = 1; iArg < argc; iArg++) {
138 if (scanned[iArg].arg_type == OPTION) {
140 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
143 if (scanned[iArg].n_items != 2 ||
144 sscanf(scanned[iArg].list[1],
"%" SCNd32, &smoothPasses) != 1 ||
146 SDDS_Bomb(
"invalid -passes syntax/value");
150 if (scanned[iArg].n_items != 2 ||
151 sscanf(scanned[iArg].list[1],
"%lf", &gaussianSigma) != 1 ||
153 SDDS_Bomb(
"invalid -gaussian syntax/value");
156 if (scanned[iArg].n_items != 2 ||
157 sscanf(scanned[iArg].list[1],
"%" SCNd32, &smoothPoints) != 1 ||
159 smoothPoints % 2 == 0)
160 SDDS_Bomb(
"invalid -points syntax/value");
163 if (scanned[iArg].n_items < 2)
165 inputColumn =
tmalloc(
sizeof(*inputColumn) * (columns = scanned[iArg].n_items - 1));
166 for (i = 0; i < columns; i++)
167 inputColumn[i] = scanned[iArg].list[i + 1];
170 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
174 flags |= FL_NEWCOLUMNS;
176 case CLO_DIFFERENCECOLUMNS:
177 flags |= FL_DIFCOLUMNS;
180 scanned[iArg].n_items--;
181 despikeNeighbors = 4;
183 despikeThreshold = 0;
184 despikeAverageOf = 2;
185 if (scanned[iArg].n_items > 0 &&
186 (!
scanItemList(&despikeFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
187 "neighbors",
SDDS_LONG, &despikeNeighbors, 1, 0,
188 "passes",
SDDS_LONG, &despikePasses, 1, 0,
189 "averageof",
SDDS_LONG, &despikeAverageOf, 1, DESPIKE_AVERAGEOF,
190 "threshold",
SDDS_DOUBLE, &despikeThreshold, 1, 0, NULL) ||
191 despikeNeighbors < 2 || despikePasses < 1 || despikeAverageOf < 2 || despikeThreshold < 0)) {
192 fprintf(stderr,
"sddssmooth: Invalid -despike syntax/values: neighbors=%" PRId32
", passes=%" PRId32
", averageOf=%" PRId32
", threshold=%e\n", despikeNeighbors, despikePasses, despikeAverageOf, despikeThreshold);
195 if (!(despikeFlags & DESPIKE_AVERAGEOF))
196 despikeAverageOf = despikeNeighbors;
197 if (despikeAverageOf > despikeNeighbors)
198 SDDS_Bomb(
"invalid -despike syntax/values: averageOf>neighbors");
201 case CLO_MEDIAN_FILTER:
202 scanned[iArg].n_items--;
203 medianWindowSize = 0;
204 if (scanned[iArg].n_items > 0 &&
205 (!
scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
206 "windowSize",
SDDS_LONG, &medianWindowSize, 1, 0, NULL) ||
207 medianWindowSize <= 0)) {
208 fprintf(stderr,
"sddssmooth: Invalid -medianFilter syntax/values: windowSize=%ld\n", medianWindowSize);
216 case CLO_SAVITZKYGOLAY:
217 if ((scanned[iArg].n_items != 4 && scanned[iArg].n_items != 5) ||
218 sscanf(scanned[iArg].list[1],
"%" SCNd32, &SGLeft) != 1 ||
219 sscanf(scanned[iArg].list[2],
"%" SCNd32, &SGRight) != 1 ||
220 sscanf(scanned[iArg].list[3],
"%" SCNd32, &SGOrder) != 1 ||
221 (scanned[iArg].n_items == 5 && sscanf(scanned[iArg].list[4],
"%" SCNd32, &SGDerivOrder) != 1) ||
224 (SGLeft + SGRight) < SGOrder ||
227 SDDS_Bomb(
"invalid -SavitzkyGolay syntax/values");
229 case CLO_MAJOR_ORDER:
231 scanned[iArg].n_items--;
232 if (scanned[iArg].n_items > 0 &&
233 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
234 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
235 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
236 SDDS_Bomb(
"invalid -majorOrder syntax/values");
237 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
238 columnMajorOrder = 1;
239 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
240 columnMajorOrder = 0;
243 fprintf(stderr,
"error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
249 input = scanned[iArg].list[0];
251 output = scanned[iArg].list[0];
262 if (!despike && !smoothPasses && !median && !noWarnings)
263 fprintf(stderr,
"warning: smoothing parameters won't result in any change in data (sddssmooth)\n");
266 SDDS_Bomb(
"supply the names of columns to smooth with the -columns option");
271 if (!resolveColumnNames(&SDDSin, &inputColumn, &columns))
274 SDDS_Bomb(
"no columns selected for smoothing");
279 outputColumn =
tmalloc(
sizeof(*outputColumn) * columns);
281 if (flags & FL_NEWCOLUMNS) {
282 for (i = 0; i < columns; i++) {
283 if (SGDerivOrder <= 0) {
284 outputColumn[i] =
tmalloc(
sizeof(**outputColumn) * (strlen(inputColumn[i]) + 1 + strlen(
"Smoothed")));
285 sprintf(outputColumn[i],
"%sSmoothed", inputColumn[i]);
287 outputColumn[i] =
tmalloc(
sizeof(**outputColumn) * (strlen(inputColumn[i]) + 1 + strlen(
"SmoothedDeriv")) + 5);
288 sprintf(outputColumn[i],
"%sSmoothedDeriv%" PRId32, inputColumn[i], SGDerivOrder);
294 for (i = 0; i < columns; i++)
295 outputColumn[i] = inputColumn[i];
298 if (flags & FL_DIFCOLUMNS) {
299 difColumn =
tmalloc(
sizeof(*difColumn) * columns);
300 for (i = 0; i < columns; i++) {
301 difColumn[i] =
tmalloc(
sizeof(**difColumn) * (strlen(inputColumn[i]) + 1 + strlen(
"Unsmooth")));
302 sprintf(difColumn[i],
"%sUnsmooth", inputColumn[i]);
313 if (columnMajorOrder != -1)
314 SDDSout.layout.data_mode.column_major = columnMajorOrder;
316 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
325 for (i = 0; i < columns; i++) {
329 despikeData(data, rows, despikeNeighbors, despikePasses, despikeAverageOf, despikeThreshold, 0);
330 if (gaussianSigma > 0)
331 gaussianConvolution(data, rows, gaussianSigma);
333 double *mData = NULL;
334 mData = malloc(
sizeof(*mData) * rows);
336 memcpy(data, mData,
sizeof(*mData) * rows);
341 for (pass = 0; pass < smoothPasses; pass++)
343 }
else if (smooth && smoothPasses)
344 smoothData(data, rows, smoothPoints, smoothPasses);
348 if (flags & FL_DIFCOLUMNS) {
352 for (j = 0; j < rows; j++)
371long resolveColumnNames(
SDDS_DATASET *SDDSin,
char ***column, int32_t *columns) {
375 for (i = 0; i < *columns; i++) {
387void gaussianConvolution(
double *data, int64_t rows,
double sigma) {
388 double *data1, *expFactor;
389 int64_t i, j, j1, j2, nsRows, nsPerSide;
391 nsPerSide = 6 * sigma;
392 nsRows = 2 * nsPerSide + 1;
393 expFactor =
tmalloc(
sizeof(*expFactor) * nsRows);
394 for (j = -nsPerSide; j <= nsPerSide; j++)
395 expFactor[j + nsPerSide] = exp(-sqr(j / sigma) / 2.0) / (sigma * sqrt(2 * PI));
397 data1 = calloc(rows,
sizeof(*data1));
398 for (i = 0; i < rows; i++) {
405 for (j = j1; j <= j2; j++)
406 data1[i] += data[j] * expFactor[j - i + nsPerSide];
408 memcpy(data, data1,
sizeof(*data) * rows);
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_DefineParameter1(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, void *fixed_value)
Defines a data parameter with a fixed numerical value.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
int32_t SDDS_GetParameterIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named parameter in the SDDS dataset.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long SavitzkyGolaySmooth(double *data, long rows, long order, long nLeft, long nRight, long derivativeOrder)
Applies Savitzky-Golay smoothing or differentiation to a data array.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
void smoothData(double *data, long rows, long smoothPoints, long smoothPasses)
Smooth a data array using a moving average.
long despikeData(double *data, long rows, long neighbors, long passes, long averageOf, double threshold, long countLimit)
Remove spikes from a data array by comparing each point to its neighbors.