78 CLO_DIFFERENCECOLUMNS,
88char *option[N_OPTIONS] = {
104 "sddssmooth [<inputfile>] [<outputfile>]\n"
105 " [-pipe=[input][,output]]\n"
106 " -columns=<name>[,...]\n"
107 " [-points=<oddInteger>]\n"
108 " [-passes=<integer>]\n"
109 " [-gaussian=<sigmaValueIn#Rows>]\n"
110 " [-despike[=neighbors=<integer>,passes=<integer>,averageOf=<integer>,threshold=<value>]]\n"
111 " [-SavitzkyGolay=<left>,<right>,<order>[,<derivativeOrder>]]\n"
112 " [-medianFilter=windowSize=<integer>]\n"
114 " [-differenceColumns]\n"
116 " [-majorOrder=row|column]\n"
118 " -pipe=[input][,output] The standard SDDS Toolkit pipe option.\n"
119 " -columns=<name>[,...] Specifies the names of the column(s) to smooth. The names may include wildcards.\n"
120 " -points=<oddInteger> Specifies the number of points to average to create a smoothed value for each point.\n"
121 " Must be an odd integer. Default is 3.\n"
122 " -passes=<integer> Specifies the number of nearest-neighbor-averaging smoothing passes to make over each column of data.\n"
123 " Default is 1. If 0, no such smoothing is done. In the limit of an infinite number of passes,\n"
124 " every point will tend toward the average value of the original data.\n"
125 " If -despike is also given, then despiking occurs first.\n"
126 " -gaussian=<sigmaValueIn#Rows> Smooths with a Gaussian kernel using the given sigma. Sigma is expressed in terms of the number of rows.\n"
127 " -despike[=neighbors=<integer>,passes=<integer>,averageOf=<integer>,threshold=<value>]\n"
128 " Specifies smoothing by despiking. By default, 4 nearest-neighbors are used and 1 pass is done.\n"
129 " If this option is not given, no despiking is done.\n"
130 " -SavitzkyGolay=<left>,<right>,<order>[,<derivativeOrder>]\n"
131 " Specifies smoothing by using a Savitzky-Golay filter, which involves fitting a polynomial of specified order through left + right + 1 points.\n"
132 " Optionally, takes the derivativeOrder-th derivative of the data.\n"
133 " If this option is given, nearest-neighbor-averaging smoothing is not done.\n"
134 " If -despike is also given, then despiking occurs first.\n"
135 " -medianFilter=windowSize=<integer> Specifies median-filter-based smoothing with the given window size (must be an odd integer, default is 3).\n"
136 " It smooths the original data by finding the median of a data point among the nearest left (W-1)/2 points,\n"
137 " the data point itself, and the nearest right (W-1)/2 points.\n"
138 " -newColumns Specifies that the smoothed data will be placed in new columns, rather than replacing\n"
139 " the data in each column with the smoothed result. The new columns are named columnNameSmoothed,\n"
140 " where columnName is the original name of a column.\n"
141 " -differenceColumns Specifies that additional columns be created in the output file, containing the difference between\n"
142 " the original data and the smoothed data. The new columns are named columnNameUnsmooth,\n"
143 " where columnName is the original name of the column.\n"
144 " -nowarnings Suppresses warning messages.\n"
145 " -majorOrder=row|column Specifies the major order for data processing: row or column.\n"
146 "\nProgram by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")\n";
/* Forward declarations for the two helpers whose definitions follow main()
 * later in this file. */
/* resolveColumnNames: main() passes the input dataset plus the addresses of
 * its column-name array and column count, and treats a zero return as
 * failure. */
148long resolveColumnNames(
SDDS_DATASET *SDDSin,
char ***column, int32_t *columns);
/* gaussianConvolution: operates on `rows` doubles in `data` using the given
 * sigma; the result is copied back into `data` by the definition below. */
149void gaussianConvolution(
double *data, int64_t rows,
double sigma);
/* Bits OR-ed into main()'s `flags` word. FL_NEWCOLUMNS selects the
 * "<name>Smoothed" output-column naming; FL_DIFCOLUMNS selects creation of
 * the "<name>Unsmooth" difference-column names. */
151#define FL_NEWCOLUMNS 0x0001UL
152#define FL_DIFCOLUMNS 0x0002UL
/* Flag value passed to scanItemList for the "averageof" keyword of -despike;
 * main() tests it in despikeFlags to decide whether averageOf should default
 * to the neighbors value. */
154#define DESPIKE_AVERAGEOF 0x0001U
/* Program entry point for sddssmooth.
 *
 * Visible flow: scan the command line, walk the scanned arguments and match
 * each option name against the option[] table, build the output (and
 * optionally difference) column names, then loop over the selected columns
 * applying despiking, Gaussian convolution, median filtering and/or
 * nearest-neighbor smoothing.
 *
 * NOTE(review): this listing omits many original lines (the embedded line
 * numbers skip), including most case labels, error exits and closing braces.
 * The comments below describe only what is visible and mark the rest as
 * unconfirmed. */
156int main(
int argc,
char **argv) {
158 char **inputColumn, **outputColumn, **difColumn;
160 long despike, median, smooth;
161 int32_t despikeNeighbors, despikePasses, despikeAverageOf;
162 char *input, *output;
165 int32_t smoothPoints, smoothPasses;
/* medianWindowSize starts at 3 here but is reset to 0 in the -medianFilter
 * handling further down. */
166 long noWarnings, medianWindowSize = 3;
167 unsigned long pipeFlags, flags, despikeFlags, majorOrderFlag, dummyFlags;
168 SCANNED_ARG *scanned;
170 double *data, despikeThreshold;
171 int32_t SGLeft, SGRight, SGOrder, SGDerivOrder;
/* -1 means "no -majorOrder given"; see the column_major assignment below. */
172 short columnMajorOrder = -1;
/* 0 disables the Gaussian step (it only runs when gaussianSigma > 0). */
173 double gaussianSigma = 0;
/* scanargs() fills `scanned` and its return value replaces argc. */
176 argc =
scanargs(&scanned, argc, argv);
/* Argument-count sanity check; the statement it guards is not visible. */
177 if (argc < 3 || argc > (3 + N_OPTIONS))
180 output = input = NULL;
181 inputColumn = outputColumn = NULL;
/* Walk every scanned argument after the program name. */
194 for (iArg = 1; iArg < argc; iArg++) {
195 if (scanned[iArg].arg_type == OPTION) {
/* Dispatch on the option name (list[0]) looked up in option[]. */
197 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
/* -passes: needs n_items == 2 and a value that parses as a 32-bit integer
 * into smoothPasses; a further condition sits on a line not shown. */
200 if (scanned[iArg].n_items != 2 ||
201 sscanf(scanned[iArg].list[1],
"%" SCNd32, &smoothPasses) != 1 ||
203 SDDS_Bomb(
"invalid -passes syntax/value");
/* -gaussian: needs n_items == 2 and a value that parses as a double into
 * gaussianSigma; a further condition sits on a line not shown. */
207 if (scanned[iArg].n_items != 2 ||
208 sscanf(scanned[iArg].list[1],
"%lf", &gaussianSigma) != 1 ||
210 SDDS_Bomb(
"invalid -gaussian syntax/value");
/* -points: needs n_items == 2 and a 32-bit integer; even values are
 * rejected. */
213 if (scanned[iArg].n_items != 2 ||
214 sscanf(scanned[iArg].list[1],
"%" SCNd32, &smoothPoints) != 1 ||
216 smoothPoints % 2 == 0)
217 SDDS_Bomb(
"invalid -points syntax/value");
/* Presumably the -columns case (label not visible): `columns` becomes the
 * item count minus the option name, and inputColumn receives the item
 * pointers themselves (no string copies are made here). */
220 if (scanned[iArg].n_items < 2)
222 inputColumn =
tmalloc(
sizeof(*inputColumn) * (columns = scanned[iArg].n_items - 1));
223 for (i = 0; i < columns; i++)
224 inputColumn[i] = scanned[iArg].list[i + 1];
/* Pipe option: hands the items after the option name to processPipeOption,
 * which reports through pipeFlags. */
227 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
231 flags |= FL_NEWCOLUMNS;
233 case CLO_DIFFERENCECOLUMNS:
234 flags |= FL_DIFCOLUMNS;
/* -despike handling: drop the option name from the item count, set the
 * defaults visible here (neighbors 4, threshold 0, averageOf 2), then parse
 * any keyword qualifiers. */
237 scanned[iArg].n_items--;
238 despikeNeighbors = 4;
240 despikeThreshold = 0;
241 despikeAverageOf = 2;
/* Qualifiers are scanned only when at least one remains. The parse fails on
 * a scanItemList error or when neighbors < 2, passes < 1, averageOf < 2 or
 * threshold < 0. */
242 if (scanned[iArg].n_items > 0 &&
243 (!
scanItemList(&despikeFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
244 "neighbors",
SDDS_LONG, &despikeNeighbors, 1, 0,
245 "passes",
SDDS_LONG, &despikePasses, 1, 0,
246 "averageof",
SDDS_LONG, &despikeAverageOf, 1, DESPIKE_AVERAGEOF,
247 "threshold",
SDDS_DOUBLE, &despikeThreshold, 1, 0, NULL) ||
248 despikeNeighbors < 2 || despikePasses < 1 || despikeAverageOf < 2 || despikeThreshold < 0)) {
249 fprintf(stderr,
"sddssmooth: Invalid -despike syntax/values: neighbors=%" PRId32
", passes=%" PRId32
", averageOf=%" PRId32
", threshold=%e\n", despikeNeighbors, despikePasses, despikeAverageOf, despikeThreshold);
/* If the averageof keyword was not supplied, average over all neighbors.
 * NOTE(review): scanItemList is skipped when -despike has no qualifiers, so
 * despikeFlags must already hold a defined value for this test; no such
 * initialization is visible in this listing -- confirm. */
252 if (!(despikeFlags & DESPIKE_AVERAGEOF))
253 despikeAverageOf = despikeNeighbors;
254 if (despikeAverageOf > despikeNeighbors)
255 SDDS_Bomb(
"invalid -despike syntax/values: averageOf>neighbors");
258 case CLO_MEDIAN_FILTER:
259 scanned[iArg].n_items--;
/* NOTE(review): the window size is zeroed before the optional scan, so with
 * no windowSize qualifier it stays 0 unless lines not shown restore the
 * default of 3 -- confirm. */
260 medianWindowSize = 0;
/* NOTE(review): medianWindowSize is declared `long`, but it is scanned with
 * type code SDDS_LONG, which the reference notes appended to this listing
 * describe as a signed 32-bit integer -- confirm the widths agree on the
 * target platforms. */
261 if (scanned[iArg].n_items > 0 &&
262 (!
scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
263 "windowSize",
SDDS_LONG, &medianWindowSize, 1, 0, NULL) ||
264 medianWindowSize <= 0)) {
265 fprintf(stderr,
"sddssmooth: Invalid -medianFilter syntax/values: windowSize=%ld\n", medianWindowSize);
/* -SavitzkyGolay: 4 or 5 items (option name, left, right, order and an
 * optional derivative order), each parsed as a 32-bit integer. A request
 * with left + right < order is rejected; other conditions sit on lines not
 * shown. */
273 case CLO_SAVITZKYGOLAY:
274 if ((scanned[iArg].n_items != 4 && scanned[iArg].n_items != 5) ||
275 sscanf(scanned[iArg].list[1],
"%" SCNd32, &SGLeft) != 1 ||
276 sscanf(scanned[iArg].list[2],
"%" SCNd32, &SGRight) != 1 ||
277 sscanf(scanned[iArg].list[3],
"%" SCNd32, &SGOrder) != 1 ||
278 (scanned[iArg].n_items == 5 && sscanf(scanned[iArg].list[4],
"%" SCNd32, &SGDerivOrder) != 1) ||
281 (SGLeft + SGRight) < SGOrder ||
284 SDDS_Bomb(
"invalid -SavitzkyGolay syntax/values");
/* -majorOrder: "row" or "column" keyword sets the matching bit in
 * majorOrderFlag, which is then translated to columnMajorOrder (1 = column
 * major, 0 = row major). */
286 case CLO_MAJOR_ORDER:
288 scanned[iArg].n_items--;
289 if (scanned[iArg].n_items > 0 &&
290 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
291 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
292 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
293 SDDS_Bomb(
"invalid -majorOrder syntax/values");
294 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
295 columnMajorOrder = 1;
296 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
297 columnMajorOrder = 0;
/* Message for an option name that could not be matched. */
300 fprintf(stderr,
"error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
/* Presumably the non-option branch: arguments are taken as the input and
 * output filenames; the conditions choosing between them are not visible. */
306 input = scanned[iArg].list[0];
308 output = scanned[iArg].list[0];
/* Warn when no despiking, no smoothing passes and no median filtering were
 * requested, unless noWarnings is set. */
319 if (!despike && !smoothPasses && !median && !noWarnings)
320 fprintf(stderr,
"warning: smoothing parameters won't result in any change in data (sddssmooth)\n");
323 SDDS_Bomb(
"supply the names of columns to smooth with the -columns option");
/* Let resolveColumnNames rewrite inputColumn/columns against the input
 * dataset; a zero return is the failure case. */
328 if (!resolveColumnNames(&SDDSin, &inputColumn, &columns))
331 SDDS_Bomb(
"no columns selected for smoothing");
336 outputColumn =
tmalloc(
sizeof(*outputColumn) * columns);
/* With FL_NEWCOLUMNS, results go to freshly named columns:
 * "<name>Smoothed" when SGDerivOrder <= 0, otherwise
 * "<name>SmoothedDeriv<N>". */
338 if (flags & FL_NEWCOLUMNS) {
339 for (i = 0; i < columns; i++) {
340 if (SGDerivOrder <= 0) {
341 outputColumn[i] =
tmalloc(
sizeof(**outputColumn) * (strlen(inputColumn[i]) + 1 + strlen(
"Smoothed")));
342 sprintf(outputColumn[i],
"%sSmoothed", inputColumn[i]);
/* The extra 5 bytes leave room for the digits of the derivative order
 * appended by the sprintf below. */
344 outputColumn[i] =
tmalloc(
sizeof(**outputColumn) * (strlen(inputColumn[i]) + 1 + strlen(
"SmoothedDeriv")) + 5);
345 sprintf(outputColumn[i],
"%sSmoothedDeriv%" PRId32, inputColumn[i], SGDerivOrder);
/* Otherwise each output name aliases the corresponding input name pointer,
 * i.e. the smoothed data use the original column names. */
351 for (i = 0; i < columns; i++)
352 outputColumn[i] = inputColumn[i];
/* With FL_DIFCOLUMNS, build one "<name>Unsmooth" name per input column. */
355 if (flags & FL_DIFCOLUMNS) {
356 difColumn =
tmalloc(
sizeof(*difColumn) * columns);
357 for (i = 0; i < columns; i++) {
358 difColumn[i] =
tmalloc(
sizeof(**difColumn) * (strlen(inputColumn[i]) + 1 + strlen(
"Unsmooth")));
359 sprintf(difColumn[i],
"%sUnsmooth", inputColumn[i]);
/* Output data ordering: an explicit -majorOrder choice wins; otherwise the
 * output inherits the input dataset's column_major setting. */
370 if (columnMajorOrder != -1)
371 SDDSout.layout.data_mode.column_major = columnMajorOrder;
373 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
/* Per-column processing loop. */
382 for (i = 0; i < columns; i++) {
/* Despiking call; the last argument (countLimit in the despikeData
 * prototype) is 0. Any guard on this call is on a line not shown. */
386 despikeData(data, rows, despikeNeighbors, despikePasses, despikeAverageOf, despikeThreshold, 0);
387 if (gaussianSigma > 0)
388 gaussianConvolution(data, rows, gaussianSigma);
/* Scratch buffer of `rows` doubles for the median-filter step; the memcpy
 * copies mData back over data. The call that fills mData and any NULL check
 * or free are not visible in this listing. */
390 double *mData = NULL;
391 mData = malloc(
sizeof(*mData) * rows);
393 memcpy(data, mData,
sizeof(*mData) * rows);
/* Loop repeated smoothPasses times; its body is not visible here. */
398 for (pass = 0; pass < smoothPasses; pass++)
400 }
else if (smooth && smoothPasses)
/* Nearest-neighbor averaging over smoothPoints points, smoothPasses times. */
401 smoothData(data, rows, smoothPoints, smoothPasses);
/* Difference-column handling iterates over every row; the loop body is not
 * visible here. */
405 if (flags & FL_DIFCOLUMNS) {
409 for (j = 0; j < rows; j++)
/* Definition of resolveColumnNames. main() calls it with the input dataset
 * and the addresses of its column-name array and count, and treats a zero
 * return as failure.
 * NOTE(review): only the signature and the loop header over the *columns
 * entries are visible in this listing; the matching logic and the return
 * paths cannot be described from here. */
428long resolveColumnNames(
SDDS_DATASET *SDDSin,
char ***column, int32_t *columns) {
/* Iterates over the *columns names currently held in *column. */
432 for (i = 0; i < *columns; i++) {
/* Smooths `data` in place by convolving it with a Gaussian kernel.
 *
 * data  - array of `rows` values; overwritten with the convolved result.
 * rows  - number of elements in `data`.
 * sigma - kernel width in units of rows; main() only calls this function
 *         when sigma > 0.
 *
 * NOTE(review): some original lines are missing from this listing (the
 * embedded line numbers skip), notably those that set the window limits
 * j1/j2 and any cleanup after the final copy. */
444void gaussianConvolution(
double *data, int64_t rows,
double sigma) {
445 double *data1, *expFactor;
446 int64_t i, j, j1, j2, nsRows, nsPerSide;
/* Kernel half-width: 6*sigma, truncated by the conversion to int64_t. */
448 nsPerSide = 6 * sigma;
/* Total number of kernel taps: both sides plus the centre point. */
449 nsRows = 2 * nsPerSide + 1;
450 expFactor =
tmalloc(
sizeof(*expFactor) * nsRows);
/* Tabulate exp(-(j/sigma)^2 / 2) / (sigma*sqrt(2*PI)) for offsets
 * -nsPerSide..nsPerSide; adding nsPerSide shifts the offset to a
 * zero-based array index. */
451 for (j = -nsPerSide; j <= nsPerSide; j++)
452 expFactor[j + nsPerSide] = exp(-sqr(j / sigma) / 2.0) / (sigma * sqrt(2 * PI));
/* Zero-filled accumulator for the result, so the += below starts from 0.
 * NOTE(review): no NULL check on this calloc is visible in this listing. */
454 data1 = calloc(rows,
sizeof(*data1));
455 for (i = 0; i < rows; i++) {
/* Accumulate the weighted sum over source indices j1..j2 (limits assigned
 * on lines not shown); the weight index corresponds to the offset j - i. */
462 for (j = j1; j <= j2; j++)
463 data1[i] += data[j] * expFactor[j - i + nsPerSide];
/* Copy the convolved values back over the caller's array. */
465 memcpy(data, data1,
sizeof(*data) * rows);
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_DefineParameter1(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, void *fixed_value)
Defines a data parameter with a fixed numerical value.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
int32_t SDDS_GetParameterIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named parameter in the SDDS dataset.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long SavitzkyGolaySmooth(double *data, long rows, long order, long nLeft, long nRight, long derivativeOrder)
Applies Savitzky-Golay smoothing or differentiation to a data array.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
void smoothData(double *data, long rows, long smoothPoints, long smoothPasses)
Smooth a data array using a moving average.
long despikeData(double *data, long rows, long neighbors, long passes, long averageOf, double threshold, long countLimit)
Remove spikes from a data array by comparing each point to its neighbors.