76static char *option[N_OPTIONS] = {
88 "sddsbaseline [<input>] [<output>]\n"
89 " [-pipe=[<input>][,<output>]]\n"
90 " [-columns=<listOfNames>]\n"
91 " [-nonnegative [-despike=passes=<number>,widthlimit=<value>] [-repeats=<count>]]\n"
92 " [-select={endpoints=<number> | outsideFWHA=<multiplier> | antioutlier=<passes>}]\n"
93 " [-method={average|fit[,terms=<number>]}]\n"
94 " [-majorOrder=row|column]\n"
96 " -pipe Specify input and/or output pipes.\n"
97 " -columns List of columns to process.\n"
98 " -nonnegative Forces all values to be nonnegative after baseline subtraction.\n"
99 " This is accomplished by setting all negative values to 0.\n"
100 " -despike Specify that positive features narrower than widthLimit shall be set to zero.\n"
102 " passes=<number> Number of despike passes.\n"
103 " widthlimit=<value> Width limit for despiking.\n"
104 " -repeats Specify how many times to apply the baseline removal algorithm.\n"
105 " Meaningful only if used in combination with -nonnegative.\n"
106 " -select Specify how to select points to include in baseline determination.\n"
108 " endpoints=<number>\n"
109 " outsideFWHA=<multiplier>\n"
110 " antioutlier=<passes>\n"
111 " -method Specify how to process selected points in order to compute baseline.\n"
114 " fit[,terms=<number>]\n"
115 " -majorOrder Specify write output in row or column major order.\n\n"
116 "Program by Michael Borland.("__DATE__
" "__TIME__
", SVN revision: " SVN_VERSION
")\n";
118#define SELECT_ENDPOINTS 0x0001U
119#define SELECT_OUTSIDEFWHA 0x0002U
120#define SELECT_ANTIOUTLIER 0x0004U
122#define METHOD_FIT 0x0001U
123#define METHOD_AVERAGE 0x0002U
125long resolveColumnNames(
SDDS_DATASET *SDDSin,
char ***column, int32_t *columns);
126void selectEndpoints(
short *selected, int64_t rows,
long endPoints);
127void selectOutsideFWHA(
double *data,
double *indepData,
short *selected, int64_t rows,
double fwhaLimit);
128void selectAntiOutlier(
double *data,
short *selected, int64_t rows,
long passes);
129void fitAndRemoveBaseline(
double *data,
double *indepData,
short *selected, int64_t rows,
long terms);
130void averageAndRemoveBaseline(
double *data,
short *selected, int64_t rows);
131void despikeProfile(
double *data, int64_t rows,
long widthLimit,
long passes);
133int main(
int argc,
char **argv) {
136 char *input, *output;
137 long readCode, repeats, repeat, fitTerms = 2;
140 unsigned long pipeFlags, methodFlags, selectFlags, dummyFlags, majorOrderFlag;
141 SCANNED_ARG *scanned;
143 double *data, *indepData;
144 int32_t endPoints, antiOutlierPasses;
148 int32_t despikePasses, despikeWidthLimit;
149 short columnMajorOrder = -1;
152 argc =
scanargs(&scanned, argc, argv);
156 output = input = NULL;
158 columns = nonnegative = 0;
160 pipeFlags = methodFlags = selectFlags = dummyFlags = 0;
161 endPoints = antiOutlierPasses = 0;
164 despikeWidthLimit = 2;
166 for (iArg = 1; iArg < argc; iArg++) {
167 if (scanned[iArg].arg_type == OPTION) {
169 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
170 case CLO_MAJOR_ORDER:
172 scanned[iArg].n_items -= 1;
173 if (scanned[iArg].n_items > 0 &&
174 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
175 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
176 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
177 SDDS_Bomb(
"invalid -majorOrder syntax/values");
178 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
179 columnMajorOrder = 1;
180 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
181 columnMajorOrder = 0;
184 if (scanned[iArg].n_items < 2)
186 inputColumn =
tmalloc(
sizeof(*inputColumn) * (columns = scanned[iArg].n_items - 1));
187 for (i = 0; i < columns; i++)
188 inputColumn[i] = scanned[iArg].list[i + 1];
191 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
195 if (!(scanned[iArg].n_items -= 1))
197 if (!
scanItemList(&methodFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
198 "average", -1, NULL, 0, METHOD_AVERAGE,
199 "fit", -1, NULL, 0, METHOD_FIT,
200 "terms",
SDDS_LONG, &fitTerms, 1, 0, NULL) ||
201 bitsSet(methodFlags) != 1 || fitTerms < 2)
202 SDDS_Bomb(
"invalid -method syntax/values");
205 if (!(scanned[iArg].n_items -= 1))
207 if (!
scanItemList(&selectFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
208 "endpoints",
SDDS_LONG, &endPoints, 1, SELECT_ENDPOINTS,
209 "outsidefwha",
SDDS_DOUBLE, &outsideFWHA, 1, SELECT_OUTSIDEFWHA,
210 "antioutlier",
SDDS_LONG, &antiOutlierPasses, 1, SELECT_ANTIOUTLIER, NULL) ||
212 SDDS_Bomb(
"invalid -select syntax/values");
214 case CLO_NONNEGATIVE:
218 if (scanned[iArg].n_items != 2 ||
219 sscanf(scanned[iArg].list[1],
"%ld", &repeats) != 1 ||
225 if (!(scanned[iArg].n_items -= 1))
227 if (!
scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
228 "passes",
SDDS_LONG, &despikePasses, 1, 0,
229 "widthlimit",
SDDS_LONG, &despikeWidthLimit, 1, 0, NULL) ||
231 despikeWidthLimit < 1)
232 SDDS_Bomb(
"invalid -despike syntax/values");
235 fprintf(stderr,
"error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
241 input = scanned[iArg].list[0];
243 output = scanned[iArg].list[0];
254 if (!nonnegative && despikePasses)
255 SDDS_Bomb(
"not meaningful to despike without setting -nonnegative");
256 if (!nonnegative && repeats > 1)
257 SDDS_Bomb(
"not meaningful to repeat without setting -nonnegative");
262 SDDS_Bomb(
"supply the names of columns to process with the -columns option");
267 if (!resolveColumnNames(&SDDSin, &inputColumn, &columns))
270 SDDS_Bomb(
"no columns selected for processing");
276 if (columnMajorOrder != -1)
277 SDDSout.layout.data_mode.column_major = columnMajorOrder;
279 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
289 if (!(indepData =
SDDS_Realloc(indepData,
sizeof(*indepData) * rows)) ||
290 !(selected =
SDDS_Realloc(selected,
sizeof(*selected) * rows)))
292 for (i = 0; i < rows; i++)
294 for (i = 0; i < columns; i++) {
297 for (repeat = 0; repeat < repeats; repeat++) {
298 for (j = 0; j < rows; j++)
300 switch (selectFlags) {
301 case SELECT_ENDPOINTS:
302 selectEndpoints(selected, rows, endPoints);
304 case SELECT_OUTSIDEFWHA:
305 selectOutsideFWHA(data, indepData, selected, rows, outsideFWHA);
307 case SELECT_ANTIOUTLIER:
308 selectAntiOutlier(data, selected, rows, antiOutlierPasses);
314 switch (methodFlags) {
316 fitAndRemoveBaseline(data, indepData, selected, rows, fitTerms);
319 averageAndRemoveBaseline(data, selected, rows);
325 for (j = 0; j < rows; j++)
329 despikeProfile(data, rows, despikeWidthLimit, despikePasses);
333 !
SDDS_SetColumn(&SDDSout, SDDS_SET_BY_NAME, selected, rows,
"SelectedForBaselineDetermination"))
356long resolveColumnNames(
SDDS_DATASET *SDDSin,
char ***column, int32_t *columns) {
360 for (i = 0; i < *columns; i++) {
372void selectEndpoints(
short *selected, int64_t rows,
long endPoints) {
374 for (i = 0; i < endPoints && i < rows; i++)
376 for (i = rows - 1; i > rows - endPoints - 1 && i >= 0; i--)
380void selectOutsideFWHA(
double *data,
double *indepData,
short *selected, int64_t rows,
double fwhaLimit) {
381 double top, base, fwha;
382 int64_t i, i1, i2, i2save;
384 double point1, point2;
386 if (rows < 3 || fwhaLimit <= 0)
396 if ((i1 =
findCrossingPoint(0, data, rows, (top - base) * 0.5 + base, 1, indepData, &point1)) < 0 ||
397 (i2 = i2save =
findCrossingPoint(i1, data, rows, (top - base) * 0.9 + base, -1, NULL, NULL)) < 0 ||
398 (i2 =
findCrossingPoint(i2, data, rows, (top - base) * 0.5 + base, -1, indepData, &point2)) < 0) {
401 fwha = point2 - point1;
403 for (i = 0; i < rows; i++)
405 for (i = imax - fwha * fwhaLimit; i <= imax + fwha * fwhaLimit; i++) {
414void selectAntiOutlier(
double *data,
short *selected, int64_t rows,
long passes) {
415 double ave, sum2, limit;
418 for (i = 0; i < rows; i++)
420 while (--passes >= 0) {
421 for (i = ave = count = 0; i < rows; i++)
429 for (i = sum2 = 0; i < rows; i++)
431 sum2 += sqr(data[i] - ave);
432 limit = 2 * sqrt(sum2 / count);
433 for (i = 0; i < rows; i++)
434 if (selected[i] && fabs(data[i] - ave) > limit)
439void fitAndRemoveBaseline(
double *data0,
double *indepData0,
short *selected, int64_t rows,
long fitTerms) {
440 double *data, *indepData = NULL;
443 double *coef, *sCoef;
445 if (!(coef = malloc(
sizeof(*coef) * fitTerms)) || !(sCoef = malloc(
sizeof(*sCoef) * fitTerms)))
446 SDDS_Bomb(
"allocation failure (fitAndRemoveBaseline)");
447 for (i = count = 0; i < rows; i++)
452 if (!(data = malloc(
sizeof(*data) * count)) || !(indepData = malloc(
sizeof(*indepData) * count)))
454 for (i = j = 0; i < rows; i++) {
457 indepData[j] = indepData0[i];
462 if (!
lsfn(indepData, data, NULL, (
long)count, fitTerms - 1, coef, sCoef, &chi, NULL))
465 for (i = 0; i < rows; i++) {
468 for (j = 0; j < fitTerms; j++) {
469 data0[i] -= term * coef[j];
470 term *= indepData0[i];
480void averageAndRemoveBaseline(
double *data,
short *selected, int64_t rows) {
483 for (i = count = ave = 0; i < rows; i++)
491 for (i = 0; i < rows; i++)
496void despikeProfile(
double *data, int64_t rows,
long widthLimit,
long passes) {
497 int64_t i, i0, i1, j;
498 while (--passes >= 0) {
499 for (i = 0; i < rows; i++) {
503 }
else if (!(data[i - 1] == 0 && data[i] != 0))
506 for (i1 = i + 1; i1 < rows; i1++)
509 if ((i1 - i0) <= widthLimit)
510 for (j = i0; j < i1; j++)
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_SHORT
Identifier for the signed short integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
long bitsSet(unsigned long data)
Counts the number of set bits (1s) in the given data.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
int index_min_max(int64_t *imin, int64_t *imax, double *list, int64_t n)
Finds the indices of the minimum and maximum values in a list of doubles.
long lsfn(double *xd, double *yd, double *sy, long nd, long nf, double *coef, double *s_coef, double *chi, double *diff)
Computes nth order polynomial least squares fit.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
long findTopBaseLevels(double *top, double *base, double *data, int64_t points, long bins, double sigmasRequired)
Finds the top-level and base-level of a dataset.
int64_t findCrossingPoint(int64_t start, double *data, int64_t points, double level, long direction, double *indepData, double *location)
Finds the crossing point in the data where the data crosses a specified level.