68static char *option[N_OPTIONS] = {
80 "Usage: sddsbaseline [<input>] [<output>]\n"
81 " [-pipe=<in>[,<out>]]\n"
82 " [-columns=<listOfNames>]\n"
83 " [-nonnegative [-despike=passes=<number>,widthlimit=<value>] [-repeats=<count>]]\n"
84 " [-select={endpoints=<number> | outsideFWHA=<multiplier> | antioutlier=<passes>}]\n"
85 " [-method={average|fit[,terms=<number>]}]\n"
86 " [-majorOrder=row|column]\n\n"
88 " -pipe Specify input and/or output pipes.\n"
89 " -columns List of columns to process.\n"
90 " -nonnegative Forces all values to be nonnegative after baseline subtraction.\n"
91 " This is accomplished by setting all negative values to 0.\n"
92 " -despike Specify that positive features narrower than widthLimit shall be set to zero.\n"
94 " passes=<number> Number of despike passes.\n"
95 " widthlimit=<value> Width limit for despiking.\n"
96 " -repeats Specify how many times to apply the baseline removal algorithm.\n"
97 " Meaningful only if used in combination with -nonnegative.\n"
98 " -select Specify how to select points to include in baseline determination.\n"
100 " endpoints=<number>\n"
101 " outsideFWHA=<multiplier>\n"
102 " antioutlier=<passes>\n"
103 " -method Specify how to process selected points in order to compute baseline.\n"
106 " fit[,terms=<number>]\n"
107 " -majorOrder Specify write output in row or column major order.\n\n"
108 "Program by Michael Borland.("__DATE__
" "__TIME__
", SVN revision: " SVN_VERSION
")\n";
110#define SELECT_ENDPOINTS 0x0001U
111#define SELECT_OUTSIDEFWHA 0x0002U
112#define SELECT_ANTIOUTLIER 0x0004U
114#define METHOD_FIT 0x0001U
115#define METHOD_AVERAGE 0x0002U
117long resolveColumnNames(
SDDS_DATASET *SDDSin,
char ***column, int32_t *columns);
118void selectEndpoints(
short *selected, int64_t rows,
long endPoints);
119void selectOutsideFWHA(
double *data,
double *indepData,
short *selected, int64_t rows,
double fwhaLimit);
120void selectAntiOutlier(
double *data,
short *selected, int64_t rows,
long passes);
121void fitAndRemoveBaseline(
double *data,
double *indepData,
short *selected, int64_t rows,
long terms);
122void averageAndRemoveBaseline(
double *data,
short *selected, int64_t rows);
123void despikeProfile(
double *data, int64_t rows,
long widthLimit,
long passes);
125int main(
int argc,
char **argv) {
128 char *input, *output;
129 long readCode, repeats, repeat, fitTerms = 2;
132 unsigned long pipeFlags, methodFlags, selectFlags, dummyFlags, majorOrderFlag;
133 SCANNED_ARG *scanned;
135 double *data, *indepData;
136 int32_t endPoints, antiOutlierPasses;
140 int32_t despikePasses, despikeWidthLimit;
141 short columnMajorOrder = -1;
144 argc =
scanargs(&scanned, argc, argv);
148 output = input = NULL;
150 columns = nonnegative = 0;
152 pipeFlags = methodFlags = selectFlags = dummyFlags = 0;
153 endPoints = antiOutlierPasses = 0;
156 despikeWidthLimit = 2;
158 for (iArg = 1; iArg < argc; iArg++) {
159 if (scanned[iArg].arg_type == OPTION) {
161 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
162 case CLO_MAJOR_ORDER:
164 scanned[iArg].n_items -= 1;
165 if (scanned[iArg].n_items > 0 &&
166 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
167 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
168 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
169 SDDS_Bomb(
"invalid -majorOrder syntax/values");
170 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
171 columnMajorOrder = 1;
172 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
173 columnMajorOrder = 0;
176 if (scanned[iArg].n_items < 2)
178 inputColumn =
tmalloc(
sizeof(*inputColumn) * (columns = scanned[iArg].n_items - 1));
179 for (i = 0; i < columns; i++)
180 inputColumn[i] = scanned[iArg].list[i + 1];
183 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
187 if (!(scanned[iArg].n_items -= 1))
189 if (!
scanItemList(&methodFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
190 "average", -1, NULL, 0, METHOD_AVERAGE,
191 "fit", -1, NULL, 0, METHOD_FIT,
192 "terms",
SDDS_LONG, &fitTerms, 1, 0, NULL) ||
193 bitsSet(methodFlags) != 1 || fitTerms < 2)
194 SDDS_Bomb(
"invalid -method syntax/values");
197 if (!(scanned[iArg].n_items -= 1))
199 if (!
scanItemList(&selectFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
200 "endpoints",
SDDS_LONG, &endPoints, 1, SELECT_ENDPOINTS,
201 "outsidefwha",
SDDS_DOUBLE, &outsideFWHA, 1, SELECT_OUTSIDEFWHA,
202 "antioutlier",
SDDS_LONG, &antiOutlierPasses, 1, SELECT_ANTIOUTLIER, NULL) ||
204 SDDS_Bomb(
"invalid -select syntax/values");
206 case CLO_NONNEGATIVE:
210 if (scanned[iArg].n_items != 2 ||
211 sscanf(scanned[iArg].list[1],
"%ld", &repeats) != 1 ||
217 if (!(scanned[iArg].n_items -= 1))
219 if (!
scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
220 "passes",
SDDS_LONG, &despikePasses, 1, 0,
221 "widthlimit",
SDDS_LONG, &despikeWidthLimit, 1, 0, NULL) ||
223 despikeWidthLimit < 1)
224 SDDS_Bomb(
"invalid -despike syntax/values");
227 fprintf(stderr,
"error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
233 input = scanned[iArg].list[0];
235 output = scanned[iArg].list[0];
246 if (!nonnegative && despikePasses)
247 SDDS_Bomb(
"not meaningful to despike without setting -nonnegative");
248 if (!nonnegative && repeats > 1)
249 SDDS_Bomb(
"not meaningful to repeat without setting -nonnegative");
254 SDDS_Bomb(
"supply the names of columns to process with the -columns option");
259 if (!resolveColumnNames(&SDDSin, &inputColumn, &columns))
262 SDDS_Bomb(
"no columns selected for processing");
268 if (columnMajorOrder != -1)
269 SDDSout.layout.data_mode.column_major = columnMajorOrder;
271 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
281 if (!(indepData =
SDDS_Realloc(indepData,
sizeof(*indepData) * rows)) ||
282 !(selected =
SDDS_Realloc(selected,
sizeof(*selected) * rows)))
284 for (i = 0; i < rows; i++)
286 for (i = 0; i < columns; i++) {
289 for (repeat = 0; repeat < repeats; repeat++) {
290 for (j = 0; j < rows; j++)
292 switch (selectFlags) {
293 case SELECT_ENDPOINTS:
294 selectEndpoints(selected, rows, endPoints);
296 case SELECT_OUTSIDEFWHA:
297 selectOutsideFWHA(data, indepData, selected, rows, outsideFWHA);
299 case SELECT_ANTIOUTLIER:
300 selectAntiOutlier(data, selected, rows, antiOutlierPasses);
306 switch (methodFlags) {
308 fitAndRemoveBaseline(data, indepData, selected, rows, fitTerms);
311 averageAndRemoveBaseline(data, selected, rows);
317 for (j = 0; j < rows; j++)
321 despikeProfile(data, rows, despikeWidthLimit, despikePasses);
325 !
SDDS_SetColumn(&SDDSout, SDDS_SET_BY_NAME, selected, rows,
"SelectedForBaselineDetermination"))
348long resolveColumnNames(
SDDS_DATASET *SDDSin,
char ***column, int32_t *columns) {
352 for (i = 0; i < *columns; i++) {
364void selectEndpoints(
short *selected, int64_t rows,
long endPoints) {
366 for (i = 0; i < endPoints && i < rows; i++)
368 for (i = rows - 1; i > rows - endPoints - 1 && i >= 0; i--)
372void selectOutsideFWHA(
double *data,
double *indepData,
short *selected, int64_t rows,
double fwhaLimit) {
373 double top, base, fwha;
374 int64_t i, i1, i2, i2save;
376 double point1, point2;
378 if (rows < 3 || fwhaLimit <= 0)
388 if ((i1 =
findCrossingPoint(0, data, rows, (top - base) * 0.5 + base, 1, indepData, &point1)) < 0 ||
389 (i2 = i2save =
findCrossingPoint(i1, data, rows, (top - base) * 0.9 + base, -1, NULL, NULL)) < 0 ||
390 (i2 =
findCrossingPoint(i2, data, rows, (top - base) * 0.5 + base, -1, indepData, &point2)) < 0) {
393 fwha = point2 - point1;
395 for (i = 0; i < rows; i++)
397 for (i = imax - fwha * fwhaLimit; i <= imax + fwha * fwhaLimit; i++) {
406void selectAntiOutlier(
double *data,
short *selected, int64_t rows,
long passes) {
407 double ave, sum2, limit;
410 for (i = 0; i < rows; i++)
412 while (--passes >= 0) {
413 for (i = ave = count = 0; i < rows; i++)
421 for (i = sum2 = 0; i < rows; i++)
423 sum2 += sqr(data[i] - ave);
424 limit = 2 * sqrt(sum2 / count);
425 for (i = 0; i < rows; i++)
426 if (selected[i] && fabs(data[i] - ave) > limit)
431void fitAndRemoveBaseline(
double *data0,
double *indepData0,
short *selected, int64_t rows,
long fitTerms) {
432 double *data, *indepData = NULL;
435 double *coef, *sCoef;
437 if (!(coef = malloc(
sizeof(*coef) * fitTerms)) || !(sCoef = malloc(
sizeof(*sCoef) * fitTerms)))
438 SDDS_Bomb(
"allocation failure (fitAndRemoveBaseline)");
439 for (i = count = 0; i < rows; i++)
444 if (!(data = malloc(
sizeof(*data) * count)) || !(indepData = malloc(
sizeof(*indepData) * count)))
446 for (i = j = 0; i < rows; i++) {
449 indepData[j] = indepData0[i];
454 if (!
lsfn(indepData, data, NULL, (
long)count, fitTerms - 1, coef, sCoef, &chi, NULL))
457 for (i = 0; i < rows; i++) {
460 for (j = 0; j < fitTerms; j++) {
461 data0[i] -= term * coef[j];
462 term *= indepData0[i];
472void averageAndRemoveBaseline(
double *data,
short *selected, int64_t rows) {
475 for (i = count = ave = 0; i < rows; i++)
483 for (i = 0; i < rows; i++)
488void despikeProfile(
double *data, int64_t rows,
long widthLimit,
long passes) {
489 int64_t i, i0, i1, j;
490 while (--passes >= 0) {
491 for (i = 0; i < rows; i++) {
495 }
else if (!(data[i - 1] == 0 && data[i] != 0))
498 for (i1 = i + 1; i1 < rows; i1++)
501 if ((i1 - i0) <= widthLimit)
502 for (j = i0; j < i1; j++)
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_SHORT
Identifier for the signed short integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
long bitsSet(unsigned long data)
Counts the number of set bits (1s) in the given data.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
int index_min_max(int64_t *imin, int64_t *imax, double *list, int64_t n)
Finds the indices of the minimum and maximum values in a list of doubles.
long lsfn(double *xd, double *yd, double *sy, long nd, long nf, double *coef, double *s_coef, double *chi, double *diff)
Computes nth order polynomial least squares fit.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
long findTopBaseLevels(double *top, double *base, double *data, int64_t points, long bins, double sigmasRequired)
Finds the top-level and base-level of a dataset.
int64_t findCrossingPoint(int64_t start, double *data, int64_t points, double level, long direction, double *indepData, double *location)
Finds the crossing point in the data where the data crosses a specified level.