SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsbaseline.c File Reference

Detailed Description

Baseline subtraction tool for SDDS datasets.

This program processes SDDS datasets to subtract a baseline from specified columns. It supports various methods for baseline computation and selection criteria, including options for nonnegative constraints, despiking, and multiple repeats. The tool provides a flexible and configurable solution for scientific data processing needs.

Usage

sddsbaseline [<input>] [<output>]
[-pipe=[<input>][,<output>]]
[-columns=<listOfNames>]
[-nonnegative]
[-despike=passes=<number>,widthlimit=<value>]
[-repeats=<count>]
[-select={endpoints=<number> |
outsideFWHA=<multiplier> |
antioutlier=<passes>}]
[-method={average|fit[,terms=<number>]}]
[-majorOrder=row|column]

Options

Required Description
-columns List of columns to process.
-select Defines the criteria for baseline determination (endpoints, outsideFWHA, antioutlier).
-method Selects the baseline computation method (average or fit with terms).
Optional Description
-pipe Specify input and/or output pipes.
-nonnegative Enforces all values to be nonnegative after baseline subtraction.
-despike Removes narrow positive features based on width and passes parameters.
-repeats Specifies the number of iterations for baseline subtraction.
-majorOrder Specifies output ordering: row or column major.

Incompatibilities

  • -nonnegative is required for:
    • -despike
    • Multiple iterations with -repeats
  • Only one selection criterion (endpoints, outsideFWHA, or antioutlier) may be used at a time.
  • fit method requires at least 2 terms for polynomial fitting.
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Author
M. Borland, R. Soliday, H. Shang

Definition in file sddsbaseline.c.

#include "mdb.h"
#include "scan.h"
#include "SDDS.h"

Go to the source code of this file.

Functions

long resolveColumnNames (SDDS_DATASET *SDDSin, char ***column, int32_t *columns)
 
void selectEndpoints (short *selected, int64_t rows, long endPoints)
 
void selectOutsideFWHA (double *data, double *indepData, short *selected, int64_t rows, double fwhaLimit)
 
void selectAntiOutlier (double *data, short *selected, int64_t rows, long passes)
 
void fitAndRemoveBaseline (double *data, double *indepData, short *selected, int64_t rows, long terms)
 
void averageAndRemoveBaseline (double *data, short *selected, int64_t rows)
 
void despikeProfile (double *data, int64_t rows, long widthLimit, long passes)
 
int main (int argc, char **argv)
 

Function Documentation

◆ averageAndRemoveBaseline()

void averageAndRemoveBaseline ( double * data,
short * selected,
int64_t rows )

Definition at line 480 of file sddsbaseline.c.

480 {
481 int64_t i, count;
482 double ave;
483 for (i = count = ave = 0; i < rows; i++)
484 if (selected[i]) {
485 count++;
486 ave += data[i];
487 }
488
489 if (count) {
490 ave /= count;
491 for (i = 0; i < rows; i++)
492 data[i] -= ave;
493 }
494}

◆ despikeProfile()

void despikeProfile ( double * data,
int64_t rows,
long widthLimit,
long passes )

Definition at line 496 of file sddsbaseline.c.

496 {
497 int64_t i, i0, i1, j;
498 while (--passes >= 0) {
499 for (i = 0; i < rows; i++) {
500 if (i == 0) {
501 if (data[i] != 0)
502 continue;
503 } else if (!(data[i - 1] == 0 && data[i] != 0))
504 continue;
505 i0 = i;
506 for (i1 = i + 1; i1 < rows; i1++)
507 if (!data[i1])
508 break;
509 if ((i1 - i0) <= widthLimit)
510 for (j = i0; j < i1; j++)
511 data[j] = 0;
512 }
513 }
514}

◆ fitAndRemoveBaseline()

void fitAndRemoveBaseline ( double * data,
double * indepData,
short * selected,
int64_t rows,
long terms )

Definition at line 439 of file sddsbaseline.c.

439 {
440 double *data, *indepData = NULL;
441 int64_t count, i, j;
442 double chi;
443 double *coef, *sCoef;
444
445 if (!(coef = malloc(sizeof(*coef) * fitTerms)) || !(sCoef = malloc(sizeof(*sCoef) * fitTerms)))
446 SDDS_Bomb("allocation failure (fitAndRemoveBaseline)");
447 for (i = count = 0; i < rows; i++)
448 if (selected[i])
449 count++;
450 if (count < 3)
451 return;
452 if (!(data = malloc(sizeof(*data) * count)) || !(indepData = malloc(sizeof(*indepData) * count)))
453 SDDS_Bomb("memory allocation failure");
454 for (i = j = 0; i < rows; i++) {
455 if (selected[i]) {
456 data[j] = data0[i];
457 indepData[j] = indepData0[i];
458 j++;
459 }
460 }
461
462 if (!lsfn(indepData, data, NULL, (long)count, fitTerms - 1, coef, sCoef, &chi, NULL))
463 return;
464
465 for (i = 0; i < rows; i++) {
466 double term;
467 term = 1;
468 for (j = 0; j < fitTerms; j++) {
469 data0[i] -= term * coef[j];
470 term *= indepData0[i];
471 }
472 }
473
474 free(data);
475 free(indepData);
476 free(coef);
477 free(sCoef);
478}
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
long lsfn(double *xd, double *yd, double *sy, long nd, long nf, double *coef, double *s_coef, double *chi, double *diff)
Computes nth order polynomial least squares fit.
Definition lsfn.c:34

◆ main()

int main ( int argc,
char ** argv )

Definition at line 133 of file sddsbaseline.c.

133 {
134 int iArg;
135 char **inputColumn;
136 char *input, *output;
137 long readCode, repeats, repeat, fitTerms = 2;
138 int64_t rows, i, j;
139 int32_t columns;
140 unsigned long pipeFlags, methodFlags, selectFlags, dummyFlags, majorOrderFlag;
141 SCANNED_ARG *scanned;
142 SDDS_DATASET SDDSin, SDDSout;
143 double *data, *indepData;
144 int32_t endPoints, antiOutlierPasses;
145 short *selected;
146 double outsideFWHA;
147 long nonnegative;
148 int32_t despikePasses, despikeWidthLimit;
149 short columnMajorOrder = -1;
150
152 argc = scanargs(&scanned, argc, argv);
153 if (argc < 2)
154 bomb(NULL, USAGE);
155
156 output = input = NULL;
157 inputColumn = NULL;
158 columns = nonnegative = 0;
159 repeats = 1;
160 pipeFlags = methodFlags = selectFlags = dummyFlags = 0;
161 endPoints = antiOutlierPasses = 0;
162 outsideFWHA = 0;
163 despikePasses = 0;
164 despikeWidthLimit = 2;
165
166 for (iArg = 1; iArg < argc; iArg++) {
167 if (scanned[iArg].arg_type == OPTION) {
168 /* process options here */
169 switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
170 case CLO_MAJOR_ORDER:
171 majorOrderFlag = 0;
172 scanned[iArg].n_items -= 1;
173 if (scanned[iArg].n_items > 0 &&
174 (!scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
175 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
176 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
177 SDDS_Bomb("invalid -majorOrder syntax/values");
178 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
179 columnMajorOrder = 1;
180 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
181 columnMajorOrder = 0;
182 break;
183 case CLO_COLUMNS:
184 if (scanned[iArg].n_items < 2)
185 SDDS_Bomb("invalid -columns syntax");
186 inputColumn = tmalloc(sizeof(*inputColumn) * (columns = scanned[iArg].n_items - 1));
187 for (i = 0; i < columns; i++)
188 inputColumn[i] = scanned[iArg].list[i + 1];
189 break;
190 case CLO_PIPE:
191 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
192 SDDS_Bomb("invalid -pipe syntax");
193 break;
194 case CLO_METHOD:
195 if (!(scanned[iArg].n_items -= 1))
196 SDDS_Bomb("invalid -method syntax");
197 if (!scanItemList(&methodFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
198 "average", -1, NULL, 0, METHOD_AVERAGE,
199 "fit", -1, NULL, 0, METHOD_FIT,
200 "terms", SDDS_LONG, &fitTerms, 1, 0, NULL) ||
201 bitsSet(methodFlags) != 1 || fitTerms < 2)
202 SDDS_Bomb("invalid -method syntax/values");
203 break;
204 case CLO_SELECT:
205 if (!(scanned[iArg].n_items -= 1))
206 SDDS_Bomb("invalid -select syntax");
207 if (!scanItemList(&selectFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
208 "endpoints", SDDS_LONG, &endPoints, 1, SELECT_ENDPOINTS,
209 "outsidefwha", SDDS_DOUBLE, &outsideFWHA, 1, SELECT_OUTSIDEFWHA,
210 "antioutlier", SDDS_LONG, &antiOutlierPasses, 1, SELECT_ANTIOUTLIER, NULL) ||
211 bitsSet(selectFlags) != 1)
212 SDDS_Bomb("invalid -select syntax/values");
213 break;
214 case CLO_NONNEGATIVE:
215 nonnegative = 1;
216 break;
217 case CLO_REPEATS:
218 if (scanned[iArg].n_items != 2 ||
219 sscanf(scanned[iArg].list[1], "%ld", &repeats) != 1 ||
220 repeats <= 0)
221 SDDS_Bomb("invalid -repeats syntax");
222 break;
223 case CLO_DESPIKE:
224 despikePasses = 1;
225 if (!(scanned[iArg].n_items -= 1))
226 SDDS_Bomb("invalid -despike syntax");
227 if (!scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
228 "passes", SDDS_LONG, &despikePasses, 1, 0,
229 "widthlimit", SDDS_LONG, &despikeWidthLimit, 1, 0, NULL) ||
230 despikePasses < 0 ||
231 despikeWidthLimit < 1)
232 SDDS_Bomb("invalid -despike syntax/values");
233 break;
234 default:
235 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
236 exit(EXIT_FAILURE);
237 break;
238 }
239 } else {
240 if (!input)
241 input = scanned[iArg].list[0];
242 else if (!output)
243 output = scanned[iArg].list[0];
244 else
245 SDDS_Bomb("too many filenames seen");
246 }
247 }
248
249 if (!bitsSet(selectFlags))
250 SDDS_Bomb("no -select option given");
251 if (!bitsSet(methodFlags))
252 SDDS_Bomb("no -method option given");
253
254 if (!nonnegative && despikePasses)
255 SDDS_Bomb("not meaningful to despike without setting -nonnegative");
256 if (!nonnegative && repeats > 1)
257 SDDS_Bomb("not meaningful to repeat without setting -nonnegative");
258
259 processFilenames("sddsbaseline", &input, &output, pipeFlags, 0, NULL);
260
261 if (!columns)
262 SDDS_Bomb("supply the names of columns to process with the -columns option");
263
264 if (!SDDS_InitializeInput(&SDDSin, input))
265 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
266
267 if (!resolveColumnNames(&SDDSin, &inputColumn, &columns))
268 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
269 if (!columns)
270 SDDS_Bomb("no columns selected for processing");
271
272 if (!SDDS_InitializeCopy(&SDDSout, &SDDSin, output, "w"))
273 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
274 if (!SDDS_DefineSimpleColumn(&SDDSout, "SelectedForBaselineDetermination", NULL, SDDS_SHORT))
275 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
276 if (columnMajorOrder != -1)
277 SDDSout.layout.data_mode.column_major = columnMajorOrder;
278 else
279 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
280 if (!SDDS_WriteLayout(&SDDSout))
281 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
282
283 indepData = NULL;
284 selected = NULL;
285 while ((readCode = SDDS_ReadPage(&SDDSin)) > 0) {
286 if (!SDDS_CopyPage(&SDDSout, &SDDSin))
287 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
288 if ((rows = SDDS_CountRowsOfInterest(&SDDSin))) {
289 if (!(indepData = SDDS_Realloc(indepData, sizeof(*indepData) * rows)) ||
290 !(selected = SDDS_Realloc(selected, sizeof(*selected) * rows)))
291 SDDS_Bomb("memory allocation failure");
292 for (i = 0; i < rows; i++)
293 indepData[i] = i;
294 for (i = 0; i < columns; i++) {
295 if (!(data = SDDS_GetColumnInDoubles(&SDDSin, inputColumn[i])))
296 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
297 for (repeat = 0; repeat < repeats; repeat++) {
298 for (j = 0; j < rows; j++)
299 selected[j] = 0;
300 switch (selectFlags) {
301 case SELECT_ENDPOINTS:
302 selectEndpoints(selected, rows, endPoints);
303 break;
304 case SELECT_OUTSIDEFWHA:
305 selectOutsideFWHA(data, indepData, selected, rows, outsideFWHA);
306 break;
307 case SELECT_ANTIOUTLIER:
308 selectAntiOutlier(data, selected, rows, antiOutlierPasses);
309 break;
310 default:
311 SDDS_Bomb("invalid select flag");
312 break;
313 }
314 switch (methodFlags) {
315 case METHOD_FIT:
316 fitAndRemoveBaseline(data, indepData, selected, rows, fitTerms);
317 break;
318 case METHOD_AVERAGE:
319 averageAndRemoveBaseline(data, selected, rows);
320 break;
321 default:
322 break;
323 }
324 if (nonnegative) {
325 for (j = 0; j < rows; j++)
326 if (data[j] < 0)
327 data[j] = 0;
328 if (despikePasses)
329 despikeProfile(data, rows, despikeWidthLimit, despikePasses);
330 }
331 }
332 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, data, rows, inputColumn[i]) ||
333 !SDDS_SetColumn(&SDDSout, SDDS_SET_BY_NAME, selected, rows, "SelectedForBaselineDetermination"))
334 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
335 free(data);
336 }
337 }
338 if (!SDDS_WritePage(&SDDSout))
339 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
340 }
341 if (!SDDS_Terminate(&SDDSin)) {
342 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
343 return EXIT_FAILURE;
344 }
345 if (!SDDS_Terminate(&SDDSout)) {
346 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
347 return EXIT_FAILURE;
348 }
349 if (indepData)
350 free(indepData);
351 if (selected)
352 free(selected);
353 return EXIT_SUCCESS;
354}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_SHORT
Identifier for the signed short integer data type.
Definition SDDStypes.h:73
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
long bitsSet(unsigned long data)
Counts the number of set bits (1s) in the given data.
Definition binary.c:52
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.

◆ resolveColumnNames()

long resolveColumnNames ( SDDS_DATASET * SDDSin,
char *** column,
int32_t * columns )

Definition at line 356 of file sddsbaseline.c.

356 {
357 long i;
358
359 SDDS_SetColumnFlags(SDDSin, 0);
360 for (i = 0; i < *columns; i++) {
361 if (!SDDS_SetColumnsOfInterest(SDDSin, SDDS_MATCH_STRING, (*column)[i], SDDS_OR))
362 return 0;
363 }
364 free(*column);
365 if (!(*column = SDDS_GetColumnNames(SDDSin, columns)) || *columns == 0) {
366 SDDS_SetError("no columns found");
367 return 0;
368 }
369 return 1;
370}
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
int32_t SDDS_SetColumnFlags(SDDS_DATASET *SDDS_dataset, int32_t column_flag_value)
Sets the acceptance flags for all columns in the current data table of a data set.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.

◆ selectAntiOutlier()

void selectAntiOutlier ( double * data,
short * selected,
int64_t rows,
long passes )

Definition at line 414 of file sddsbaseline.c.

414 {
415 double ave, sum2, limit;
416 int64_t i, count;
417
418 for (i = 0; i < rows; i++)
419 selected[i] = 1;
420 while (--passes >= 0) {
421 for (i = ave = count = 0; i < rows; i++)
422 if (selected[i]) {
423 ave += data[i];
424 count++;
425 }
426 if (!count)
427 break;
428 ave /= count;
429 for (i = sum2 = 0; i < rows; i++)
430 if (selected[i])
431 sum2 += sqr(data[i] - ave);
432 limit = 2 * sqrt(sum2 / count);
433 for (i = 0; i < rows; i++)
434 if (selected[i] && fabs(data[i] - ave) > limit)
435 selected[i] = 0;
436 }
437}

◆ selectEndpoints()

void selectEndpoints ( short * selected,
int64_t rows,
long endPoints )

Definition at line 372 of file sddsbaseline.c.

372 {
373 int64_t i;
374 for (i = 0; i < endPoints && i < rows; i++)
375 selected[i] = 1;
376 for (i = rows - 1; i > rows - endPoints - 1 && i >= 0; i--)
377 selected[i] = 1;
378}

◆ selectOutsideFWHA()

void selectOutsideFWHA ( double * data,
double * indepData,
short * selected,
int64_t rows,
double fwhaLimit )

Definition at line 380 of file sddsbaseline.c.

380 {
381 double top, base, fwha;
382 int64_t i, i1, i2, i2save;
383 int64_t imin, imax;
384 double point1, point2;
385
386 if (rows < 3 || fwhaLimit <= 0)
387 return;
388
389 index_min_max(&imin, &imax, data, rows);
390 if (!data[imax])
391 return;
392
393 if (!findTopBaseLevels(&top, &base, data, rows, 50, 2.0))
394 return;
395
396 if ((i1 = findCrossingPoint(0, data, rows, (top - base) * 0.5 + base, 1, indepData, &point1)) < 0 ||
397 (i2 = i2save = findCrossingPoint(i1, data, rows, (top - base) * 0.9 + base, -1, NULL, NULL)) < 0 ||
398 (i2 = findCrossingPoint(i2, data, rows, (top - base) * 0.5 + base, -1, indepData, &point2)) < 0) {
399 return;
400 }
401 fwha = point2 - point1;
402
403 for (i = 0; i < rows; i++)
404 selected[i] = 1;
405 for (i = imax - fwha * fwhaLimit; i <= imax + fwha * fwhaLimit; i++) {
406 if (i < 0)
407 continue;
408 if (i >= rows)
409 break;
410 selected[i] = 0;
411 }
412}
int index_min_max(int64_t *imin, int64_t *imax, double *list, int64_t n)
Finds the indices of the minimum and maximum values in a list of doubles.
Definition findMinMax.c:116
long findTopBaseLevels(double *top, double *base, double *data, int64_t points, long bins, double sigmasRequired)
Finds the top-level and base-level of a dataset.
Definition topbase.c:36
int64_t findCrossingPoint(int64_t start, double *data, int64_t points, double level, long direction, double *indepData, double *location)
Finds the crossing point in the data where the data crosses a specified level.
Definition topbase.c:118