SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddspeakfind.c File Reference

Detailed Description

Program to detect and analyze peaks in SDDS data files.

This program processes an SDDS (Self Describing Data Set) file to identify peaks within a specified column of data. Detection is based on configurable parameters like thresholds, curvature limits, exclusion zones, and more. The program supports input and output through files or pipes and offers compatibility with both row-major and column-major data storage orders.

Usage

sddspeakfind [<inputfile>] [<outputfile>]
[-pipe=[input][,output]]
-column=<columnName>
[-threshold=<value>|@<parametername>]
[-fivePoint]
[-sevenPoint]
[-exclusionZone=<fraction>]
[-changeThreshold=<fractionalChange>]
[-curvatureLimit=<xColumn>,<minValue>]
[-majorOrder=row|column]

Options

Required Description
-column Specify the column name for peak detection.
Optional Description
-pipe Specify input/output through pipes.
-threshold Set a threshold for peak detection.
-fivePoint Use a 5-point criterion for peak detection.
-sevenPoint Use a 7-point criterion for peak detection.
-exclusionZone Exclude peaks too close to each other, specified as a fraction.
-changeThreshold Apply a fractional change threshold for filtering flat peaks.
-curvatureLimit Apply curvature filtering to exclude shallow peaks.
-majorOrder Specify row or column major order for output.

Incompatibilities

  • The following options are mutually exclusive:
    • -fivePoint and -sevenPoint

Requirements

  • For -curvatureLimit:
    • Requires both <xColumn> and <minValue> to be specified.
  • For -threshold:
    • Accepts either a numeric value or a parameter name prefixed with @.
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Authors
M. Borland, C. Saunders, R. Soliday, H. Shang

Definition in file sddspeakfind.c.

#include "mdb.h"
#include "SDDS.h"
#include "scan.h"

Go to the source code of this file.

Functions

void mark_peaks (double *data, int32_t *row_flag, int64_t rows, long n_points)
 
void unmark_flat_peaks (double *data, int32_t *row_flag, int64_t rows, double change_threshold, long five_point, long seven_point, double *x_data, double curvature_limit)
 
void unmark_excluded_peaks (double *data, int32_t *row_flag, int64_t rows, double ezone_fraction)
 
int main (int argc, char **argv)
 

Function Documentation

◆ main()

int main ( int argc,
char ** argv )

Definition at line 121 of file sddspeakfind.c.

121 {
122 SDDS_DATASET in_set, out_set;
123 SCANNED_ARG *s_arg;
124 long i_arg, page_returned;
125 int64_t rows, row;
126 int32_t *row_flag;
127 char *input, *output, *column_name, *par_threshold_name, *x_column_name;
128 double *data, *x_data;
129 unsigned long pipe_flags, flags, major_order_flag;
130 double threshold, ezone_fraction, change_threshold, curvature_limit;
131 short column_major_order = -1;
132
134 argc = scanargs(&s_arg, argc, argv);
135 if (argc < 2 || argc > (2 + N_OPTIONS))
136 bomb(NULL, USAGE);
137
138 flags = pipe_flags = 0;
139 input = output = NULL;
140 column_name = NULL;
141 ezone_fraction = change_threshold = curvature_limit = 0;
142 row_flag = NULL;
143 par_threshold_name = NULL;
144 x_column_name = NULL;
145 x_data = NULL;
146
147 for (i_arg = 1; i_arg < argc; i_arg++) {
148 if (s_arg[i_arg].arg_type == OPTION) {
149 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
150 case CLO_MAJOR_ORDER:
151 major_order_flag = 0;
152 s_arg[i_arg].n_items--;
153 if (s_arg[i_arg].n_items > 0 &&
154 (!scanItemList(&major_order_flag, s_arg[i_arg].list + 1,
155 &s_arg[i_arg].n_items, 0, "row", -1, NULL, 0,
156 SDDS_ROW_MAJOR_ORDER, "column", -1, NULL, 0,
157 SDDS_COLUMN_MAJOR_ORDER, NULL)))
158 SDDS_Bomb("invalid -majorOrder syntax/values");
159 if (major_order_flag & SDDS_COLUMN_MAJOR_ORDER)
160 column_major_order = 1;
161 else if (major_order_flag & SDDS_ROW_MAJOR_ORDER)
162 column_major_order = 0;
163 break;
164 case CLO_THRESHOLD:
165 if (s_arg[i_arg].n_items == 2) {
166 if (s_arg[i_arg].list[1][0] == '@') {
167 SDDS_CopyString(&par_threshold_name, s_arg[i_arg].list[1] + 1);
168 flags |= PAR_THRESHOLD;
169 } else {
170 if (sscanf(s_arg[i_arg].list[1], "%lf", &threshold) != 1)
171 SDDS_Bomb("incorrect -threshold syntax");
172 flags |= THRESHOLD;
173 }
174 } else
175 SDDS_Bomb("incorrect -threshold syntax");
176 break;
177 case CLO_FIVEPOINT:
178 flags |= FIVEPOINT;
179 break;
180 case CLO_SEVENPOINT:
181 flags |= SEVENPOINT;
182 break;
183 case CLO_CHANGETHRESHOLD:
184 if (s_arg[i_arg].n_items != 2 ||
185 sscanf(s_arg[i_arg].list[1], "%lf", &change_threshold) != 1 ||
186 change_threshold <= 0)
187 SDDS_Bomb("incorrect -changeThreshold syntax or values");
188 flags |= CHANGETHRES;
189 break;
190 case CLO_CURVATURE_LIMIT:
191 if (s_arg[i_arg].n_items != 3 ||
192 !strlen(x_column_name = s_arg[i_arg].list[1]) ||
193 sscanf(s_arg[i_arg].list[2], "%lf", &curvature_limit) != 1 ||
194 curvature_limit <= 0)
195 SDDS_Bomb("incorrect -curvatureLimit syntax or values");
196 flags |= CURVATURELIMIT;
197 break;
198 case CLO_PIPE:
199 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipe_flags))
200 SDDS_Bomb("invalid -pipe syntax");
201 break;
202 case CLO_COLUMN:
203 if (s_arg[i_arg].n_items != 2)
204 SDDS_Bomb("invalid -column syntax");
205 column_name = s_arg[i_arg].list[1];
206 break;
207 case CLO_EXCLUSIONZONE:
208 if (s_arg[i_arg].n_items != 2 ||
209 sscanf(s_arg[i_arg].list[1], "%lf", &ezone_fraction) != 1 ||
210 ezone_fraction <= 0)
211 SDDS_Bomb("invalid -exclusionZone syntax or value");
212 flags |= EZONEFRAC;
213 break;
214 default:
215 fprintf(stderr, "error: unknown/ambiguous option: %s\n",
216 s_arg[i_arg].list[0]);
217 exit(1);
218 break;
219 }
220 } else {
221 if (input == NULL)
222 input = s_arg[i_arg].list[0];
223 else if (output == NULL)
224 output = s_arg[i_arg].list[0];
225 else
226 SDDS_Bomb("too many filenames");
227 }
228 }
229
230 processFilenames("sddspeakfind", &input, &output, pipe_flags, 0, NULL);
231
232 if (!column_name)
233 SDDS_Bomb("-column option must be given");
234
235 if (!SDDS_InitializeInput(&in_set, input))
236 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
237
238 if (!SDDS_FindColumn(&in_set, FIND_NUMERIC_TYPE, column_name, NULL))
239 SDDS_Bomb("the given column is nonexistent or nonnumeric");
240
241 if (x_column_name &&
242 !SDDS_FindColumn(&in_set, FIND_NUMERIC_TYPE, x_column_name, NULL))
243 SDDS_Bomb("the given x column is nonexistent or nonnumeric");
244
245 if (!SDDS_InitializeCopy(&out_set, &in_set, output, "w"))
246 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
247
248 out_set.layout.data_mode.column_major =
249 column_major_order != -1 ? column_major_order : in_set.layout.data_mode.column_major;
250
251 if (!SDDS_WriteLayout(&out_set))
252 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
253
254 /* Main loop for processing pages */
255 while ((page_returned = SDDS_ReadPage(&in_set)) > 0) {
256 if (!SDDS_CopyPage(&out_set, &in_set)) {
257 SDDS_SetError("Problem copying data for output file");
258 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
259 }
260
261 if ((rows = SDDS_CountRowsOfInterest(&out_set)) > 1) {
262 data = SDDS_GetColumnInDoubles(&in_set, column_name);
263 if (!data)
264 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
265
266 if (x_column_name)
267 x_data = SDDS_GetColumnInDoubles(&in_set, x_column_name);
268 if (x_column_name && !x_data)
269 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
270
271 row_flag = SDDS_Realloc(row_flag, sizeof(*row_flag) * rows);
272 for (row = 0; row < rows; row++)
273 row_flag[row] = 0;
274
275 mark_peaks(data, row_flag, rows,
276 flags & SEVENPOINT ? 7 : (flags & FIVEPOINT ? 5 : 3));
277
278 if (flags & PAR_THRESHOLD)
279 if (!SDDS_GetParameter(&in_set, par_threshold_name, &threshold))
280 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
281
282 if (flags & THRESHOLD || flags & PAR_THRESHOLD)
283 for (row = 0; row < rows; row++)
284 if (row_flag[row] && data[row] < threshold)
285 row_flag[row] = 0;
286
287 if (flags & CHANGETHRES || flags & CURVATURELIMIT)
288 unmark_flat_peaks(data, row_flag, rows, change_threshold,
289 flags & FIVEPOINT, flags & SEVENPOINT, x_data, curvature_limit);
290
291 if (flags & EZONEFRAC)
292 unmark_excluded_peaks(data, row_flag, rows, ezone_fraction);
293
294 if (!SDDS_AssertRowFlags(&out_set, SDDS_FLAG_ARRAY, row_flag, rows))
295 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
296
297 free(data);
298 }
299
300 if (!SDDS_WritePage(&out_set))
301 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
302 }
303
304 if (!SDDS_Terminate(&in_set) || !SDDS_Terminate(&out_set)) {
305 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
306 exit(1);
307 }
308
309 free_scanargs(&s_arg, argc);
310 free(par_threshold_name);
311 free(row_flag);
312
313 return 0;
314}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_AssertRowFlags(SDDS_DATASET *SDDS_dataset, uint32_t mode,...)
Sets acceptance flags for rows based on specified criteria.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
void * SDDS_GetParameter(SDDS_DATASET *SDDS_dataset, char *parameter_name, void *memory)
Retrieves the value of a specified parameter from the current data table of a data set.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
char * SDDS_FindColumn(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Finds the first column in the SDDS dataset that matches the specified criteria.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.

◆ mark_peaks()

void mark_peaks ( double * data,
int32_t * row_flag,
int64_t rows,
long n_points )

Definition at line 316 of file sddspeakfind.c.

316 {
317 double y0, y1, y2;
318 int64_t i, rows_minus;
319 short five_point = 0;
320
321 switch (n_points) {
322 case 5:
323 y0 = data[1];
324 y1 = data[2];
325 if (rows < 5)
326 return;
327 five_point = 1;
328 break;
329 case 7:
330 if (rows < 7)
331 return;
332 for (i = 3; i < (rows - 3); i++) {
333 if ((data[i - 3] < data[i - 2] && data[i - 2] < data[i - 1] &&
334 data[i - 1] < data[i] && data[i + 1] < data[i] &&
335 data[i + 2] < data[i + 1] && data[i + 3] < data[i + 2]))
336 row_flag[i] = 1;
337 }
338 return;
339 default:
340 if (rows < 3)
341 return;
342 y0 = data[0];
343 y1 = data[1];
344 }
345
346 rows_minus = rows - (five_point ? 2 : 1);
347 for (i = (five_point ? 2 : 1); i < rows_minus; i++) {
348 y2 = data[i + 1];
349 if ((y1 > y0 && y1 > y2) || (y1 == y0 && y1 > y2) || (y1 > y0 && y1 == y2)) {
350 if (!five_point || (data[i - 2] < y0 && data[i + 2] < y2))
351 row_flag[i] = 1;
352 }
353 y0 = y1;
354 y1 = y2;
355 }
356}

◆ unmark_excluded_peaks()

void unmark_excluded_peaks ( double * data,
int32_t * row_flag,
int64_t rows,
double ezone_fraction )

Definition at line 393 of file sddspeakfind.c.

394 {
395 int64_t lower, upper, i, j, rows1;
396 rows1 = rows - 1;
397
398 for (i = 0; i < rows; i++) {
399 if (row_flag[i]) {
400 lower = i - ((int64_t)(ezone_fraction / 2.0 * rows + 0.5));
401 if (lower < 0)
402 lower = 0;
403 upper = i + ((int64_t)(ezone_fraction / 2.0 * rows + 0.5));
404 if (upper > rows1)
405 upper = rows1;
406 for (j = lower; j <= upper; j++) {
407 if (j != i && row_flag[j] && data[j] <= data[i])
408 row_flag[j] = 0;
409 }
410 }
411 }
412}

◆ unmark_flat_peaks()

void unmark_flat_peaks ( double * data,
int32_t * row_flag,
int64_t rows,
double change_threshold,
long five_point,
long seven_point,
double * x_data,
double curvature_limit )

Definition at line 358 of file sddspeakfind.c.

360 {
361 long delta;
362 int64_t i, rows1;
363
364 delta = five_point ? 2 : (seven_point ? 3 : 1);
365 rows1 = rows - delta;
366
367 if (change_threshold > 0) {
368 for (i = delta; i < rows1; i++) {
369 if (row_flag[i]) {
370 if (!data[i] || (data[i] - data[i + delta]) / data[i] > change_threshold)
371 continue;
372 if (!data[i] || (data[i] - data[i - delta]) / data[i] > change_threshold)
373 continue;
374 row_flag[i] = 0;
375 }
376 }
377 }
378
379 if (curvature_limit > 0) {
380 double chi, coef[3], scoef[3], diff[7];
381 int32_t order[3] = {0, 1, 2};
382 for (i = delta; i < rows1; i++) {
383 if (row_flag[i]) {
384 lsfg(x_data + i - delta, data + i - delta, NULL, 2 * delta + 1,
385 3, order, coef, scoef, &chi, diff, ipower);
386 if (fabs(coef[2]) < curvature_limit)
387 row_flag[i] = 0;
388 }
389 }
390 }
391}
long lsfg(double *xd, double *yd, double *sy, long n_pts, long n_terms, int32_t *order, double *coef, double *s_coef, double *chi, double *diff, double(*fn)(double x, long ord))
Computes generalized least squares fits using a function passed by the caller.
Definition lsfg.c:30
double ipower(double x, long n)
Evaluate a power function x^n.