SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddspeakfind.c
Go to the documentation of this file.
1/**
2 * @file sddspeakfind.c
3 * @brief Program to detect and analyze peaks in SDDS data files.
4 *
5 * @details
6 * This program processes an SDDS (Self Describing Data Set) file to identify peaks
7 * within a specified column of data. Detection is based on configurable parameters
8 * like thresholds, curvature limits, exclusion zones, and more. The program supports
9 * input and output through files or pipes and offers compatibility with both row-major
10 * and column-major data storage orders.
11 *
12 * @section Usage
13 * ```
14 * sddspeakfind [<inputfile>] [<outputfile>]
15 * [-pipe=[input][,output]]
16 * -column=<columnName>
17 * [-threshold=<value>|@<parametername>]
18 * [-fivePoint]
19 * [-sevenPoint]
20 * [-exclusionZone=<fraction>]
21 * [-changeThreshold=<fractionalChange>]
22 * [-curvatureLimit=<xColumn>,<minValue>]
23 * [-majorOrder=row|column]
24 * ```
25 *
26 * @section Options
27 * | Required | Description |
28 * |-----------|-----------------------------------------------------------------------------|
29 * | `-column` | Specify the column name for peak detection. |
30 *
31 * | Optional | Description |
32 * |-----------|-----------------------------------------------------------------------------|
33 * | `-pipe` | Specify input/output through pipes. |
34 * | `-threshold` | Set a threshold for peak detection. |
35 * | `-fivePoint` | Use a 5-point criterion for peak detection. |
36 * | `-sevenPoint` | Use a 7-point criterion for peak detection. |
37 * | `-exclusionZone` | Exclude peaks too close to each other, specified as a fraction. |
38 * | `-changeThreshold` | Apply a fractional change threshold for filtering flat peaks. |
39 * | `-curvatureLimit` | Apply curvature filtering to exclude shallow peaks. |
40 * | `-majorOrder` | Specify row or column major order for output. |
41 *
42 * @subsection Incompatibilities
43 * - The following options are mutually exclusive:
44 * - `-fivePoint` and `-sevenPoint`
45 *
46 * @subsection Requirements
47 * - For `-curvatureLimit`:
48 * - Requires both `<xColumn>` and `<minValue>` to be specified.
49 * - For `-threshold`:
50 * - Accepts either a numeric value or a parameter name prefixed with `@`.
51 *
52 * @copyright
53 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
54 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
55 *
56 * @license
57 * This file is distributed under the terms of the Software License Agreement
58 * found in the file LICENSE included with this distribution.
59 *
60 * @authors
61 * M. Borland, C. Saunders, R. Soliday, H. Shang
62 */
63
64#include "mdb.h"
65#include "SDDS.h"
66#include "scan.h"
67
68#define THRESHOLD 0x0001UL
69#define FIVEPOINT 0x0002UL
70#define CHANGETHRES 0x0004UL
71#define EZONEFRAC 0x0008UL
72#define PAR_THRESHOLD 0x0010UL
73#define SEVENPOINT 0x0020UL
74#define CURVATURELIMIT 0x0040UL
75
76/* Enumeration for option types */
77enum option_type {
78 CLO_THRESHOLD,
79 CLO_FIVEPOINT,
80 CLO_CHANGETHRESHOLD,
81 CLO_PIPE,
82 CLO_COLUMN,
83 CLO_EXCLUSIONZONE,
84 CLO_MAJOR_ORDER,
85 CLO_SEVENPOINT,
86 CLO_CURVATURE_LIMIT,
87 N_OPTIONS
88};
89
90static char *option[N_OPTIONS] = {
91 "threshold",
92 "fivepoint",
93 "changethreshold",
94 "pipe",
95 "column",
96 "exclusionzone",
97 "majorOrder",
98 "sevenpoint",
99 "curvatureLimit"};
100
101static char *USAGE =
102 "sddspeakfind [<inputfile>] [<outputfile>] [-pipe=[input][,output]] \n"
103 " -column=<columnName> \n"
104 " [-threshold=<value>|@<parametername>] \n"
105 " [{-fivePoint|-sevenPoint}] \n"
106 " [-exclusionZone=<fraction>] \n"
107 " [-changeThreshold=<fractionalChange>] \n"
108 " [-curvatureLimit=<xColumn>,<minValue>] \n"
109 " [-majorOrder=row|column] \n\n"
110 "Finds peaks in a column of data as a function of row index.\n"
111 "Program by Michael Borland. ("__DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
112
113/* Function prototypes */
114void mark_peaks(double *data, int32_t *row_flag, int64_t rows, long n_points);
115void unmark_flat_peaks(double *data, int32_t *row_flag, int64_t rows,
116 double change_threshold, long five_point, long seven_point,
117 double *x_data, double curvature_limit);
118void unmark_excluded_peaks(double *data, int32_t *row_flag, int64_t rows,
119 double ezone_fraction);
120
121int main(int argc, char **argv) {
122 SDDS_DATASET in_set, out_set;
123 SCANNED_ARG *s_arg;
124 long i_arg, page_returned;
125 int64_t rows, row;
126 int32_t *row_flag;
127 char *input, *output, *column_name, *par_threshold_name, *x_column_name;
128 double *data, *x_data;
129 unsigned long pipe_flags, flags, major_order_flag;
130 double threshold, ezone_fraction, change_threshold, curvature_limit;
131 short column_major_order = -1;
132
134 argc = scanargs(&s_arg, argc, argv);
135 if (argc < 2 || argc > (2 + N_OPTIONS))
136 bomb(NULL, USAGE);
137
138 flags = pipe_flags = 0;
139 input = output = NULL;
140 column_name = NULL;
141 ezone_fraction = change_threshold = curvature_limit = 0;
142 row_flag = NULL;
143 par_threshold_name = NULL;
144 x_column_name = NULL;
145 x_data = NULL;
146
147 for (i_arg = 1; i_arg < argc; i_arg++) {
148 if (s_arg[i_arg].arg_type == OPTION) {
149 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
150 case CLO_MAJOR_ORDER:
151 major_order_flag = 0;
152 s_arg[i_arg].n_items--;
153 if (s_arg[i_arg].n_items > 0 &&
154 (!scanItemList(&major_order_flag, s_arg[i_arg].list + 1,
155 &s_arg[i_arg].n_items, 0, "row", -1, NULL, 0,
156 SDDS_ROW_MAJOR_ORDER, "column", -1, NULL, 0,
157 SDDS_COLUMN_MAJOR_ORDER, NULL)))
158 SDDS_Bomb("invalid -majorOrder syntax/values");
159 if (major_order_flag & SDDS_COLUMN_MAJOR_ORDER)
160 column_major_order = 1;
161 else if (major_order_flag & SDDS_ROW_MAJOR_ORDER)
162 column_major_order = 0;
163 break;
164 case CLO_THRESHOLD:
165 if (s_arg[i_arg].n_items == 2) {
166 if (s_arg[i_arg].list[1][0] == '@') {
167 SDDS_CopyString(&par_threshold_name, s_arg[i_arg].list[1] + 1);
168 flags |= PAR_THRESHOLD;
169 } else {
170 if (sscanf(s_arg[i_arg].list[1], "%lf", &threshold) != 1)
171 SDDS_Bomb("incorrect -threshold syntax");
172 flags |= THRESHOLD;
173 }
174 } else
175 SDDS_Bomb("incorrect -threshold syntax");
176 break;
177 case CLO_FIVEPOINT:
178 flags |= FIVEPOINT;
179 break;
180 case CLO_SEVENPOINT:
181 flags |= SEVENPOINT;
182 break;
183 case CLO_CHANGETHRESHOLD:
184 if (s_arg[i_arg].n_items != 2 ||
185 sscanf(s_arg[i_arg].list[1], "%lf", &change_threshold) != 1 ||
186 change_threshold <= 0)
187 SDDS_Bomb("incorrect -changeThreshold syntax or values");
188 flags |= CHANGETHRES;
189 break;
190 case CLO_CURVATURE_LIMIT:
191 if (s_arg[i_arg].n_items != 3 ||
192 !strlen(x_column_name = s_arg[i_arg].list[1]) ||
193 sscanf(s_arg[i_arg].list[2], "%lf", &curvature_limit) != 1 ||
194 curvature_limit <= 0)
195 SDDS_Bomb("incorrect -curvatureLimit syntax or values");
196 flags |= CURVATURELIMIT;
197 break;
198 case CLO_PIPE:
199 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipe_flags))
200 SDDS_Bomb("invalid -pipe syntax");
201 break;
202 case CLO_COLUMN:
203 if (s_arg[i_arg].n_items != 2)
204 SDDS_Bomb("invalid -column syntax");
205 column_name = s_arg[i_arg].list[1];
206 break;
207 case CLO_EXCLUSIONZONE:
208 if (s_arg[i_arg].n_items != 2 ||
209 sscanf(s_arg[i_arg].list[1], "%lf", &ezone_fraction) != 1 ||
210 ezone_fraction <= 0)
211 SDDS_Bomb("invalid -exclusionZone syntax or value");
212 flags |= EZONEFRAC;
213 break;
214 default:
215 fprintf(stderr, "error: unknown/ambiguous option: %s\n",
216 s_arg[i_arg].list[0]);
217 exit(1);
218 break;
219 }
220 } else {
221 if (input == NULL)
222 input = s_arg[i_arg].list[0];
223 else if (output == NULL)
224 output = s_arg[i_arg].list[0];
225 else
226 SDDS_Bomb("too many filenames");
227 }
228 }
229
230 processFilenames("sddspeakfind", &input, &output, pipe_flags, 0, NULL);
231
232 if (!column_name)
233 SDDS_Bomb("-column option must be given");
234
235 if (!SDDS_InitializeInput(&in_set, input))
236 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
237
238 if (!SDDS_FindColumn(&in_set, FIND_NUMERIC_TYPE, column_name, NULL))
239 SDDS_Bomb("the given column is nonexistent or nonnumeric");
240
241 if (x_column_name &&
242 !SDDS_FindColumn(&in_set, FIND_NUMERIC_TYPE, x_column_name, NULL))
243 SDDS_Bomb("the given x column is nonexistent or nonnumeric");
244
245 if (!SDDS_InitializeCopy(&out_set, &in_set, output, "w"))
246 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
247
248 out_set.layout.data_mode.column_major =
249 column_major_order != -1 ? column_major_order : in_set.layout.data_mode.column_major;
250
251 if (!SDDS_WriteLayout(&out_set))
252 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
253
254 /* Main loop for processing pages */
255 while ((page_returned = SDDS_ReadPage(&in_set)) > 0) {
256 if (!SDDS_CopyPage(&out_set, &in_set)) {
257 SDDS_SetError("Problem copying data for output file");
258 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
259 }
260
261 if ((rows = SDDS_CountRowsOfInterest(&out_set)) > 1) {
262 data = SDDS_GetColumnInDoubles(&in_set, column_name);
263 if (!data)
264 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
265
266 if (x_column_name)
267 x_data = SDDS_GetColumnInDoubles(&in_set, x_column_name);
268 if (x_column_name && !x_data)
269 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
270
271 row_flag = SDDS_Realloc(row_flag, sizeof(*row_flag) * rows);
272 for (row = 0; row < rows; row++)
273 row_flag[row] = 0;
274
275 mark_peaks(data, row_flag, rows,
276 flags & SEVENPOINT ? 7 : (flags & FIVEPOINT ? 5 : 3));
277
278 if (flags & PAR_THRESHOLD)
279 if (!SDDS_GetParameter(&in_set, par_threshold_name, &threshold))
280 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
281
282 if (flags & THRESHOLD || flags & PAR_THRESHOLD)
283 for (row = 0; row < rows; row++)
284 if (row_flag[row] && data[row] < threshold)
285 row_flag[row] = 0;
286
287 if (flags & CHANGETHRES || flags & CURVATURELIMIT)
288 unmark_flat_peaks(data, row_flag, rows, change_threshold,
289 flags & FIVEPOINT, flags & SEVENPOINT, x_data, curvature_limit);
290
291 if (flags & EZONEFRAC)
292 unmark_excluded_peaks(data, row_flag, rows, ezone_fraction);
293
294 if (!SDDS_AssertRowFlags(&out_set, SDDS_FLAG_ARRAY, row_flag, rows))
295 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
296
297 free(data);
298 }
299
300 if (!SDDS_WritePage(&out_set))
301 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
302 }
303
304 if (!SDDS_Terminate(&in_set) || !SDDS_Terminate(&out_set)) {
305 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
306 exit(1);
307 }
308
309 free_scanargs(&s_arg, argc);
310 free(par_threshold_name);
311 free(row_flag);
312
313 return 0;
314}
315
316void mark_peaks(double *data, int32_t *row_flag, int64_t rows, long n_points) {
317 double y0, y1, y2;
318 int64_t i, rows_minus;
319 short five_point = 0;
320
321 switch (n_points) {
322 case 5:
323 y0 = data[1];
324 y1 = data[2];
325 if (rows < 5)
326 return;
327 five_point = 1;
328 break;
329 case 7:
330 if (rows < 7)
331 return;
332 for (i = 3; i < (rows - 3); i++) {
333 if ((data[i - 3] < data[i - 2] && data[i - 2] < data[i - 1] &&
334 data[i - 1] < data[i] && data[i + 1] < data[i] &&
335 data[i + 2] < data[i + 1] && data[i + 3] < data[i + 2]))
336 row_flag[i] = 1;
337 }
338 return;
339 default:
340 if (rows < 3)
341 return;
342 y0 = data[0];
343 y1 = data[1];
344 }
345
346 rows_minus = rows - (five_point ? 2 : 1);
347 for (i = (five_point ? 2 : 1); i < rows_minus; i++) {
348 y2 = data[i + 1];
349 if ((y1 > y0 && y1 > y2) || (y1 == y0 && y1 > y2) || (y1 > y0 && y1 == y2)) {
350 if (!five_point || (data[i - 2] < y0 && data[i + 2] < y2))
351 row_flag[i] = 1;
352 }
353 y0 = y1;
354 y1 = y2;
355 }
356}
357
358void unmark_flat_peaks(double *data, int32_t *row_flag, int64_t rows,
359 double change_threshold, long five_point, long seven_point,
360 double *x_data, double curvature_limit) {
361 long delta;
362 int64_t i, rows1;
363
364 delta = five_point ? 2 : (seven_point ? 3 : 1);
365 rows1 = rows - delta;
366
367 if (change_threshold > 0) {
368 for (i = delta; i < rows1; i++) {
369 if (row_flag[i]) {
370 if (!data[i] || (data[i] - data[i + delta]) / data[i] > change_threshold)
371 continue;
372 if (!data[i] || (data[i] - data[i - delta]) / data[i] > change_threshold)
373 continue;
374 row_flag[i] = 0;
375 }
376 }
377 }
378
379 if (curvature_limit > 0) {
380 double chi, coef[3], scoef[3], diff[7];
381 int32_t order[3] = {0, 1, 2};
382 for (i = delta; i < rows1; i++) {
383 if (row_flag[i]) {
384 lsfg(x_data + i - delta, data + i - delta, NULL, 2 * delta + 1,
385 3, order, coef, scoef, &chi, diff, ipower);
386 if (fabs(coef[2]) < curvature_limit)
387 row_flag[i] = 0;
388 }
389 }
390 }
391}
392
393void unmark_excluded_peaks(double *data, int32_t *row_flag, int64_t rows,
394 double ezone_fraction) {
395 int64_t lower, upper, i, j, rows1;
396 rows1 = rows - 1;
397
398 for (i = 0; i < rows; i++) {
399 if (row_flag[i]) {
400 lower = i - ((int64_t)(ezone_fraction / 2.0 * rows + 0.5));
401 if (lower < 0)
402 lower = 0;
403 upper = i + ((int64_t)(ezone_fraction / 2.0 * rows + 0.5));
404 if (upper > rows1)
405 upper = rows1;
406 for (j = lower; j <= upper; j++) {
407 if (j != i && row_flag[j] && data[j] <= data[i])
408 row_flag[j] = 0;
409 }
410 }
411 }
412}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_AssertRowFlags(SDDS_DATASET *SDDS_dataset, uint32_t mode,...)
Sets acceptance flags for rows based on specified criteria.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
void * SDDS_GetParameter(SDDS_DATASET *SDDS_dataset, char *parameter_name, void *memory)
Retrieves the value of a specified parameter from the current data table of a data set.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
char * SDDS_FindColumn(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Finds the first column in the SDDS dataset that matches the specified criteria.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long lsfg(double *xd, double *yd, double *sy, long n_pts, long n_terms, int32_t *order, double *coef, double *s_coef, double *chi, double *diff, double(*fn)(double x, long ord))
Computes generalized least squares fits using a function passed by the caller.
Definition lsfg.c:30
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
double ipower(double x, long n)
Evaluate a power function x^n.