SDDSlib
Loading...
Searching...
No Matches
sddspeakfind.c
Go to the documentation of this file.
1/**
2 * @file sddspeakfind.c
3 * @brief Program to find peaks in a column of SDDS data as a function of row index.
4 *
5 * This program processes an SDDS (Self Describing Data Set) file and identifies peaks
6 * in a specified column based on certain criteria such as threshold, exclusion zones,
7 * and curvature limits. The program supports both input and output from files or pipes.
8 *
9 * ### Features:
10 * - Detects peaks using 3-point, 5-point, or 7-point criteria.
11 * - Applies optional thresholds, curvature limits, and exclusion zones to filter peaks.
12 * - Outputs the processed data to an SDDS file.
13 * - Supports row-major and column-major orders for data storage.
14 *
15 * ### Options:
16 * - `-threshold`: Set a threshold for peak detection.
17 * - `-fivePoint` or `-sevenPoint`: Use 5-point or 7-point methods for peak detection.
18 * - `-changeThreshold`: Apply a fractional change threshold to flat peaks.
19 * - `-curvatureLimit`: Apply curvature filtering to remove shallow peaks.
20 * - `-exclusionZone`: Exclude peaks too close to each other.
21 * - `-column`: Specify the column to analyze.
22 * - `-pipe`: Use input/output pipes for SDDS data.
23 * - `-majorOrder`: Specify row or column major order for the output.
24 *
25 * ### Usage:
26 * ```
27 * sddspeakfind [<inputfile>] [<outputfile>] [-pipe=[input][,output]] \
28 * -column=<columnName> \
29 * [-threshold=<value>|@<parametername>] \
30 * [{-fivePoint|-sevenPoint}] \
31 * [-exclusionZone=<fraction>] \
32 * [-changeThreshold=<fractionalChange>] \
33 * [-curvatureLimit=<xColumn>,<minValue>] \
34 * [-majorOrder=row|column]
35 * ```
36 *
37 * @copyright
38 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
39 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
40 *
41 * @license
42 * This file is distributed under the terms of the Software License Agreement
43 * found in the file LICENSE included with this distribution.
44 *
45 * @author M. Borland, C. Saunders, R. Soliday, H. Shang
46 */
47
48#include "mdb.h"
49#include "SDDS.h"
50#include "scan.h"
51
52#define THRESHOLD 0x0001UL
53#define FIVEPOINT 0x0002UL
54#define CHANGETHRES 0x0004UL
55#define EZONEFRAC 0x0008UL
56#define PAR_THRESHOLD 0x0010UL
57#define SEVENPOINT 0x0020UL
58#define CURVATURELIMIT 0x0040UL
59
60/* Enumeration for option types */
61enum option_type {
62 CLO_THRESHOLD,
63 CLO_FIVEPOINT,
64 CLO_CHANGETHRESHOLD,
65 CLO_PIPE,
66 CLO_COLUMN,
67 CLO_EXCLUSIONZONE,
68 CLO_MAJOR_ORDER,
69 CLO_SEVENPOINT,
70 CLO_CURVATURE_LIMIT,
71 N_OPTIONS
72};
73
74static char *option[N_OPTIONS] = {
75 "threshold",
76 "fivepoint",
77 "changethreshold",
78 "pipe",
79 "column",
80 "exclusionzone",
81 "majorOrder",
82 "sevenpoint",
83 "curvatureLimit"};
84
85static char *USAGE =
86 "sddspeakfind [<inputfile>] [<outputfile>] [-pipe=[input][,output]] \n"
87 " -column=<columnName> \n"
88 " [-threshold=<value>|@<parametername>] \n"
89 " [{-fivePoint|-sevenPoint}] \n"
90 " [-exclusionZone=<fraction>] \n"
91 " [-changeThreshold=<fractionalChange>] \n"
92 " [-curvatureLimit=<xColumn>,<minValue>] \n"
93 " [-majorOrder=row|column] \n\n"
94 "Finds peaks in a column of data as a function of row index.\n"
95 "Program by Michael Borland. ("__DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
96
97/* Function prototypes */
98void mark_peaks(double *data, int32_t *row_flag, int64_t rows, long n_points);
99void unmark_flat_peaks(double *data, int32_t *row_flag, int64_t rows,
100 double change_threshold, long five_point, long seven_point,
101 double *x_data, double curvature_limit);
102void unmark_excluded_peaks(double *data, int32_t *row_flag, int64_t rows,
103 double ezone_fraction);
104
105int main(int argc, char **argv) {
106 SDDS_DATASET in_set, out_set;
107 SCANNED_ARG *s_arg;
108 long i_arg, page_returned;
109 int64_t rows, row;
110 int32_t *row_flag;
111 char *input, *output, *column_name, *par_threshold_name, *x_column_name;
112 double *data, *x_data;
113 unsigned long pipe_flags, flags, major_order_flag;
114 double threshold, ezone_fraction, change_threshold, curvature_limit;
115 short column_major_order = -1;
116
118 argc = scanargs(&s_arg, argc, argv);
119 if (argc < 2 || argc > (2 + N_OPTIONS))
120 bomb(NULL, USAGE);
121
122 flags = pipe_flags = 0;
123 input = output = NULL;
124 column_name = NULL;
125 ezone_fraction = change_threshold = curvature_limit = 0;
126 row_flag = NULL;
127 par_threshold_name = NULL;
128 x_column_name = NULL;
129 x_data = NULL;
130
131 for (i_arg = 1; i_arg < argc; i_arg++) {
132 if (s_arg[i_arg].arg_type == OPTION) {
133 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
134 case CLO_MAJOR_ORDER:
135 major_order_flag = 0;
136 s_arg[i_arg].n_items--;
137 if (s_arg[i_arg].n_items > 0 &&
138 (!scanItemList(&major_order_flag, s_arg[i_arg].list + 1,
139 &s_arg[i_arg].n_items, 0, "row", -1, NULL, 0,
140 SDDS_ROW_MAJOR_ORDER, "column", -1, NULL, 0,
141 SDDS_COLUMN_MAJOR_ORDER, NULL)))
142 SDDS_Bomb("invalid -majorOrder syntax/values");
143 if (major_order_flag & SDDS_COLUMN_MAJOR_ORDER)
144 column_major_order = 1;
145 else if (major_order_flag & SDDS_ROW_MAJOR_ORDER)
146 column_major_order = 0;
147 break;
148 case CLO_THRESHOLD:
149 if (s_arg[i_arg].n_items == 2) {
150 if (s_arg[i_arg].list[1][0] == '@') {
151 SDDS_CopyString(&par_threshold_name, s_arg[i_arg].list[1] + 1);
152 flags |= PAR_THRESHOLD;
153 } else {
154 if (sscanf(s_arg[i_arg].list[1], "%lf", &threshold) != 1)
155 SDDS_Bomb("incorrect -threshold syntax");
156 flags |= THRESHOLD;
157 }
158 } else
159 SDDS_Bomb("incorrect -threshold syntax");
160 break;
161 case CLO_FIVEPOINT:
162 flags |= FIVEPOINT;
163 break;
164 case CLO_SEVENPOINT:
165 flags |= SEVENPOINT;
166 break;
167 case CLO_CHANGETHRESHOLD:
168 if (s_arg[i_arg].n_items != 2 ||
169 sscanf(s_arg[i_arg].list[1], "%lf", &change_threshold) != 1 ||
170 change_threshold <= 0)
171 SDDS_Bomb("incorrect -changeThreshold syntax or values");
172 flags |= CHANGETHRES;
173 break;
174 case CLO_CURVATURE_LIMIT:
175 if (s_arg[i_arg].n_items != 3 ||
176 !strlen(x_column_name = s_arg[i_arg].list[1]) ||
177 sscanf(s_arg[i_arg].list[2], "%lf", &curvature_limit) != 1 ||
178 curvature_limit <= 0)
179 SDDS_Bomb("incorrect -curvatureLimit syntax or values");
180 flags |= CURVATURELIMIT;
181 break;
182 case CLO_PIPE:
183 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipe_flags))
184 SDDS_Bomb("invalid -pipe syntax");
185 break;
186 case CLO_COLUMN:
187 if (s_arg[i_arg].n_items != 2)
188 SDDS_Bomb("invalid -column syntax");
189 column_name = s_arg[i_arg].list[1];
190 break;
191 case CLO_EXCLUSIONZONE:
192 if (s_arg[i_arg].n_items != 2 ||
193 sscanf(s_arg[i_arg].list[1], "%lf", &ezone_fraction) != 1 ||
194 ezone_fraction <= 0)
195 SDDS_Bomb("invalid -exclusionZone syntax or value");
196 flags |= EZONEFRAC;
197 break;
198 default:
199 fprintf(stderr, "error: unknown/ambiguous option: %s\n",
200 s_arg[i_arg].list[0]);
201 exit(1);
202 break;
203 }
204 } else {
205 if (input == NULL)
206 input = s_arg[i_arg].list[0];
207 else if (output == NULL)
208 output = s_arg[i_arg].list[0];
209 else
210 SDDS_Bomb("too many filenames");
211 }
212 }
213
214 processFilenames("sddspeakfind", &input, &output, pipe_flags, 0, NULL);
215
216 if (!column_name)
217 SDDS_Bomb("-column option must be given");
218
219 if (!SDDS_InitializeInput(&in_set, input))
220 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
221
222 if (!SDDS_FindColumn(&in_set, FIND_NUMERIC_TYPE, column_name, NULL))
223 SDDS_Bomb("the given column is nonexistent or nonnumeric");
224
225 if (x_column_name &&
226 !SDDS_FindColumn(&in_set, FIND_NUMERIC_TYPE, x_column_name, NULL))
227 SDDS_Bomb("the given x column is nonexistent or nonnumeric");
228
229 if (!SDDS_InitializeCopy(&out_set, &in_set, output, "w"))
230 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
231
232 out_set.layout.data_mode.column_major =
233 column_major_order != -1 ? column_major_order : in_set.layout.data_mode.column_major;
234
235 if (!SDDS_WriteLayout(&out_set))
236 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
237
238 /* Main loop for processing pages */
239 while ((page_returned = SDDS_ReadPage(&in_set)) > 0) {
240 if (!SDDS_CopyPage(&out_set, &in_set)) {
241 SDDS_SetError("Problem copying data for output file");
242 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
243 }
244
245 if ((rows = SDDS_CountRowsOfInterest(&out_set)) > 1) {
246 data = SDDS_GetColumnInDoubles(&in_set, column_name);
247 if (!data)
248 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
249
250 if (x_column_name)
251 x_data = SDDS_GetColumnInDoubles(&in_set, x_column_name);
252 if (x_column_name && !x_data)
253 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
254
255 row_flag = SDDS_Realloc(row_flag, sizeof(*row_flag) * rows);
256 for (row = 0; row < rows; row++)
257 row_flag[row] = 0;
258
259 mark_peaks(data, row_flag, rows,
260 flags & SEVENPOINT ? 7 : (flags & FIVEPOINT ? 5 : 3));
261
262 if (flags & PAR_THRESHOLD)
263 if (!SDDS_GetParameter(&in_set, par_threshold_name, &threshold))
264 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
265
266 if (flags & THRESHOLD || flags & PAR_THRESHOLD)
267 for (row = 0; row < rows; row++)
268 if (row_flag[row] && data[row] < threshold)
269 row_flag[row] = 0;
270
271 if (flags & CHANGETHRES || flags & CURVATURELIMIT)
272 unmark_flat_peaks(data, row_flag, rows, change_threshold,
273 flags & FIVEPOINT, flags & SEVENPOINT, x_data, curvature_limit);
274
275 if (flags & EZONEFRAC)
276 unmark_excluded_peaks(data, row_flag, rows, ezone_fraction);
277
278 if (!SDDS_AssertRowFlags(&out_set, SDDS_FLAG_ARRAY, row_flag, rows))
279 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
280
281 free(data);
282 }
283
284 if (!SDDS_WritePage(&out_set))
285 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
286 }
287
288 if (!SDDS_Terminate(&in_set) || !SDDS_Terminate(&out_set)) {
289 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
290 exit(1);
291 }
292
293 free_scanargs(&s_arg, argc);
294 free(par_threshold_name);
295 free(row_flag);
296
297 return 0;
298}
299
300void mark_peaks(double *data, int32_t *row_flag, int64_t rows, long n_points) {
301 double y0, y1, y2;
302 int64_t i, rows_minus;
303 short five_point = 0;
304
305 switch (n_points) {
306 case 5:
307 y0 = data[1];
308 y1 = data[2];
309 if (rows < 5)
310 return;
311 five_point = 1;
312 break;
313 case 7:
314 if (rows < 7)
315 return;
316 for (i = 3; i < (rows - 3); i++) {
317 if ((data[i - 3] < data[i - 2] && data[i - 2] < data[i - 1] &&
318 data[i - 1] < data[i] && data[i + 1] < data[i] &&
319 data[i + 2] < data[i + 1] && data[i + 3] < data[i + 2]))
320 row_flag[i] = 1;
321 }
322 return;
323 default:
324 if (rows < 3)
325 return;
326 y0 = data[0];
327 y1 = data[1];
328 }
329
330 rows_minus = rows - (five_point ? 2 : 1);
331 for (i = (five_point ? 2 : 1); i < rows_minus; i++) {
332 y2 = data[i + 1];
333 if ((y1 > y0 && y1 > y2) || (y1 == y0 && y1 > y2) || (y1 > y0 && y1 == y2)) {
334 if (!five_point || (data[i - 2] < y0 && data[i + 2] < y2))
335 row_flag[i] = 1;
336 }
337 y0 = y1;
338 y1 = y2;
339 }
340}
341
342void unmark_flat_peaks(double *data, int32_t *row_flag, int64_t rows,
343 double change_threshold, long five_point, long seven_point,
344 double *x_data, double curvature_limit) {
345 long delta;
346 int64_t i, rows1;
347
348 delta = five_point ? 2 : (seven_point ? 3 : 1);
349 rows1 = rows - delta;
350
351 if (change_threshold > 0) {
352 for (i = delta; i < rows1; i++) {
353 if (row_flag[i]) {
354 if (!data[i] || (data[i] - data[i + delta]) / data[i] > change_threshold)
355 continue;
356 if (!data[i] || (data[i] - data[i - delta]) / data[i] > change_threshold)
357 continue;
358 row_flag[i] = 0;
359 }
360 }
361 }
362
363 if (curvature_limit > 0) {
364 double chi, coef[3], scoef[3], diff[7];
365 int32_t order[3] = {0, 1, 2};
366 for (i = delta; i < rows1; i++) {
367 if (row_flag[i]) {
368 lsfg(x_data + i - delta, data + i - delta, NULL, 2 * delta + 1,
369 3, order, coef, scoef, &chi, diff, ipower);
370 if (fabs(coef[2]) < curvature_limit)
371 row_flag[i] = 0;
372 }
373 }
374 }
375}
376
377void unmark_excluded_peaks(double *data, int32_t *row_flag, int64_t rows,
378 double ezone_fraction) {
379 int64_t lower, upper, i, j, rows1;
380 rows1 = rows - 1;
381
382 for (i = 0; i < rows; i++) {
383 if (row_flag[i]) {
384 lower = i - ((int64_t)(ezone_fraction / 2.0 * rows + 0.5));
385 if (lower < 0)
386 lower = 0;
387 upper = i + ((int64_t)(ezone_fraction / 2.0 * rows + 0.5));
388 if (upper > rows1)
389 upper = rows1;
390 for (j = lower; j <= upper; j++) {
391 if (j != i && row_flag[j] && data[j] <= data[i])
392 row_flag[j] = 0;
393 }
394 }
395 }
396}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_AssertRowFlags(SDDS_DATASET *SDDS_dataset, uint32_t mode,...)
Sets acceptance flags for rows based on specified criteria.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
void * SDDS_GetParameter(SDDS_DATASET *SDDS_dataset, char *parameter_name, void *memory)
Retrieves the value of a specified parameter from the current data table of a data set.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
char * SDDS_FindColumn(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Finds the first column in the SDDS dataset that matches the specified criteria.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long lsfg(double *xd, double *yd, double *sy, long n_pts, long n_terms, int32_t *order, double *coef, double *s_coef, double *chi, double *diff, double(*fn)(double x, long ord))
Computes generalized least squares fits using a function passed by the caller.
Definition lsfg.c:30
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
double ipower(double x, long n)
Evaluate a power function x^n.