SDDSlib
Loading...
Searching...
No Matches
sddscliptails.c
Go to the documentation of this file.
1/**
2 * @file sddscliptails.c
3 * @brief Program for processing SDDS files by clipping tails from specified columns.
4 *
5 * This program reads an SDDS file, processes specified columns to clip data tails based on
6 * various criteria such as fractional limits, absolute limits, or full-width-half-maximum (FWHM),
7 * and writes the processed data to an output SDDS file. It provides options for configuring the
8 * clipping behavior and supports both row and column major order for output.
9 *
10 * ### Key Features:
11 * - Supports multiple clipping criteria: fractional limit, absolute value, FWHM, and separation by zero.
12 * - Handles both row and column major order for output data.
13 * - Allows flexible specification of columns to process via command-line options.
14 *
15 * ### Usage:
16 * ```
17 * sddscliptails [<input>] [<output>] [-pipe=[in][,out]]
18 * [-columns=<listOfNames>] [-fractional=<value>] [-absolute=<value>] [-fwhm=<multiplier>]
19 * [-afterzero[=<bufferWidth>]] [-majorOrder=row|column]
20 * ```
21 *
22 * ### Options:
23 * - **-columns**: Specify the list of columns to process.
24 * - **-fractional**: Clip tails below a fraction of the peak value.
25 * - **-absolute**: Clip tails below a specified absolute value.
26 * - **-fwhm**: Clip tails beyond a specified number of FWHMs from the peak.
27 * - **-afterzero**: Clip tails separated from the peak by zero values, optionally with a buffer width.
28 * - **-majorOrder**: Specify row or column major order for the output file.
29 *
30 * ### Example:
31 * ```
32 * sddscliptails input.sdds output.sdds -columns=Column1,Column2 -fractional=0.1
33 * ```
34 *
35 * @copyright
36 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
37 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
38 *
39 * @license
40 * This file is distributed under the terms of the Software License Agreement
41 * found in the file LICENSE included with this distribution.
42 *
43 * @author M. Borland, R. Soliday, H. Shang
44 */
45
46#include "mdb.h"
47#include "scan.h"
48#include "SDDS.h"
49
50/* Enumeration for option types */
51typedef enum {
52 CLO_FRACTIONAL,
53 CLO_ABSOLUTE,
54 CLO_FWHM,
55 CLO_PIPE,
56 CLO_COLUMNS,
57 CLO_AFTERZERO,
58 CLO_MAJOR_ORDER,
59 N_OPTIONS
60} option_type;
61
62static char *option[N_OPTIONS] = {
63 "fractional",
64 "absolute",
65 "fwhm",
66 "pipe",
67 "columns",
68 "afterzero",
69 "majorOrder",
70};
71
72static char *usage =
73 "sddscliptails [<input>] [<output>] [-pipe=[in][,out]]\n"
74 " [-columns=<listOfNames>] [-fractional=<value>] [-absolute=<value>] [-fwhm=<multiplier>]\n"
75 " [-afterzero[=<bufferWidth>]] [-majorOrder=row|column]\n\n"
76 "-columns List of columns to process.\n"
77 "-fractional Clip a tail if it falls below this fraction of the peak.\n"
78 "-absolute Clip a tail if it falls below this absolute value.\n"
79 "-fwhm Clip a tail if it is beyond this many FWHM from the peak.\n"
80 "-afterzero Clip a tail if it is separated from the peak by values equal to zero.\n"
81 " If <bufferWidth> is specified, then a region <bufferWidth> wide is kept\n"
82 " on either side of the peak, if possible.\n"
83 "-majorOrder Writes output file in row or column major order.\n\n"
84 "Program by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
85
86static long resolve_column_names(SDDS_DATASET *sdds_in, char ***column, int32_t *columns);
87static void clip_tail(double *data, int64_t rows, double abs_limit, double frac_limit, short *in_tail);
88static void clip_fwhm(double *data, int64_t n_data, double fwhm_limit, double *indep_data, short *in_tail);
89static void clip_after_zero(double *data, int64_t rows, long buffer_width, short *in_tail);
90
91int main(int argc, char **argv) {
92 int i_arg;
93 char **input_column;
94 char *input, *output;
95 long read_code, after_zero, after_zero_buffer_width;
96 int64_t i, rows;
97 int32_t columns;
98 unsigned long pipe_flags, major_order_flag;
99 SCANNED_ARG *scanned;
100 SDDS_DATASET sdds_in, sdds_out;
101 double *data, *indep_data;
102 double fractional_limit, absolute_limit, fwhm_limit;
103 short *in_tail;
104 short column_major_order = -1;
105
107 argc = scanargs(&scanned, argc, argv);
108 if (argc < 2)
109 bomb(NULL, usage);
110
111 output = input = NULL;
112 input_column = NULL;
113 columns = 0;
114 pipe_flags = 0;
115 fractional_limit = fwhm_limit = 0;
116 absolute_limit = -1;
117 after_zero = 0;
118
119 for (i_arg = 1; i_arg < argc; i_arg++) {
120 if (scanned[i_arg].arg_type == OPTION) {
121 /* Process options */
122 switch (match_string(scanned[i_arg].list[0], option, N_OPTIONS, 0)) {
123 case CLO_MAJOR_ORDER:
124 major_order_flag = 0;
125 scanned[i_arg].n_items--;
126 if (scanned[i_arg].n_items > 0 &&
127 (!scanItemList(&major_order_flag, scanned[i_arg].list + 1, &scanned[i_arg].n_items, 0,
128 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
129 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
130 SDDS_Bomb("invalid -majorOrder syntax/values");
131 if (major_order_flag & SDDS_COLUMN_MAJOR_ORDER)
132 column_major_order = 1;
133 else if (major_order_flag & SDDS_ROW_MAJOR_ORDER)
134 column_major_order = 0;
135 break;
136 case CLO_COLUMNS:
137 if (scanned[i_arg].n_items < 2)
138 SDDS_Bomb("invalid -columns syntax");
139 input_column = tmalloc(sizeof(*input_column) * (columns = scanned[i_arg].n_items - 1));
140 for (i = 0; i < columns; i++)
141 input_column[i] = scanned[i_arg].list[i + 1];
142 break;
143 case CLO_PIPE:
144 if (!processPipeOption(scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, &pipe_flags))
145 SDDS_Bomb("invalid -pipe syntax");
146 break;
147 case CLO_FRACTIONAL:
148 if (scanned[i_arg].n_items < 2 || sscanf(scanned[i_arg].list[1], "%lf", &fractional_limit) != 1 || fractional_limit < 0)
149 SDDS_Bomb("invalid -fractional syntax");
150 break;
151 case CLO_ABSOLUTE:
152 if (scanned[i_arg].n_items < 2 || sscanf(scanned[i_arg].list[1], "%lf", &absolute_limit) != 1 || absolute_limit < 0)
153 SDDS_Bomb("invalid -absolute syntax");
154 break;
155 case CLO_FWHM:
156 if (scanned[i_arg].n_items < 2 || sscanf(scanned[i_arg].list[1], "%lf", &fwhm_limit) != 1 || fwhm_limit < 0)
157 SDDS_Bomb("invalid -fwhm syntax");
158 break;
159 case CLO_AFTERZERO:
160 after_zero = 1;
161 if (scanned[i_arg].n_items > 2 ||
162 (scanned[i_arg].n_items == 2 &&
163 (sscanf(scanned[i_arg].list[1], "%ld", &after_zero_buffer_width) != 1 ||
164 after_zero_buffer_width <= 0)))
165 SDDS_Bomb("invalid -afterZero syntax");
166 break;
167 default:
168 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[i_arg].list[0]);
169 exit(1);
170 break;
171 }
172 } else {
173 if (!input)
174 input = scanned[i_arg].list[0];
175 else if (!output)
176 output = scanned[i_arg].list[0];
177 else
178 SDDS_Bomb("too many filenames seen");
179 }
180 }
181
182 processFilenames("sddscliptails", &input, &output, pipe_flags, 0, NULL);
183
184 if (!columns)
185 SDDS_Bomb("supply the names of columns to process with the -columns option");
186
187 if (!SDDS_InitializeInput(&sdds_in, input))
188 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
189
190 if (!resolve_column_names(&sdds_in, &input_column, &columns))
191 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
192 if (!columns)
193 SDDS_Bomb("no columns selected for processing");
194
195 if (!SDDS_InitializeCopy(&sdds_out, &sdds_in, output, "w"))
196 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
197 if (column_major_order != -1)
198 sdds_out.layout.data_mode.column_major = column_major_order;
199 else
200 sdds_out.layout.data_mode.column_major = sdds_in.layout.data_mode.column_major;
201
202 if (!SDDS_DefineColumn(&sdds_out, "InTail", NULL, NULL, NULL, NULL, SDDS_SHORT, 0))
203 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
204
205 if (!SDDS_WriteLayout(&sdds_out))
206 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
207
208 indep_data = NULL;
209 in_tail = NULL;
210 while ((read_code = SDDS_ReadPage(&sdds_in)) > 0) {
211 if (!SDDS_CopyPage(&sdds_out, &sdds_in))
212 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
213 if ((rows = SDDS_CountRowsOfInterest(&sdds_in))) {
214 if (fwhm_limit > 0) {
215 indep_data = SDDS_Realloc(indep_data, sizeof(*indep_data) * rows);
216 if (!indep_data)
217 SDDS_Bomb("memory allocation failure");
218 for (i = 0; i < rows; i++)
219 indep_data[i] = i;
220 }
221 in_tail = SDDS_Realloc(in_tail, sizeof(*in_tail) * rows);
222 if (!in_tail)
223 SDDS_Bomb("memory allocation failure");
224 for (i = 0; i < rows; i++)
225 in_tail[i] = 0;
226 for (i = 0; i < columns; i++) {
227 data = SDDS_GetColumnInDoubles(&sdds_in, input_column[i]);
228 if (!data)
229 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
230 clip_tail(data, rows, absolute_limit, fractional_limit, in_tail);
231 if (fwhm_limit > 0)
232 clip_fwhm(data, rows, fwhm_limit, indep_data, in_tail);
233 if (after_zero)
234 clip_after_zero(data, rows, after_zero_buffer_width, in_tail);
235 if (!SDDS_SetColumnFromDoubles(&sdds_out, SDDS_SET_BY_NAME, data, rows, input_column[i]))
236 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
237 free(data);
238 }
239 }
240 if (!SDDS_SetColumn(&sdds_out, SDDS_SET_BY_NAME, in_tail, rows, "InTail"))
241 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
242 if (!SDDS_WritePage(&sdds_out))
243 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
244 }
245 if (SDDS_Terminate(&sdds_in) != 1) {
246 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
247 return 1;
248 }
249 if (SDDS_Terminate(&sdds_out) != 1) {
250 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
251 return 1;
252 }
253 if (indep_data)
254 free(indep_data);
255 if (in_tail)
256 free(in_tail);
257 return 0;
258}
259
260static long resolve_column_names(SDDS_DATASET *sdds_in, char ***column, int32_t *columns) {
261 long i;
262
263 SDDS_SetColumnFlags(sdds_in, 0);
264 for (i = 0; i < *columns; i++) {
265 if (!SDDS_SetColumnsOfInterest(sdds_in, SDDS_MATCH_STRING, (*column)[i], SDDS_OR))
266 return 0;
267 }
268 free(*column);
269 *column = SDDS_GetColumnNames(sdds_in, columns);
270 if (!*column || *columns == 0) {
271 SDDS_SetError("no columns found");
272 return 0;
273 }
274 return 1;
275}
276
277static void clip_tail(double *data, int64_t rows, double abs_limit, double frac_limit, short *in_tail) {
278 long clip;
279 int64_t i, imin, imax;
280 double abs_limit2;
281
282 if (abs_limit < 0 && frac_limit <= 0)
283 return;
284 if (rows < 3)
285 return;
286
287 index_min_max(&imin, &imax, data, rows);
288 if (!data[imax])
289 return;
290
291 abs_limit2 = frac_limit * data[imax];
292 if (abs_limit < 0 || (frac_limit && abs_limit2 < abs_limit))
293 abs_limit = abs_limit2;
294
295 if (abs_limit < 0)
296 return;
297
298 for (i = imax - 1, clip = 0; i >= 0; i--) {
299 if (!clip && data[i] < abs_limit)
300 clip = 1;
301 if (clip) {
302 in_tail[i] += 1;
303 data[i] = 0;
304 }
305 }
306
307 for (i = imax + 1, clip = 0; i < rows; i++) {
308 if (!clip && data[i] < abs_limit)
309 clip = 1;
310 if (clip) {
311 in_tail[i] += 1;
312 data[i] = 0;
313 }
314 }
315}
316
317static void clip_fwhm(double *data, int64_t n_data, double fwhm_limit, double *indep_data, short *in_tail) {
318 double top, base, fwhm;
319 int64_t i1, i2, i2_save, i, imin, imax;
320 double point1, point2;
321
322 if (n_data < 3 || fwhm_limit <= 0)
323 return;
324
325 index_min_max(&imin, &imax, data, n_data);
326 if (!data[imax])
327 return;
328
329 if (!findTopBaseLevels(&top, &base, data, n_data, 50, 2.0))
330 return;
331
332 i1 = findCrossingPoint(0, data, n_data, top * 0.5, 1, indep_data, &point1);
333 if (i1 < 0)
334 return;
335
336 i2 = i2_save = findCrossingPoint(i1, data, n_data, top * 0.9, -1, NULL, NULL);
337 if (i2 < 0)
338 return;
339
340 i2 = findCrossingPoint(i2, data, n_data, top * 0.5, -1, indep_data, &point2);
341 if (i2 < 0)
342 return;
343
344 fwhm = point2 - point1;
345
346 for (i = imax + fwhm * fwhm_limit; i < n_data; i++) {
347 in_tail[i] += 1;
348 data[i] = 0;
349 }
350 for (i = imax - fwhm * fwhm_limit; i >= 0; i--) {
351 in_tail[i] += 1;
352 data[i] = 0;
353 }
354}
355
356static void clip_after_zero(double *data, int64_t rows, long buffer_width, short *in_tail) {
357 int64_t imin, imax, i, j;
358 index_min_max(&imin, &imax, data, rows);
359 if (!data[imax])
360 return;
361 for (i = imax + 1; i < rows; i++) {
362 if (data[i] == 0) {
363 for (j = i + buffer_width; j < rows; j++) {
364 in_tail[j] += 1;
365 data[j] = 0;
366 }
367 break;
368 }
369 }
370
371 for (i = imax - 1; i >= 0; i--) {
372 if (data[i] == 0) {
373 for (j = i - buffer_width; j >= 0; j--) {
374 in_tail[j] += 1;
375 data[j] = 0;
376 }
377 break;
378 }
379 }
380}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
int32_t SDDS_SetColumnFlags(SDDS_DATASET *SDDS_dataset, int32_t column_flag_value)
Sets the acceptance flags for all columns in the current data table of a data set.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_SHORT
Identifier for the signed short integer data type.
Definition SDDStypes.h:73
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
int index_min_max(int64_t *imin, int64_t *imax, double *list, int64_t n)
Finds the indices of the minimum and maximum values in a list of doubles.
Definition findMinMax.c:116
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
long findTopBaseLevels(double *top, double *base, double *data, int64_t points, long bins, double sigmasRequired)
Finds the top-level and base-level of a dataset.
Definition topbase.c:36
int64_t findCrossingPoint(int64_t start, double *data, int64_t points, double level, long direction, double *indepData, double *location)
Finds the crossing point in the data where the data crosses a specified level.
Definition topbase.c:118