SDDS ToolKit Programs and Libraries for C and Python
Loading...
Searching...
No Matches
sddscliptails.c
Go to the documentation of this file.
1/**
2 * @file sddscliptails.c
3 * @brief Clips tails from specified columns in SDDS files based on various criteria.
4 *
5 * @details
6 * This program reads an SDDS (Self Describing Data Sets) file and processes specified columns to
7 * clip data tails based on criteria like fractional limits, absolute limits, full-width-half-maximum
8 * (FWHM), or separation by zero. The processed data is then written to an output SDDS file.
9 * It supports flexible configuration for clipping and output format (row or column major order).
10 *
11 * @section Usage
12 * ```
13 * sddscliptails [<input>] [<output>]
14 * [-pipe=[in][,out]]
15 * [-columns=<listOfNames>]
16 * [-fractional=<value>]
17 * [-absolute=<value>]
18 * [-fwhm=<multiplier>]
19 * [-afterzero[=<bufferWidth>]]
20 * [-majorOrder=row|column]
21 * ```
22 *
23 * @section Options
24 * | Required | Description |
25 * |-----------|-----------------------------------------------------------------------------|
26 * | `-columns` | List of columns to process. |
27 *
28 * | Optional | Description |
29 * |-----------------------|--------------------------------------------------------------------------|
30 * | `-pipe` | Use standard input/output for input and/or output. |
31 * | `-fractional` | Clip tails below a fraction of the peak value. |
32 * | `-absolute` | Clip tails below a specified absolute value. |
33 * | `-fwhm` | Clip tails beyond a specified number of FWHMs from the peak. |
34 * | `-afterzero` | Clip tails separated from the peak by zeros, with optional buffer width. |
35 * | `-majorOrder` | Specify row or column major order for output data. |
36 *
37 * @subsection MEO Mutually Exclusive Options
38 * - Only one of the following may be specified:
39 * - `-fractional`
40 * - `-absolute`
41 * - Only one of the following may be specified:
42 * - `-fwhm`
43 * - `-afterzero`
44 *
45 * @copyright
46 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
47 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
48 *
49 * @license
50 * This file is distributed under the terms of the Software License Agreement
51 * found in the file LICENSE included with this distribution.
52 *
53 * @author
54 * M. Borland, R. Soliday, H. Shang
55 */
56
57#include "mdb.h"
58#include "scan.h"
59#include "SDDS.h"
60
61/* Enumeration for option types */
62typedef enum {
63 CLO_FRACTIONAL,
64 CLO_ABSOLUTE,
65 CLO_FWHM,
66 CLO_PIPE,
67 CLO_COLUMNS,
68 CLO_AFTERZERO,
69 CLO_MAJOR_ORDER,
70 N_OPTIONS
71} option_type;
72
73static char *option[N_OPTIONS] = {
74 "fractional",
75 "absolute",
76 "fwhm",
77 "pipe",
78 "columns",
79 "afterzero",
80 "majorOrder",
81};
82
83static char *usage =
84 "sddscliptails [<input>] [<output>] [-pipe=[input][,output]]\n"
85 " [-columns=<listOfNames>] [-fractional=<value>] [-absolute=<value>] [-fwhm=<multiplier>]\n"
86 " [-afterzero[=<bufferWidth>]] [-majorOrder=row|column]\n\n"
87 "-columns List of columns to process.\n"
88 "-fractional Clip a tail if it falls below this fraction of the peak.\n"
89 "-absolute Clip a tail if it falls below this absolute value.\n"
90 "-fwhm Clip a tail if it is beyond this many FWHM from the peak.\n"
91 "-afterzero Clip a tail if it is separated from the peak by values equal to zero.\n"
92 " If <bufferWidth> is specified, then a region <bufferWidth> wide is kept\n"
93 " on either side of the peak, if possible.\n"
94 "-majorOrder Writes output file in row or column major order.\n\n"
95 "Program by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
96
97static long resolve_column_names(SDDS_DATASET *sdds_in, char ***column, int32_t *columns);
98static void clip_tail(double *data, int64_t rows, double abs_limit, double frac_limit, short *in_tail);
99static void clip_fwhm(double *data, int64_t n_data, double fwhm_limit, double *indep_data, short *in_tail);
100static void clip_after_zero(double *data, int64_t rows, long buffer_width, short *in_tail);
101
102int main(int argc, char **argv) {
103 int i_arg;
104 char **input_column;
105 char *input, *output;
106 long read_code, after_zero, after_zero_buffer_width;
107 int64_t i, rows;
108 int32_t columns;
109 unsigned long pipe_flags, major_order_flag;
110 SCANNED_ARG *scanned;
111 SDDS_DATASET sdds_in, sdds_out;
112 double *data, *indep_data;
113 double fractional_limit, absolute_limit, fwhm_limit;
114 short *in_tail;
115 short column_major_order = -1;
116
118 argc = scanargs(&scanned, argc, argv);
119 if (argc < 2)
120 bomb(NULL, usage);
121
122 output = input = NULL;
123 input_column = NULL;
124 columns = 0;
125 pipe_flags = 0;
126 fractional_limit = fwhm_limit = 0;
127 absolute_limit = -1;
128 after_zero = 0;
129
130 for (i_arg = 1; i_arg < argc; i_arg++) {
131 if (scanned[i_arg].arg_type == OPTION) {
132 /* Process options */
133 switch (match_string(scanned[i_arg].list[0], option, N_OPTIONS, 0)) {
134 case CLO_MAJOR_ORDER:
135 major_order_flag = 0;
136 scanned[i_arg].n_items--;
137 if (scanned[i_arg].n_items > 0 &&
138 (!scanItemList(&major_order_flag, scanned[i_arg].list + 1, &scanned[i_arg].n_items, 0,
139 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
140 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
141 SDDS_Bomb("invalid -majorOrder syntax/values");
142 if (major_order_flag & SDDS_COLUMN_MAJOR_ORDER)
143 column_major_order = 1;
144 else if (major_order_flag & SDDS_ROW_MAJOR_ORDER)
145 column_major_order = 0;
146 break;
147 case CLO_COLUMNS:
148 if (scanned[i_arg].n_items < 2)
149 SDDS_Bomb("invalid -columns syntax");
150 input_column = tmalloc(sizeof(*input_column) * (columns = scanned[i_arg].n_items - 1));
151 for (i = 0; i < columns; i++)
152 input_column[i] = scanned[i_arg].list[i + 1];
153 break;
154 case CLO_PIPE:
155 if (!processPipeOption(scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, &pipe_flags))
156 SDDS_Bomb("invalid -pipe syntax");
157 break;
158 case CLO_FRACTIONAL:
159 if (scanned[i_arg].n_items < 2 || sscanf(scanned[i_arg].list[1], "%lf", &fractional_limit) != 1 || fractional_limit < 0)
160 SDDS_Bomb("invalid -fractional syntax");
161 break;
162 case CLO_ABSOLUTE:
163 if (scanned[i_arg].n_items < 2 || sscanf(scanned[i_arg].list[1], "%lf", &absolute_limit) != 1 || absolute_limit < 0)
164 SDDS_Bomb("invalid -absolute syntax");
165 break;
166 case CLO_FWHM:
167 if (scanned[i_arg].n_items < 2 || sscanf(scanned[i_arg].list[1], "%lf", &fwhm_limit) != 1 || fwhm_limit < 0)
168 SDDS_Bomb("invalid -fwhm syntax");
169 break;
170 case CLO_AFTERZERO:
171 after_zero = 1;
172 if (scanned[i_arg].n_items > 2 ||
173 (scanned[i_arg].n_items == 2 &&
174 (sscanf(scanned[i_arg].list[1], "%ld", &after_zero_buffer_width) != 1 ||
175 after_zero_buffer_width <= 0)))
176 SDDS_Bomb("invalid -afterZero syntax");
177 break;
178 default:
179 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[i_arg].list[0]);
180 exit(1);
181 break;
182 }
183 } else {
184 if (!input)
185 input = scanned[i_arg].list[0];
186 else if (!output)
187 output = scanned[i_arg].list[0];
188 else
189 SDDS_Bomb("too many filenames seen");
190 }
191 }
192
193 processFilenames("sddscliptails", &input, &output, pipe_flags, 0, NULL);
194
195 if (!columns)
196 SDDS_Bomb("supply the names of columns to process with the -columns option");
197
198 if (!SDDS_InitializeInput(&sdds_in, input))
199 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
200
201 if (!resolve_column_names(&sdds_in, &input_column, &columns))
202 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
203 if (!columns)
204 SDDS_Bomb("no columns selected for processing");
205
206 if (!SDDS_InitializeCopy(&sdds_out, &sdds_in, output, "w"))
207 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
208 if (column_major_order != -1)
209 sdds_out.layout.data_mode.column_major = column_major_order;
210 else
211 sdds_out.layout.data_mode.column_major = sdds_in.layout.data_mode.column_major;
212
213 if (!SDDS_DefineColumn(&sdds_out, "InTail", NULL, NULL, NULL, NULL, SDDS_SHORT, 0))
214 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
215
216 if (!SDDS_WriteLayout(&sdds_out))
217 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
218
219 indep_data = NULL;
220 in_tail = NULL;
221 while ((read_code = SDDS_ReadPage(&sdds_in)) > 0) {
222 if (!SDDS_CopyPage(&sdds_out, &sdds_in))
223 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
224 if ((rows = SDDS_CountRowsOfInterest(&sdds_in))) {
225 if (fwhm_limit > 0) {
226 indep_data = SDDS_Realloc(indep_data, sizeof(*indep_data) * rows);
227 if (!indep_data)
228 SDDS_Bomb("memory allocation failure");
229 for (i = 0; i < rows; i++)
230 indep_data[i] = i;
231 }
232 in_tail = SDDS_Realloc(in_tail, sizeof(*in_tail) * rows);
233 if (!in_tail)
234 SDDS_Bomb("memory allocation failure");
235 for (i = 0; i < rows; i++)
236 in_tail[i] = 0;
237 for (i = 0; i < columns; i++) {
238 data = SDDS_GetColumnInDoubles(&sdds_in, input_column[i]);
239 if (!data)
240 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
241 clip_tail(data, rows, absolute_limit, fractional_limit, in_tail);
242 if (fwhm_limit > 0)
243 clip_fwhm(data, rows, fwhm_limit, indep_data, in_tail);
244 if (after_zero)
245 clip_after_zero(data, rows, after_zero_buffer_width, in_tail);
246 if (!SDDS_SetColumnFromDoubles(&sdds_out, SDDS_SET_BY_NAME, data, rows, input_column[i]))
247 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
248 free(data);
249 }
250 }
251 if (!SDDS_SetColumn(&sdds_out, SDDS_SET_BY_NAME, in_tail, rows, "InTail"))
252 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
253 if (!SDDS_WritePage(&sdds_out))
254 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
255 }
256 if (SDDS_Terminate(&sdds_in) != 1) {
257 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
258 return 1;
259 }
260 if (SDDS_Terminate(&sdds_out) != 1) {
261 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
262 return 1;
263 }
264 if (indep_data)
265 free(indep_data);
266 if (in_tail)
267 free(in_tail);
268 return 0;
269}
270
271static long resolve_column_names(SDDS_DATASET *sdds_in, char ***column, int32_t *columns) {
272 long i;
273
274 SDDS_SetColumnFlags(sdds_in, 0);
275 for (i = 0; i < *columns; i++) {
276 if (!SDDS_SetColumnsOfInterest(sdds_in, SDDS_MATCH_STRING, (*column)[i], SDDS_OR))
277 return 0;
278 }
279 free(*column);
280 *column = SDDS_GetColumnNames(sdds_in, columns);
281 if (!*column || *columns == 0) {
282 SDDS_SetError("no columns found");
283 return 0;
284 }
285 return 1;
286}
287
288static void clip_tail(double *data, int64_t rows, double abs_limit, double frac_limit, short *in_tail) {
289 long clip;
290 int64_t i, imin, imax;
291 double abs_limit2;
292
293 if (abs_limit < 0 && frac_limit <= 0)
294 return;
295 if (rows < 3)
296 return;
297
298 index_min_max(&imin, &imax, data, rows);
299 if (!data[imax])
300 return;
301
302 abs_limit2 = frac_limit * data[imax];
303 if (abs_limit < 0 || (frac_limit && abs_limit2 < abs_limit))
304 abs_limit = abs_limit2;
305
306 if (abs_limit < 0)
307 return;
308
309 for (i = imax - 1, clip = 0; i >= 0; i--) {
310 if (!clip && data[i] < abs_limit)
311 clip = 1;
312 if (clip) {
313 in_tail[i] += 1;
314 data[i] = 0;
315 }
316 }
317
318 for (i = imax + 1, clip = 0; i < rows; i++) {
319 if (!clip && data[i] < abs_limit)
320 clip = 1;
321 if (clip) {
322 in_tail[i] += 1;
323 data[i] = 0;
324 }
325 }
326}
327
328static void clip_fwhm(double *data, int64_t n_data, double fwhm_limit, double *indep_data, short *in_tail) {
329 double top, base, fwhm;
330 int64_t i1, i2, i2_save, i, imin, imax;
331 double point1, point2;
332
333 if (n_data < 3 || fwhm_limit <= 0)
334 return;
335
336 index_min_max(&imin, &imax, data, n_data);
337 if (!data[imax])
338 return;
339
340 if (!findTopBaseLevels(&top, &base, data, n_data, 50, 2.0))
341 return;
342
343 i1 = findCrossingPoint(0, data, n_data, top * 0.5, 1, indep_data, &point1);
344 if (i1 < 0)
345 return;
346
347 i2 = i2_save = findCrossingPoint(i1, data, n_data, top * 0.9, -1, NULL, NULL);
348 if (i2 < 0)
349 return;
350
351 i2 = findCrossingPoint(i2, data, n_data, top * 0.5, -1, indep_data, &point2);
352 if (i2 < 0)
353 return;
354
355 fwhm = point2 - point1;
356
357 for (i = imax + fwhm * fwhm_limit; i < n_data; i++) {
358 in_tail[i] += 1;
359 data[i] = 0;
360 }
361 for (i = imax - fwhm * fwhm_limit; i >= 0; i--) {
362 in_tail[i] += 1;
363 data[i] = 0;
364 }
365}
366
367static void clip_after_zero(double *data, int64_t rows, long buffer_width, short *in_tail) {
368 int64_t imin, imax, i, j;
369 index_min_max(&imin, &imax, data, rows);
370 if (!data[imax])
371 return;
372 for (i = imax + 1; i < rows; i++) {
373 if (data[i] == 0) {
374 for (j = i + buffer_width; j < rows; j++) {
375 in_tail[j] += 1;
376 data[j] = 0;
377 }
378 break;
379 }
380 }
381
382 for (i = imax - 1; i >= 0; i--) {
383 if (data[i] == 0) {
384 for (j = i - buffer_width; j >= 0; j--) {
385 in_tail[j] += 1;
386 data[j] = 0;
387 }
388 break;
389 }
390 }
391}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
int32_t SDDS_SetColumnFlags(SDDS_DATASET *SDDS_dataset, int32_t column_flag_value)
Sets the acceptance flags for all columns in the current data table of a data set.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_SHORT
Identifier for the signed short integer data type.
Definition SDDStypes.h:73
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
int index_min_max(int64_t *imin, int64_t *imax, double *list, int64_t n)
Finds the indices of the minimum and maximum values in a list of doubles.
Definition findMinMax.c:116
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
long findTopBaseLevels(double *top, double *base, double *data, int64_t points, long bins, double sigmasRequired)
Finds the top-level and base-level of a dataset.
Definition topbase.c:36
int64_t findCrossingPoint(int64_t start, double *data, int64_t points, double level, long direction, double *indepData, double *location)
Finds the crossing point in the data where the data crosses a specified level.
Definition topbase.c:118