SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddscliptails.c File Reference

Detailed Description

Clips tails from specified columns in SDDS files based on various criteria.

This program reads an SDDS (Self Describing Data Sets) file and processes specified columns to clip data tails based on criteria like fractional limits, absolute limits, full-width-half-maximum (FWHM), or separation by zero. The processed data is then written to an output SDDS file. It supports flexible configuration for clipping and output format (row or column major order).

Usage

sddscliptails [<input>] [<output>]
[-pipe=[in][,out]]
[-columns=<listOfNames>]
[-fractional=<value>]
[-absolute=<value>]
[-fwhm=<multiplier>]
[-afterzero[=<bufferWidth>]]
[-majorOrder=row|column]

Options

Required Description
-columns List of columns to process.
Optional Description
-pipe Use standard input/output for input and/or output.
-fractional Clip tails below a fraction of the peak value.
-absolute Clip tails below a specified absolute value.
-fwhm Clip tails beyond a specified number of FWHMs from the peak.
-afterzero Clip tails separated from the peak by zeros, with optional buffer width.
-majorOrder Specify row or column major order for output data.

Mutually Exclusive Options

  • Only one of the following may be specified:
    • -fractional
    • -absolute
  • Only one of the following may be specified:
    • -fwhm
    • -afterzero
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Author
M. Borland, R. Soliday, H. Shang

Definition in file sddscliptails.c.

#include "mdb.h"
#include "scan.h"
#include "SDDS.h"

Go to the source code of this file.

Functions

static long resolve_column_names (SDDS_DATASET *sdds_in, char ***column, int32_t *columns)
 
static void clip_tail (double *data, int64_t rows, double abs_limit, double frac_limit, short *in_tail)
 
static void clip_fwhm (double *data, int64_t n_data, double fwhm_limit, double *indep_data, short *in_tail)
 
static void clip_after_zero (double *data, int64_t rows, long buffer_width, short *in_tail)
 
int main (int argc, char **argv)
 

Function Documentation

◆ clip_after_zero()

static void clip_after_zero ( double * data,
int64_t rows,
long buffer_width,
short * in_tail )
static

Definition at line 367 of file sddscliptails.c.

367 {
368 int64_t imin, imax, i, j;
369 index_min_max(&imin, &imax, data, rows);
370 if (!data[imax])
371 return;
372 for (i = imax + 1; i < rows; i++) {
373 if (data[i] == 0) {
374 for (j = i + buffer_width; j < rows; j++) {
375 in_tail[j] += 1;
376 data[j] = 0;
377 }
378 break;
379 }
380 }
381
382 for (i = imax - 1; i >= 0; i--) {
383 if (data[i] == 0) {
384 for (j = i - buffer_width; j >= 0; j--) {
385 in_tail[j] += 1;
386 data[j] = 0;
387 }
388 break;
389 }
390 }
391}
int index_min_max(int64_t *imin, int64_t *imax, double *list, int64_t n)
Finds the indices of the minimum and maximum values in a list of doubles.
Definition findMinMax.c:116

◆ clip_fwhm()

static void clip_fwhm ( double * data,
int64_t n_data,
double fwhm_limit,
double * indep_data,
short * in_tail )
static

Definition at line 328 of file sddscliptails.c.

328 {
329 double top, base, fwhm;
330 int64_t i1, i2, i2_save, i, imin, imax;
331 double point1, point2;
332
333 if (n_data < 3 || fwhm_limit <= 0)
334 return;
335
336 index_min_max(&imin, &imax, data, n_data);
337 if (!data[imax])
338 return;
339
340 if (!findTopBaseLevels(&top, &base, data, n_data, 50, 2.0))
341 return;
342
343 i1 = findCrossingPoint(0, data, n_data, top * 0.5, 1, indep_data, &point1);
344 if (i1 < 0)
345 return;
346
347 i2 = i2_save = findCrossingPoint(i1, data, n_data, top * 0.9, -1, NULL, NULL);
348 if (i2 < 0)
349 return;
350
351 i2 = findCrossingPoint(i2, data, n_data, top * 0.5, -1, indep_data, &point2);
352 if (i2 < 0)
353 return;
354
355 fwhm = point2 - point1;
356
357 for (i = imax + fwhm * fwhm_limit; i < n_data; i++) {
358 in_tail[i] += 1;
359 data[i] = 0;
360 }
361 for (i = imax - fwhm * fwhm_limit; i >= 0; i--) {
362 in_tail[i] += 1;
363 data[i] = 0;
364 }
365}
long findTopBaseLevels(double *top, double *base, double *data, int64_t points, long bins, double sigmasRequired)
Finds the top-level and base-level of a dataset.
Definition topbase.c:36
int64_t findCrossingPoint(int64_t start, double *data, int64_t points, double level, long direction, double *indepData, double *location)
Finds the crossing point in the data where the data crosses a specified level.
Definition topbase.c:118

◆ clip_tail()

static void clip_tail ( double * data,
int64_t rows,
double abs_limit,
double frac_limit,
short * in_tail )
static

Definition at line 288 of file sddscliptails.c.

288 {
289 long clip;
290 int64_t i, imin, imax;
291 double abs_limit2;
292
293 if (abs_limit < 0 && frac_limit <= 0)
294 return;
295 if (rows < 3)
296 return;
297
298 index_min_max(&imin, &imax, data, rows);
299 if (!data[imax])
300 return;
301
302 abs_limit2 = frac_limit * data[imax];
303 if (abs_limit < 0 || (frac_limit && abs_limit2 < abs_limit))
304 abs_limit = abs_limit2;
305
306 if (abs_limit < 0)
307 return;
308
309 for (i = imax - 1, clip = 0; i >= 0; i--) {
310 if (!clip && data[i] < abs_limit)
311 clip = 1;
312 if (clip) {
313 in_tail[i] += 1;
314 data[i] = 0;
315 }
316 }
317
318 for (i = imax + 1, clip = 0; i < rows; i++) {
319 if (!clip && data[i] < abs_limit)
320 clip = 1;
321 if (clip) {
322 in_tail[i] += 1;
323 data[i] = 0;
324 }
325 }
326}

◆ main()

int main ( int argc,
char ** argv )

Definition at line 102 of file sddscliptails.c.

102 {
103 int i_arg;
104 char **input_column;
105 char *input, *output;
106 long read_code, after_zero, after_zero_buffer_width;
107 int64_t i, rows;
108 int32_t columns;
109 unsigned long pipe_flags, major_order_flag;
110 SCANNED_ARG *scanned;
111 SDDS_DATASET sdds_in, sdds_out;
112 double *data, *indep_data;
113 double fractional_limit, absolute_limit, fwhm_limit;
114 short *in_tail;
115 short column_major_order = -1;
116
118 argc = scanargs(&scanned, argc, argv);
119 if (argc < 2)
120 bomb(NULL, usage);
121
122 output = input = NULL;
123 input_column = NULL;
124 columns = 0;
125 pipe_flags = 0;
126 fractional_limit = fwhm_limit = 0;
127 absolute_limit = -1;
128 after_zero = 0;
129
130 for (i_arg = 1; i_arg < argc; i_arg++) {
131 if (scanned[i_arg].arg_type == OPTION) {
132 /* Process options */
133 switch (match_string(scanned[i_arg].list[0], option, N_OPTIONS, 0)) {
134 case CLO_MAJOR_ORDER:
135 major_order_flag = 0;
136 scanned[i_arg].n_items--;
137 if (scanned[i_arg].n_items > 0 &&
138 (!scanItemList(&major_order_flag, scanned[i_arg].list + 1, &scanned[i_arg].n_items, 0,
139 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
140 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
141 SDDS_Bomb("invalid -majorOrder syntax/values");
142 if (major_order_flag & SDDS_COLUMN_MAJOR_ORDER)
143 column_major_order = 1;
144 else if (major_order_flag & SDDS_ROW_MAJOR_ORDER)
145 column_major_order = 0;
146 break;
147 case CLO_COLUMNS:
148 if (scanned[i_arg].n_items < 2)
149 SDDS_Bomb("invalid -columns syntax");
150 input_column = tmalloc(sizeof(*input_column) * (columns = scanned[i_arg].n_items - 1));
151 for (i = 0; i < columns; i++)
152 input_column[i] = scanned[i_arg].list[i + 1];
153 break;
154 case CLO_PIPE:
155 if (!processPipeOption(scanned[i_arg].list + 1, scanned[i_arg].n_items - 1, &pipe_flags))
156 SDDS_Bomb("invalid -pipe syntax");
157 break;
158 case CLO_FRACTIONAL:
159 if (scanned[i_arg].n_items < 2 || sscanf(scanned[i_arg].list[1], "%lf", &fractional_limit) != 1 || fractional_limit < 0)
160 SDDS_Bomb("invalid -fractional syntax");
161 break;
162 case CLO_ABSOLUTE:
163 if (scanned[i_arg].n_items < 2 || sscanf(scanned[i_arg].list[1], "%lf", &absolute_limit) != 1 || absolute_limit < 0)
164 SDDS_Bomb("invalid -absolute syntax");
165 break;
166 case CLO_FWHM:
167 if (scanned[i_arg].n_items < 2 || sscanf(scanned[i_arg].list[1], "%lf", &fwhm_limit) != 1 || fwhm_limit < 0)
168 SDDS_Bomb("invalid -fwhm syntax");
169 break;
170 case CLO_AFTERZERO:
171 after_zero = 1;
172 if (scanned[i_arg].n_items > 2 ||
173 (scanned[i_arg].n_items == 2 &&
174 (sscanf(scanned[i_arg].list[1], "%ld", &after_zero_buffer_width) != 1 ||
175 after_zero_buffer_width <= 0)))
176 SDDS_Bomb("invalid -afterZero syntax");
177 break;
178 default:
179 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[i_arg].list[0]);
180 exit(1);
181 break;
182 }
183 } else {
184 if (!input)
185 input = scanned[i_arg].list[0];
186 else if (!output)
187 output = scanned[i_arg].list[0];
188 else
189 SDDS_Bomb("too many filenames seen");
190 }
191 }
192
193 processFilenames("sddscliptails", &input, &output, pipe_flags, 0, NULL);
194
195 if (!columns)
196 SDDS_Bomb("supply the names of columns to process with the -columns option");
197
198 if (!SDDS_InitializeInput(&sdds_in, input))
199 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
200
201 if (!resolve_column_names(&sdds_in, &input_column, &columns))
202 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
203 if (!columns)
204 SDDS_Bomb("no columns selected for processing");
205
206 if (!SDDS_InitializeCopy(&sdds_out, &sdds_in, output, "w"))
207 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
208 if (column_major_order != -1)
209 sdds_out.layout.data_mode.column_major = column_major_order;
210 else
211 sdds_out.layout.data_mode.column_major = sdds_in.layout.data_mode.column_major;
212
213 if (!SDDS_DefineColumn(&sdds_out, "InTail", NULL, NULL, NULL, NULL, SDDS_SHORT, 0))
214 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
215
216 if (!SDDS_WriteLayout(&sdds_out))
217 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
218
219 indep_data = NULL;
220 in_tail = NULL;
221 while ((read_code = SDDS_ReadPage(&sdds_in)) > 0) {
222 if (!SDDS_CopyPage(&sdds_out, &sdds_in))
223 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
224 if ((rows = SDDS_CountRowsOfInterest(&sdds_in))) {
225 if (fwhm_limit > 0) {
226 indep_data = SDDS_Realloc(indep_data, sizeof(*indep_data) * rows);
227 if (!indep_data)
228 SDDS_Bomb("memory allocation failure");
229 for (i = 0; i < rows; i++)
230 indep_data[i] = i;
231 }
232 in_tail = SDDS_Realloc(in_tail, sizeof(*in_tail) * rows);
233 if (!in_tail)
234 SDDS_Bomb("memory allocation failure");
235 for (i = 0; i < rows; i++)
236 in_tail[i] = 0;
237 for (i = 0; i < columns; i++) {
238 data = SDDS_GetColumnInDoubles(&sdds_in, input_column[i]);
239 if (!data)
240 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
241 clip_tail(data, rows, absolute_limit, fractional_limit, in_tail);
242 if (fwhm_limit > 0)
243 clip_fwhm(data, rows, fwhm_limit, indep_data, in_tail);
244 if (after_zero)
245 clip_after_zero(data, rows, after_zero_buffer_width, in_tail);
246 if (!SDDS_SetColumnFromDoubles(&sdds_out, SDDS_SET_BY_NAME, data, rows, input_column[i]))
247 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
248 free(data);
249 }
250 }
251 if (!SDDS_SetColumn(&sdds_out, SDDS_SET_BY_NAME, in_tail, rows, "InTail"))
252 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
253 if (!SDDS_WritePage(&sdds_out))
254 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
255 }
256 if (SDDS_Terminate(&sdds_in) != 1) {
257 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
258 return 1;
259 }
260 if (SDDS_Terminate(&sdds_out) != 1) {
261 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
262 return 1;
263 }
264 if (indep_data)
265 free(indep_data);
266 if (in_tail)
267 free(in_tail);
268 return 0;
269}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_SHORT
Identifier for the signed short integer data type.
Definition SDDStypes.h:73
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.

◆ resolve_column_names()

static long resolve_column_names ( SDDS_DATASET * sdds_in,
char *** column,
int32_t * columns )
static

Definition at line 271 of file sddscliptails.c.

271 {
272 long i;
273
274 SDDS_SetColumnFlags(sdds_in, 0);
275 for (i = 0; i < *columns; i++) {
276 if (!SDDS_SetColumnsOfInterest(sdds_in, SDDS_MATCH_STRING, (*column)[i], SDDS_OR))
277 return 0;
278 }
279 free(*column);
280 *column = SDDS_GetColumnNames(sdds_in, columns);
281 if (!*column || *columns == 0) {
282 SDDS_SetError("no columns found");
283 return 0;
284 }
285 return 1;
286}
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
int32_t SDDS_SetColumnFlags(SDDS_DATASET *SDDS_dataset, int32_t column_flag_value)
Sets the acceptance flags for all columns in the current data table of a data set.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.