SDDSlib
Loading...
Searching...
No Matches
sddsshift.c
Go to the documentation of this file.
1/**
2 * @file sddsshift.c
3 * @brief Program for shifting data columns in SDDS files.
4 *
5 * This program provides functionalities for shifting data columns in SDDS files.
6 * It supports several shift modes, including zero-padding and circular shifting, and allows
7 * matching columns to minimize the least squares error during the shift process.
8 *
9 * ### Features:
10 * - Supports shifting by a specified number of rows (positive or negative).
11 * - Allows matching one column to another using least squares error minimization.
12 * - Offers zero-padding or circular shifting for exposed endpoints.
13 * - Can process SDDS files in both row-major and column-major order.
14 *
15 * ### Usage:
16 * ```
17 * sddsshift [<inputfile>] [<outputfile>]
18 * [-pipe=[input][,output]] -columns=<inputcol>[,...]
19 * [-zero | -circular] [-shift=<points> | -match=<matchcol>]
20 * [-majorOrder=row|column]
21 * ```
22 *
23 * ### Options:
24 * - `-columns=<inputcol>[,...]`: Specify the data columns to shift. Wildcards are accepted.
25 * - `-shift=<points>`: Number of rows to shift (positive = later, negative = earlier).
26 * - `-match=<matchcol>`: Specify a column to match for least squares error minimization.
27 * - `-zero`: Set exposed end-points to zero.
28 * - `-circular`: Perform circular shifting of data.
29 * - `-majorOrder=row|column`: Specify the output file order.
30 *
31 * ### Example:
32 * ```
33 * sddsshift input.sdds output.sdds -columns=col1,col2 -shift=5 -zero
34 * ```
35 *
36 * @copyright
37 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
38 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
39 *
40 * @license
41 * This file is distributed under the terms of the Software License Agreement
42 * found in the file LICENSE included with this distribution.
43 *
44 * @author M. Borland, C. Saunders, R. Soliday, H. Shang
45 */
46
47#include "SDDS.h"
48#include "mdb.h"
49#include "scan.h"
50
/*
 * Identifiers for the recognised command-line options.
 * Each value is the index of the matching keyword in option[], so the
 * two lists must stay in the same order.
 */
typedef enum {
  CLO_PIPE = 0,    /* "pipe" */
  CLO_COLUMNS,     /* "columns" */
  CLO_DELAY,       /* "shift" */
  CLO_MATCH,       /* "match" */
  CLO_ZERO,        /* "zero" */
  CLO_MAJOR_ORDER, /* "majorOrder" */
  CLO_CIRCULAR,    /* "circular" */
  N_OPTIONS        /* count of the entries above; not an option itself */
} option_type;
62
63static char *option[N_OPTIONS] = {
64 "pipe",
65 "columns",
66 "shift",
67 "match",
68 "zero",
69 "majorOrder",
70 "circular"};
71
/*
 * Usage information.
 * Help text passed to bomb() when the program is started without arguments:
 * a synopsis, one entry per option, and a short description of the output
 * (new "Shifted<inputcol>" columns and "<inputcol>Shift" parameters).
 */
static char *usage =
  "Usage: sddsshift [<inputfile>] [<outputfile>]\n"
  " [-pipe=[input][,output]] -columns=<inputcol>[,...]\n"
  " [-zero | -circular] [-shift=<points> | -match=<matchcol>]\n"
  " [-majorOrder=row|column]\n\n"
  "-columns Provide <inputcols>, i.e., the data columns to be shifted.\n"
  " Wildcards accepted.\n"
  "-shift Provide number of points to shift in rows.\n"
  " (positive = later, negative = earlier).\n"
  "-match Provide <matchcol>. <inputcol> is shifted to\n"
  " minimize the least squares error relative to <matchcol>.\n"
  "-zero Set exposed end-points to zero.\n"
  "-circular Shift the data in a circular fashion.\n"
  "-majorOrder Specify output file in row or column major order.\n\n"
  "sddsshift shifts specified data columns by rows. A copy of <inputfile> is made with the\n"
  "addition of new columns \"Shifted<inputcol>\". Exposed end-points\n"
  "are set to zero if the zero option is provided or\n"
  "the value of the first/last row in <inputcol> as appropriate.\n"
  "A parameter \"<inputcol>Shift\" contains the number of rows shifted.\n";
92
/* Function prototypes */

/* Copies inputcol into outputcol displaced by delay rows (npoints entries each);
 * zero and circular select how the exposed end-points are filled. */
void shift(double *inputcol, double *outputcol, int64_t npoints, long delay, long zero, long circular);
/* Mean of the squared element-wise differences between y1 and y2. */
double mse(double *y1, double *y2, long npoints);
/* Objective function given to simplexMin(): shifts the global input_col by the
 * trial delay in *data and returns its mse() against the global match_col. */
double simplex_driver(double *data, long *invalid);
/* Replaces the list of requested names/wildcards in *column with the names of
 * the columns of SDDSin that match them; returns 0 on failure. */
long resolve_column_names(SDDS_DATASET *SDDSin, char ***column, int32_t *columns);
98
/*
 * Global variables.
 * These are file-scope because simplex_driver() is invoked by simplexMin()
 * through a (double *x, long *invalid) callback, which offers no argument
 * for passing the data being matched.
 */
double *input_col, *working, *match_col; /* column being shifted, scratch buffer for trial shifts, column to match against */
int64_t npoints;                         /* number of rows of interest in the page being processed */
long zero, circular;                     /* nonzero when -zero / -circular was given */
103
104int main(int argc, char *argv[]) {
105 SCANNED_ARG *s_arg;
106 double *output_col;
107 char *input_col_name, *match_col_name, *inputfile, *outputfile;
108 long i, i_arg, delay, tmp_file_used;
109 unsigned long pipe_flags, major_order_flag;
110 char actual_name[256], actual_desc[256], delay_name[256];
111 double sim_delay, lower, upper, final_mse;
112 SDDS_DATASET SDDS_input, SDDS_output;
113 short column_major_order = -1;
114 char **input_col_names = NULL;
115 int32_t n_input_col_names = 0;
116
118 argc = scanargs(&s_arg, argc, argv);
119 if (argc < 2) {
120 bomb(NULL, usage);
121 }
122
123 /* Initialize flags and defaults */
124 tmp_file_used = 0;
125 pipe_flags = 0;
126 inputfile = outputfile = input_col_name = match_col_name = NULL;
127 delay = 0;
128 sim_delay = 0.0;
129 zero = circular = 0;
130 input_col = match_col = output_col = working = NULL;
131
132 /* Process arguments */
133 for (i_arg = 1; i_arg < argc; i_arg++) {
134 if (s_arg[i_arg].arg_type == OPTION) {
135 delete_chars(s_arg[i_arg].list[0], "_");
136 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
137 case CLO_MAJOR_ORDER:
138 major_order_flag = 0;
139 s_arg[i_arg].n_items--;
140 if (s_arg[i_arg].n_items > 0 &&
141 (!scanItemList(&major_order_flag, s_arg[i_arg].list + 1, &s_arg[i_arg].n_items, 0,
142 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
143 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL))) {
144 SDDS_Bomb("Invalid -majorOrder syntax/values");
145 }
146 if (major_order_flag & SDDS_COLUMN_MAJOR_ORDER) {
147 column_major_order = 1;
148 } else if (major_order_flag & SDDS_ROW_MAJOR_ORDER) {
149 column_major_order = 0;
150 }
151 break;
152 case CLO_PIPE:
153 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipe_flags)) {
154 SDDS_Bomb("Invalid -pipe syntax");
155 }
156 break;
157 case CLO_COLUMNS:
158 if (s_arg[i_arg].n_items < 2) {
159 SDDS_Bomb("Invalid -columns syntax.");
160 }
161 if (n_input_col_names) {
162 SDDS_Bomb("Invalid syntax: specify -columns once only");
163 }
164 input_col_names = tmalloc(sizeof(*input_col_names) * (s_arg[i_arg].n_items - 1));
165 for (i = 1; i < s_arg[i_arg].n_items; i++) {
166 input_col_names[i - 1] = s_arg[i_arg].list[i];
167 }
168 n_input_col_names = s_arg[i_arg].n_items - 1;
169 break;
170 case CLO_DELAY:
171 if (s_arg[i_arg].n_items != 2) {
172 SDDS_Bomb("Invalid -delay option.");
173 }
174 if (!get_long(&delay, s_arg[i_arg].list[1])) {
175 SDDS_Bomb("Invalid delay value provided.");
176 }
177 break;
178 case CLO_MATCH:
179 if (s_arg[i_arg].n_items != 2) {
180 SDDS_Bomb("Invalid -match option.");
181 }
182 match_col_name = s_arg[i_arg].list[1];
183 break;
184 case CLO_ZERO:
185 zero = 1;
186 break;
187 case CLO_CIRCULAR:
188 circular = 1;
189 break;
190 default:
191 fprintf(stderr, "Error (%s): unknown switch: %s\n", argv[0], s_arg[i_arg].list[0]);
192 exit(EXIT_FAILURE);
193 }
194 } else {
195 if (!inputfile) {
196 inputfile = s_arg[i_arg].list[0];
197 } else if (!outputfile) {
198 outputfile = s_arg[i_arg].list[0];
199 } else {
200 SDDS_Bomb("Too many files provided.");
201 }
202 }
203 }
204
205 processFilenames("sddsshift", &inputfile, &outputfile, pipe_flags, 1, &tmp_file_used);
206
207 if (zero && circular) {
208 SDDS_Bomb("The -zero and -circular options are mutually exclusive.");
209 }
210
211 if (n_input_col_names == 0) {
212 SDDS_Bomb("A shift column is not given!");
213 }
214 if (!match_col_name && !delay) {
215 SDDS_Bomb("Either match column or shift should be provided.");
216 }
217 if (match_col_name && delay) {
218 SDDS_Bomb("-match column option and -shift option are incompatible.");
219 }
220
221 /* Initialize SDDS input */
222 if (!SDDS_InitializeInput(&SDDS_input, inputfile)) {
223 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
224 }
225
226 if (!resolve_column_names(&SDDS_input, &input_col_names, &n_input_col_names)) {
227 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
228 }
229
230 if (!SDDS_InitializeCopy(&SDDS_output, &SDDS_input, outputfile, "w")) {
231 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
232 exit(EXIT_FAILURE);
233 }
234
235 SDDS_output.layout.data_mode.column_major = column_major_order != -1 ? column_major_order : SDDS_input.layout.data_mode.column_major;
236
237 for (i = 0; i < n_input_col_names; i++) {
238 snprintf(actual_name, sizeof(actual_name), "Shifted%s", input_col_names[i]);
239 snprintf(actual_desc, sizeof(actual_desc), "Shifted %s", input_col_names[i]);
240 snprintf(delay_name, sizeof(delay_name), "%sShift", input_col_names[i]);
241 if (SDDS_DefineColumn(&SDDS_output, actual_name, NULL, NULL, actual_desc, NULL, SDDS_DOUBLE, 0) < 0 ||
242 SDDS_DefineParameter(&SDDS_output, delay_name, NULL, NULL, NULL, NULL, SDDS_LONG, NULL) < 0) {
243 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
244 exit(EXIT_FAILURE);
245 }
246 }
247
248 if (!SDDS_WriteLayout(&SDDS_output)) {
249 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
250 exit(EXIT_FAILURE);
251 }
252
253 while ((SDDS_ReadPage(&SDDS_input)) > 0) {
254 if (!SDDS_CopyPage(&SDDS_output, &SDDS_input)) {
255 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
256 }
257
258 npoints = SDDS_CountRowsOfInterest(&SDDS_input);
259 if (npoints < 0) {
260 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
261 }
262
263 for (i = 0; i < n_input_col_names; i++) {
264 snprintf(actual_name, sizeof(actual_name), "Shifted%s", input_col_names[i]);
265 snprintf(delay_name, sizeof(delay_name), "%sShift", input_col_names[i]);
266 input_col = SDDS_GetColumnInDoubles(&SDDS_input, input_col_names[i]);
267 if (!input_col) {
268 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
269 }
270
271 output_col = calloc(npoints, sizeof(*output_col));
272 if (!output_col) {
273 SDDS_Bomb("Memory allocation failure");
274 }
275
276 if (match_col_name) {
277 match_col = SDDS_GetColumnInDoubles(&SDDS_input, match_col_name);
278 if (!match_col) {
279 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
280 }
281 working = calloc(npoints, sizeof(*working));
282 if (!working) {
283 SDDS_Bomb("Memory allocation failure");
284 }
285 lower = -((double)npoints - 1);
286 upper = (double)npoints - 1;
287 simplexMin(&final_mse, &sim_delay, NULL, &lower, &upper, NULL, 1, 1e-6, 1e-12, simplex_driver, NULL, 2 * npoints, 6, 12, 3, 1.0, 0);
288 delay = (long)sim_delay;
289 free(working);
290 free(match_col);
291 match_col = working = NULL;
292 }
293
294 shift(input_col, output_col, npoints, delay, zero, circular);
295 if (!SDDS_SetParameters(&SDDS_output, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE, delay_name, delay, NULL) ||
296 !SDDS_SetColumn(&SDDS_output, SDDS_SET_BY_NAME, output_col, npoints, actual_name)) {
297 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
298 }
299
300 free(output_col);
301 free(input_col);
302 output_col = input_col = NULL;
303 }
304
305 if (!SDDS_WritePage(&SDDS_output)) {
306 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
307 }
308 }
309
310 if (!SDDS_Terminate(&SDDS_input) || !SDDS_Terminate(&SDDS_output)) {
311 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
312 }
313
314 free_scanargs(&s_arg, argc);
315 return EXIT_SUCCESS;
316}
317
318long resolve_column_names(SDDS_DATASET *SDDSin, char ***column, int32_t *columns) {
319 long i;
320
321 SDDS_SetColumnFlags(SDDSin, 0);
322 for (i = 0; i < *columns; i++) {
323 if (!SDDS_SetColumnsOfInterest(SDDSin, SDDS_MATCH_STRING, (*column)[i], SDDS_OR)) {
324 return 0;
325 }
326 }
327 free(*column);
328 *column = SDDS_GetColumnNames(SDDSin, columns);
329 if (!*column || *columns == 0) {
330 SDDS_SetError("No columns found");
331 return 0;
332 }
333 return 1;
334}
335
/* Reverses the elements data[first] .. data[last - 1] in place. */
static void shift_reverse_range(double *data, int64_t first, int64_t last) {
  while (first < last - 1) {
    double held = data[first];
    data[first] = data[last - 1];
    data[last - 1] = held;
    first++;
    last--;
  }
}

/**
 * @brief Shift a column of data by a number of rows.
 *
 * A positive delay moves data to later rows, a negative delay to earlier rows.
 *
 * - circular != 0: rows that fall off one end re-enter at the other; any delay
 *   is accepted and reduced modulo npoints.
 * - circular == 0: exposed end-points are set to 0 when zero != 0, otherwise to
 *   the first (delay > 0) or last (delay < 0) input value. A delay whose
 *   magnitude exceeds npoints is treated as a shift by npoints rows, i.e. the
 *   whole output becomes padding; no access is made outside the arrays.
 *
 * @param inputcol  Source data, npoints entries.
 * @param outputcol Destination, npoints entries; may be the same array as inputcol.
 * @param npoints   Number of entries; nothing is done when it is <= 0.
 * @param delay     Number of rows to shift.
 * @param zero      Nonzero to pad exposed end-points with 0 (non-circular only).
 * @param circular  Nonzero for circular shifting.
 */
void shift(double *inputcol, double *outputcol, int64_t npoints, long delay, long zero, long circular) {
  int64_t i, j, count;
  double fill;

  if (npoints <= 0) {
    return;
  }

  if (circular) {
    /* Reduce the shift to 0 <= count < npoints. */
    count = (int64_t)delay % npoints;
    if (count < 0) {
      count += npoints;
    }
    if (inputcol == outputcol) {
      /* In-place rotation to later rows by count: reverse everything, then
       * reverse the two parts separately. No temporary copy is required. */
      shift_reverse_range(outputcol, 0, npoints);
      shift_reverse_range(outputcol, 0, count);
      shift_reverse_range(outputcol, count, npoints);
      return;
    }
    for (i = 0; i < npoints; i++) {
      j = i - count;
      if (j < 0) {
        j += npoints;
      }
      outputcol[i] = inputcol[j];
    }
    return;
  }

  if (delay < 0) {
    /* Shift to earlier rows. Clamp before negating so that neither the loops
     * nor the negation can go out of range. */
    count = (delay < -npoints) ? npoints : -(int64_t)delay;
    /* Read the pad value first: outputcol may alias inputcol. */
    fill = zero ? 0 : inputcol[npoints - 1];
    for (i = 0; i < npoints - count; i++) {
      outputcol[i] = inputcol[i + count];
    }
    for (i = npoints - count; i < npoints; i++) {
      outputcol[i] = fill;
    }
  } else {
    /* Shift to later rows; copy from the end so an in-place shift is safe. */
    count = (delay > npoints) ? npoints : (int64_t)delay;
    fill = zero ? 0 : inputcol[0];
    for (i = npoints - 1; i >= count; i--) {
      outputcol[i] = inputcol[i - count];
    }
    for (i = 0; i < count; i++) {
      outputcol[i] = fill;
    }
  }
}
375
/**
 * @brief Mean squared error between two arrays.
 *
 * @param y1      First array, npoints entries.
 * @param y2      Second array, npoints entries.
 * @param npoints Number of entries compared; the sum of squared differences
 *                is divided by this value without any check for zero.
 * @return Sum over i of (y1[i] - y2[i])^2, divided by npoints.
 */
double mse(double *y1, double *y2, long npoints) {
  double sum_sq = 0;
  long k = 0;

  while (k < npoints) {
    double diff = y1[k] - y2[k];
    sum_sq += diff * diff;
    k++;
  }
  return sum_sq / npoints;
}
385
386double simplex_driver(double *data, long *invalid) {
387 long delay;
388
389 *invalid = 0;
390 delay = (long)*data;
391 shift(input_col, working, npoints, delay, zero, circular);
392 return mse(match_col, working, npoints);
393}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
int32_t SDDS_SetColumnFlags(SDDS_DATASET *SDDS_dataset, int32_t column_flag_value)
Sets the acceptance flags for all columns in the current data table of a data set.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows marked as "of interest".
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
int get_long(long *iptr, char *s)
Parses a long integer value from the given string.
Definition data_scan.c:255
char * delete_chars(char *s, char *t)
Removes all occurrences of characters found in string t from string s.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
long simplexMin(double *yReturn, double *xGuess, double *dxGuess, double *xLowerLimit, double *xUpperLimit, short *disable, long dimensions, double target, double tolerance, double(*func)(double *x, long *invalid), void(*report)(double ymin, double *xmin, long pass, long evals, long dims), long maxEvaluations, long maxPasses, long maxDivisions, double divisorFactor, double passRangeFactor, unsigned long flags)
Top-level convenience function for simplex-based minimization.
Definition simplex.c:472