SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsshift.c
Go to the documentation of this file.
1/**
2 * @file sddsshift.c
3 * @brief Program for shifting data columns in SDDS files.
4 *
5 * @details
6 * This program provides functionalities for shifting data columns in SDDS files.
7 * It supports several shift modes, including zero-padding and circular shifting, and allows
8 * matching columns to minimize the least squares error during the shift process.
9 *
10 * @section Usage
11 * ```
12 * sddsshift [<inputfile>] [<outputfile>]
13 * [-pipe=[input][,output]]
14 * -columns=<inputcol>[,...]
15 * [-zero]
16 * [-circular]
17 * [-shift=<points>]
18 * [-match=<matchcol>]
19 * [-majorOrder=row|column]
20 * ```
21 *
22 * @section Options
23 * | Required | Description |
24 * |---------------------------------------|---------------------------------------------------------------------------------------|
25 * | `-columns` | Specify the data columns to shift. Wildcards are accepted. |
26 *
27 * | Optional | Description |
28 * |---------------------------------------|---------------------------------------------------------------------------------------|
29 * | `-pipe` | Read the input from and/or write the output to a pipe (`-pipe=[input][,output]`) instead of a file. |
30 * | `-zero` | Set exposed end-points to zero. |
31 * | `-circular` | Perform circular shifting of data. |
32 * | `-shift` | Number of rows to shift (positive = later, negative = earlier). |
33 * | `-match` | Specify a column to match for least squares error minimization. |
34 * | `-majorOrder` | Specify the output file order. |
35 *
36 * @subsection Incompatibilities
37 * - Either `-shift` or `-match` must be provided, but not both.
38 * - `-zero` and `-circular` options are mutually exclusive.
39 *
40 * @copyright
41 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
42 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
43 *
44 * @license
45 * This file is distributed under the terms of the Software License Agreement
46 * found in the file LICENSE included with this distribution.
47 *
48 * @author
49 * M. Borland, C. Saunders, R. Soliday, H. Shang
50 */
51
52#include "SDDS.h"
53#include "mdb.h"
54#include "scan.h"
55
56/* Enumeration for option types */
/* Identifiers for the command-line switches. main() switches on the index returned by
 * match_string() over the option[] keyword table, so each enumerator's value must equal the
 * position of its keyword in option[]. N_OPTIONS is the number of switches and sizes that table.
 * CLO_DELAY is the identifier for the user-visible "shift" keyword. */
57typedef enum {
58 CLO_PIPE,
59 CLO_COLUMNS,
60 CLO_DELAY,
61 CLO_MATCH,
62 CLO_ZERO,
63 CLO_MAJOR_ORDER,
64 CLO_CIRCULAR,
65 N_OPTIONS
66} option_type;
67
/* Keyword table passed to match_string() in main(). Entries are listed in the same order as
 * the option_type enumerators, so the matched index can be used directly as a CLO_* value.
 * main() deletes '_' characters from the typed switch name before matching it against this
 * table. */
68static char *option[N_OPTIONS] = {
69 "pipe",
70 "columns",
71 "shift",
72 "match",
73 "zero",
74 "majorOrder",
75 "circular"};
76
77/* Usage information */
/* Help text handed to bomb() by main() when the program is started without arguments.
 * It is built from adjacent string literals, one per output line. */
78static char *usage =
79 "sddsshift [<inputfile>] [<outputfile>]\n"
80 " [-pipe=[input][,output]] -columns=<inputcol>[,...]\n"
81 " [-zero | -circular] [-shift=<points> | -match=<matchcol>]\n"
82 " [-majorOrder=row|column]\n\n"
83 "-columns Provide <inputcols>, i.e., the data columns to be shifted.\n"
84 " Wildcards accepted.\n"
85 "-shift Provide number of points to shift in rows.\n"
86 " (positive = later, negative = earlier).\n"
87 "-match Provide <matchcol>. <inputcol> is shifted to\n"
88 " minimize the least squares error relative to <matchcol>.\n"
89 "-zero Set exposed end-points to zero.\n"
90 "-circular Shift the data in a circular fashion.\n"
91 "-majorOrder Specify output file in row or column major order.\n\n"
92 "sddsshift shifts specified data columns by rows. A copy of <inputfile> is made with the\n"
93 "addition of new columns \"Shifted<inputcol>\". Exposed end-points\n"
94 "are set to zero if the zero option is provided or\n"
95 "the value of the first/last row in <inputcol> as appropriate.\n"
96 "A parameter \"<inputcol>Shift\" contains the number of rows shifted.\n";
97
98/* Function prototypes */
/* shift: writes inputcol moved by `delay` rows into outputcol (npoints entries each).
 * A non-zero `circular` wraps the data around; otherwise the exposed rows are filled with 0
 * when `zero` is non-zero, or with the first/last input value. */
99void shift(double *inputcol, double *outputcol, int64_t npoints, long delay, long zero, long circular);
/* mse: sum of squared differences between y1 and y2 divided by npoints. */
100double mse(double *y1, double *y2, long npoints);
/* simplex_driver: objective function given to simplexMin(); evaluates mse() for the trial
 * shift in data[0] using the file-scope buffers declared below. */
101double simplex_driver(double *data, long *invalid);
/* resolve_column_names: replaces the list of requested column-name patterns with the names
 * returned by SDDS_GetColumnNames() after flagging the matching columns; returns 0 on failure. */
102long resolve_column_names(SDDS_DATASET *SDDSin, char ***column, int32_t *columns);
103
104/* Global variables */
/* These are file-scope because simplex_driver() is called by simplexMin() with only the trial
 * point and an `invalid` flag, so everything else it needs is passed through globals.
 *   input_col - data of the column currently being shifted (set in main()).
 *   working   - scratch buffer receiving each trial shift during -match optimization.
 *   match_col - data of the -match reference column. */
105double *input_col, *working, *match_col;
/* Number of rows of interest in the page currently being processed. */
106int64_t npoints;
/* Fill-mode flags set from the -zero and -circular switches. */
107long zero, circular;
108
109int main(int argc, char *argv[]) {
110 SCANNED_ARG *s_arg;
111 double *output_col;
112 char *input_col_name, *match_col_name, *inputfile, *outputfile;
113 long i, i_arg, delay, tmp_file_used;
114 unsigned long pipe_flags, major_order_flag;
115 char actual_name[256], actual_desc[256], delay_name[256];
116 double sim_delay, lower, upper, final_mse;
117 SDDS_DATASET SDDS_input, SDDS_output;
118 short column_major_order = -1;
119 char **input_col_names = NULL;
120 int32_t n_input_col_names = 0;
121
123 argc = scanargs(&s_arg, argc, argv);
124 if (argc < 2) {
125 bomb(NULL, usage);
126 }
127
128 /* Initialize flags and defaults */
129 tmp_file_used = 0;
130 pipe_flags = 0;
131 inputfile = outputfile = input_col_name = match_col_name = NULL;
132 delay = 0;
133 sim_delay = 0.0;
134 zero = circular = 0;
135 input_col = match_col = output_col = working = NULL;
136
137 /* Process arguments */
138 for (i_arg = 1; i_arg < argc; i_arg++) {
139 if (s_arg[i_arg].arg_type == OPTION) {
140 delete_chars(s_arg[i_arg].list[0], "_");
141 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
142 case CLO_MAJOR_ORDER:
143 major_order_flag = 0;
144 s_arg[i_arg].n_items--;
145 if (s_arg[i_arg].n_items > 0 &&
146 (!scanItemList(&major_order_flag, s_arg[i_arg].list + 1, &s_arg[i_arg].n_items, 0,
147 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
148 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL))) {
149 SDDS_Bomb("Invalid -majorOrder syntax/values");
150 }
151 if (major_order_flag & SDDS_COLUMN_MAJOR_ORDER) {
152 column_major_order = 1;
153 } else if (major_order_flag & SDDS_ROW_MAJOR_ORDER) {
154 column_major_order = 0;
155 }
156 break;
157 case CLO_PIPE:
158 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipe_flags)) {
159 SDDS_Bomb("Invalid -pipe syntax");
160 }
161 break;
162 case CLO_COLUMNS:
163 if (s_arg[i_arg].n_items < 2) {
164 SDDS_Bomb("Invalid -columns syntax.");
165 }
166 if (n_input_col_names) {
167 SDDS_Bomb("Invalid syntax: specify -columns once only");
168 }
169 input_col_names = tmalloc(sizeof(*input_col_names) * (s_arg[i_arg].n_items - 1));
170 for (i = 1; i < s_arg[i_arg].n_items; i++) {
171 input_col_names[i - 1] = s_arg[i_arg].list[i];
172 }
173 n_input_col_names = s_arg[i_arg].n_items - 1;
174 break;
175 case CLO_DELAY:
176 if (s_arg[i_arg].n_items != 2) {
177 SDDS_Bomb("Invalid -delay option.");
178 }
179 if (!get_long(&delay, s_arg[i_arg].list[1])) {
180 SDDS_Bomb("Invalid delay value provided.");
181 }
182 break;
183 case CLO_MATCH:
184 if (s_arg[i_arg].n_items != 2) {
185 SDDS_Bomb("Invalid -match option.");
186 }
187 match_col_name = s_arg[i_arg].list[1];
188 break;
189 case CLO_ZERO:
190 zero = 1;
191 break;
192 case CLO_CIRCULAR:
193 circular = 1;
194 break;
195 default:
196 fprintf(stderr, "Error (%s): unknown switch: %s\n", argv[0], s_arg[i_arg].list[0]);
197 exit(EXIT_FAILURE);
198 }
199 } else {
200 if (!inputfile) {
201 inputfile = s_arg[i_arg].list[0];
202 } else if (!outputfile) {
203 outputfile = s_arg[i_arg].list[0];
204 } else {
205 SDDS_Bomb("Too many files provided.");
206 }
207 }
208 }
209
210 processFilenames("sddsshift", &inputfile, &outputfile, pipe_flags, 1, &tmp_file_used);
211
212 if (zero && circular) {
213 SDDS_Bomb("The -zero and -circular options are mutually exclusive.");
214 }
215
216 if (n_input_col_names == 0) {
217 SDDS_Bomb("A shift column is not given!");
218 }
219 if (!match_col_name && !delay) {
220 SDDS_Bomb("Either match column or shift should be provided.");
221 }
222 if (match_col_name && delay) {
223 SDDS_Bomb("-match column option and -shift option are incompatible.");
224 }
225
226 /* Initialize SDDS input */
227 if (!SDDS_InitializeInput(&SDDS_input, inputfile)) {
228 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
229 }
230
231 if (!resolve_column_names(&SDDS_input, &input_col_names, &n_input_col_names)) {
232 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
233 }
234
235 if (!SDDS_InitializeCopy(&SDDS_output, &SDDS_input, outputfile, "w")) {
236 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
237 exit(EXIT_FAILURE);
238 }
239
240 SDDS_output.layout.data_mode.column_major = column_major_order != -1 ? column_major_order : SDDS_input.layout.data_mode.column_major;
241
242 for (i = 0; i < n_input_col_names; i++) {
243 snprintf(actual_name, sizeof(actual_name), "Shifted%s", input_col_names[i]);
244 snprintf(actual_desc, sizeof(actual_desc), "Shifted %s", input_col_names[i]);
245 snprintf(delay_name, sizeof(delay_name), "%sShift", input_col_names[i]);
246 if (SDDS_DefineColumn(&SDDS_output, actual_name, NULL, NULL, actual_desc, NULL, SDDS_DOUBLE, 0) < 0 ||
247 SDDS_DefineParameter(&SDDS_output, delay_name, NULL, NULL, NULL, NULL, SDDS_LONG, NULL) < 0) {
248 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
249 exit(EXIT_FAILURE);
250 }
251 }
252
253 if (!SDDS_WriteLayout(&SDDS_output)) {
254 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
255 exit(EXIT_FAILURE);
256 }
257
258 while ((SDDS_ReadPage(&SDDS_input)) > 0) {
259 if (!SDDS_CopyPage(&SDDS_output, &SDDS_input)) {
260 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
261 }
262
263 npoints = SDDS_CountRowsOfInterest(&SDDS_input);
264 if (npoints < 0) {
265 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
266 }
267
268 for (i = 0; i < n_input_col_names; i++) {
269 snprintf(actual_name, sizeof(actual_name), "Shifted%s", input_col_names[i]);
270 snprintf(delay_name, sizeof(delay_name), "%sShift", input_col_names[i]);
271 input_col = SDDS_GetColumnInDoubles(&SDDS_input, input_col_names[i]);
272 if (!input_col) {
273 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
274 }
275
276 output_col = calloc(npoints, sizeof(*output_col));
277 if (!output_col) {
278 SDDS_Bomb("Memory allocation failure");
279 }
280
281 if (match_col_name) {
282 match_col = SDDS_GetColumnInDoubles(&SDDS_input, match_col_name);
283 if (!match_col) {
284 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
285 }
286 working = calloc(npoints, sizeof(*working));
287 if (!working) {
288 SDDS_Bomb("Memory allocation failure");
289 }
290 lower = -((double)npoints - 1);
291 upper = (double)npoints - 1;
292 simplexMin(&final_mse, &sim_delay, NULL, &lower, &upper, NULL, 1, 1e-6, 1e-12, simplex_driver, NULL, 2 * npoints, 6, 12, 3, 1.0, 0);
293 delay = (long)sim_delay;
294 free(working);
295 free(match_col);
296 match_col = working = NULL;
297 }
298
299 shift(input_col, output_col, npoints, delay, zero, circular);
300 if (!SDDS_SetParameters(&SDDS_output, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE, delay_name, delay, NULL) ||
301 !SDDS_SetColumn(&SDDS_output, SDDS_SET_BY_NAME, output_col, npoints, actual_name)) {
302 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
303 }
304
305 free(output_col);
306 free(input_col);
307 output_col = input_col = NULL;
308 }
309
310 if (!SDDS_WritePage(&SDDS_output)) {
311 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
312 }
313 }
314
315 if (!SDDS_Terminate(&SDDS_input) || !SDDS_Terminate(&SDDS_output)) {
316 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
317 }
318
319 free_scanargs(&s_arg, argc);
320 return EXIT_SUCCESS;
321}
322
323long resolve_column_names(SDDS_DATASET *SDDSin, char ***column, int32_t *columns) {
324 long i;
325
326 SDDS_SetColumnFlags(SDDSin, 0);
327 for (i = 0; i < *columns; i++) {
328 if (!SDDS_SetColumnsOfInterest(SDDSin, SDDS_MATCH_STRING, (*column)[i], SDDS_OR)) {
329 return 0;
330 }
331 }
332 free(*column);
333 *column = SDDS_GetColumnNames(SDDSin, columns);
334 if (!*column || *columns == 0) {
335 SDDS_SetError("No columns found");
336 return 0;
337 }
338 return 1;
339}
340
/* Reverses values[first..last] in place; does nothing when first >= last. */
static void reverse_doubles(double *values, int64_t first, int64_t last) {
  while (first < last) {
    double held = values[first];
    values[first++] = values[last];
    values[last--] = held;
  }
}

/**
 * @brief Shift a column of data by a whole number of rows.
 *
 * With a positive delay each value moves to a later row (outputcol[i] = inputcol[i - delay]);
 * with a negative delay it moves to an earlier row.
 *
 * - Circular mode: values that fall off one end re-enter at the other.
 * - Otherwise: rows with no source value are set to 0 when `zero` is non-zero, or to the
 *   first input value (positive delay) / last input value (negative delay).
 *
 * A delay whose magnitude is npoints or more leaves no source rows, so in non-circular mode
 * the whole output is filled. No element outside outputcol[0..npoints-1] is ever written.
 * inputcol and outputcol may be the same array. Nothing is done when npoints <= 0.
 *
 * @param inputcol  Source values (npoints entries).
 * @param outputcol Destination values (npoints entries).
 * @param npoints   Number of rows.
 * @param delay     Rows to shift; positive = later, negative = earlier.
 * @param zero      Non-zero to fill exposed rows with 0 (non-circular mode only).
 * @param circular  Non-zero for circular shifting.
 */
void shift(double *inputcol, double *outputcol, int64_t npoints, long delay, long zero, long circular) {
  int64_t i, j, rows;
  double fill;

  if (npoints <= 0) {
    return;
  }

  if (circular) {
    /* Reduce the shift to an equivalent rotation towards later rows in [0, npoints). */
    rows = delay % npoints;
    if (rows < 0) {
      rows += npoints;
    }
    if (inputcol == outputcol) {
      /* In-place rotation by three reversals; needs no temporary copy. */
      reverse_doubles(outputcol, 0, npoints - 1);
      reverse_doubles(outputcol, 0, rows - 1);
      reverse_doubles(outputcol, rows, npoints - 1);
      return;
    }
    for (i = 0; i < npoints; i++) {
      j = i - rows;
      if (j < 0) {
        j += npoints;
      }
      outputcol[i] = inputcol[j];
    }
    return;
  }

  if (delay < 0) {
    /* Clamp before negating: this bounds the loops and avoids overflow for LONG_MIN. */
    rows = (delay <= -npoints) ? npoints : -(int64_t)delay;
    fill = zero ? 0 : inputcol[npoints - 1];
    /* Ascending order keeps the copy correct when the arrays are the same. */
    for (i = 0; i < npoints - rows; i++) {
      outputcol[i] = inputcol[i + rows];
    }
    for (i = npoints - rows; i < npoints; i++) {
      outputcol[i] = fill;
    }
  } else {
    rows = (delay >= npoints) ? npoints : delay;
    fill = zero ? 0 : inputcol[0];
    /* Descending order keeps the copy correct when the arrays are the same. */
    for (i = npoints - 1; i >= rows; i--) {
      outputcol[i] = inputcol[i - rows];
    }
    for (i = 0; i < rows; i++) {
      outputcol[i] = fill;
    }
  }
}
380
/**
 * @brief Mean squared difference between two arrays of equal length.
 *
 * @param y1      First array (npoints entries).
 * @param y2      Second array (npoints entries).
 * @param npoints Number of entries compared; also the divisor of the result.
 * @return Sum over all entries of (y1[i] - y2[i])^2, divided by npoints.
 */
double mse(double *y1, double *y2, long npoints) {
  double sum_sq = 0;
  long k = 0;

  while (k < npoints) {
    double diff = y1[k] - y2[k];
    sum_sq += diff * diff;
    k++;
  }
  return sum_sq / npoints;
}
390
391double simplex_driver(double *data, long *invalid) {
392 long delay;
393
394 *invalid = 0;
395 delay = (long)*data;
396 shift(input_col, working, npoints, delay, zero, circular);
397 return mse(match_col, working, npoints);
398}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
int32_t SDDS_SetColumnFlags(SDDS_DATASET *SDDS_dataset, int32_t column_flag_value)
Sets the acceptance flags for all columns in the current data table of a data set.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
int get_long(long *iptr, char *s)
Parses a long integer value from the given string.
Definition data_scan.c:255
char * delete_chars(char *s, char *t)
Removes all occurrences of characters found in string t from string s.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
long simplexMin(double *yReturn, double *xGuess, double *dxGuess, double *xLowerLimit, double *xUpperLimit, short *disable, long dimensions, double target, double tolerance, double(*func)(double *x, long *invalid), void(*report)(double ymin, double *xmin, long pass, long evals, long dims), long maxEvaluations, long maxPasses, long maxDivisions, double divisorFactor, double passRangeFactor, unsigned long flags)
Top-level convenience function for simplex-based minimization.
Definition simplex.c:472