SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddssortcolumn.c
Go to the documentation of this file.
1/**
2 * @file sddssortcolumn.c
3 * @brief Rearranges the columns of an SDDS input file into a specified order.
4 *
5 * @details
6 * This program processes an SDDS file, allowing users to reorder its columns based on:
7 * - A specified list.
8 * - The order of BPMs in a storage ring.
9 * - Data from an external file.
10 *
11 * Sorting options include increasing or decreasing order. The output is written back to an SDDS file.
12 * The tool is useful for preparing SDDS data for analysis or presentation.
13 *
14 * @section Usage
15 * ```
16 * sddssortcolumn [<SDDSinput>] [<SDDSoutput>]
17 * [-pipe=[input][,output]]
18 * [-sortList=<list_of_columns>]
19 * [-decreasing]
20 * [-bpmOrder]
21 * [-sortWith=<filename>,column=<column_name>]
22 * ```
23 *
24 * @section Options
25 * | Optional | Description |
26 * |--------------|-----------------------------------------------------------------------------|
27 * | `-pipe` | Uses pipes for input/output data flow. |
28 * | `-sortList` | A comma-separated list specifying the desired column order. |
29 * | `-decreasing`| Sorts columns in decreasing order. By default, columns are sorted in increasing order. |
30 * | `-bpmOrder` | Orders columns by their BPM position in a storage ring. |
31 * | `-sortWith` | Specifies an external file and column for sorting. Overrides other sort orders. |
32 *
33 * @subsection Incompatibilities
34 * - Only one of the following may be specified:
35 * - `-sortWith`
36 * - `-bpmOrder`
37 * - `-sortList`
38 *
39 * @copyright
40 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
41 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
42 *
43 * @license
44 * This file is distributed under the terms of the Software License Agreement
45 * found in the file LICENSE included with this distribution.
46 *
47 * @author
48 * H. Shang, R. Soliday
49 */
50
51#include "mdb.h"
52#include "SDDS.h"
53#include "scan.h"
54
55#if defined(_WIN32)
56# include <process.h>
57# define pid_t int
58#else
59# if defined(linux)
60# include <sys/types.h>
61# endif
62# include <unistd.h>
63#endif
64
/*
 * Command-line options recognized by this program.
 * The enumerators double as indices into option[]; N_OPTIONS is the number
 * of real options and must stay last.
 */
typedef enum {
  SET_PIPE,
  SET_SORTLIST,
  SET_DECREASING,
  SET_BPMORDER,
  SET_SORTWITH,
  N_OPTIONS
} option_type;

/*
 * Keyword for each option, looked up with match_string() in main().
 * Designated initializers tie every keyword to its enumerator, so the table
 * stays correct even if the enum is reordered.
 */
static char *option[N_OPTIONS] = {
  [SET_PIPE] = "pipe",
  [SET_SORTLIST] = "sortList",
  [SET_DECREASING] = "decreasing",
  [SET_BPMORDER] = "bpmOrder",
  [SET_SORTWITH] = "sortWith",
};
82
83static char *usage =
84 "Usage:\n"
85 " sddssortcolumn [<SDDSinput>] [<SDDSoutput>]\n"
86 " [-pipe=[input][,output]]\n"
87 " [-sortList=<list of columns in order>]\n"
88 " [-decreasing]\n"
89 " [-bpmOrder]\n"
90 " [-sortWith=<filename>,column=<string>]\n\n"
91 "Options:\n"
92 " -sortList <list of columns>\n"
93 " Specify the order of column names in a list.\n\n"
94 " -sortWith=<filename>,column=<string>\n"
95 " Sort the columns of the input based on the order defined in the\n"
96 " specified <column> of <filename>. This option overrides any other sorting order.\n\n"
97 " -bpmOrder\n"
98 " Sort the columns by their assumed BPM position in the storage ring.\n\n"
99 " -decreasing\n"
100 " Sort the columns in decreasing order. The default is increasing order.\n\n"
101 "Description:\n"
102 " Rearrange the columns of an SDDS input file into the specified order.\n\n"
103 "Program Information:\n"
104 " Program by Hairong Shang. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
105
/* Column names of the input file; compare_strings() indexes into this array. */
static char **column_name;
/* Nonzero for increasing order (the default); main() clears it for -decreasing. */
static long increasing = 1;
/* Nonzero when -bpmOrder was given. */
static long bpm_order = 0;
/* Number of entries in column_name, filled in by SDDS_GetColumnNames(). */
static int32_t columns;
109
/**
 * @brief Ranks a BPM name by the first wildcard pattern it matches.
 *
 * The patterns are tried in the order listed below; the 1-based position of
 * the first match is returned. A name that matches none of them gets the rank
 * one past the last pattern.
 *
 * @param bpm_name Name to classify.
 * @return Rank in the range 1..18.
 */
static long get_bpm_suborder(char *bpm_name) {
  /* Order matters: the position in this table is the rank that is returned. */
  static char *pattern[] = {
    "*A:P0*", "*A:P1*", "*A:P2*", "*A:P3*", "*A:P4*", "*A:P5*",
    "*B:P5*", "*B:P4*", "*B:P3*", "*B:P2*", "*B:P1*", "*B:P0*",
    "*C:P0*",
    "*BM:P1*", "*BM:P2*",
    "*ID:P1*", "*ID:P2*",
  };
  const long n_patterns = (long)(sizeof(pattern) / sizeof(pattern[0]));
  long rank;

  for (rank = 0; rank < n_patterns; rank++) {
    if (wild_match(bpm_name, pattern[rank]))
      return rank + 1;
  }

  return n_patterns + 1;
}
152
153static int compare_strings(const void *vindex1, const void *vindex2) {
154 long index1 = *(long *)vindex1;
155 long index2 = *(long *)vindex2;
156 long comparison, sector1, sector2, subsector1, subsector2;
157
158 if (bpm_order) {
159 if (sscanf(column_name[index1], "S%ld", &sector1) != 1)
160 sector1 = 0;
161 if (sscanf(column_name[index2], "S%ld", &sector2) != 1)
162 sector2 = 0;
163 if (sector1 == 0 && sector2 == 0) {
164 comparison = strcmp(column_name[index1], column_name[index2]);
165 } else {
166 if (sector1 > sector2)
167 comparison = 1;
168 else if (sector1 < sector2)
169 comparison = -1;
170 else {
171 subsector1 = get_bpm_suborder(column_name[index1]);
172 subsector2 = get_bpm_suborder(column_name[index2]);
173 if (subsector1 > subsector2)
174 comparison = 1;
175 else if (subsector1 < subsector2)
176 comparison = -1;
177 else
178 comparison = 0;
179 }
180 }
181 } else {
182 comparison = strcmp(column_name[index1], column_name[index2]);
183 }
184
185 if (!increasing)
186 comparison = -comparison;
187
188 return comparison;
189}
190
191int main(int argc, char **argv) {
192 SDDS_DATASET sdds_input, sdds_output, sdds_sort;
193 char *input = NULL, *output = NULL;
194 char **sort_list = NULL, **parameter_name = NULL;
195 char *sort_file = NULL, *sort_column = NULL, **sorted_column = NULL;
196 long i_arg, *sort_column_index = NULL, index;
197 int64_t i, sort_lists = 0, rows, count;
198 SCANNED_ARG *s_arg;
199 long tmpfile_used = 0;
200 int32_t parameters = 0;
201 unsigned long pipe_flags = 0, dummyflags;
202
204 argc = scanargs(&s_arg, argc, argv);
205 if (argc < 2) {
206 fprintf(stderr, "%s", usage);
207 return EXIT_FAILURE;
208 }
209
210 for (i_arg = 1; i_arg < argc; i_arg++) {
211 if (s_arg[i_arg].arg_type == OPTION) {
212 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
213 case SET_PIPE:
214 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipe_flags)) {
215 fprintf(stderr, "Invalid -pipe syntax\n");
216 return EXIT_FAILURE;
217 }
218 break;
219 case SET_DECREASING:
220 increasing = 0;
221 break;
222 case SET_BPMORDER:
223 bpm_order = 1;
224 break;
225 case SET_SORTLIST:
226 sort_lists = s_arg[i_arg].n_items - 1;
227 sort_list = malloc(sizeof(*sort_list) * sort_lists);
228 for (i = 0; i < sort_lists; i++) {
229 SDDS_CopyString(&sort_list[i], s_arg[i_arg].list[i + 1]);
230 }
231 break;
232 case SET_SORTWITH:
233 if (s_arg[i_arg].n_items != 3) {
234 fprintf(stderr, "Invalid -sortWith option given!\n");
235 return EXIT_FAILURE;
236 }
237 sort_file = s_arg[i_arg].list[1];
238 s_arg[i_arg].n_items = 1;
239 if (!scanItemList(&dummyflags, s_arg[i_arg].list + 2, &s_arg[i_arg].n_items, 0,
240 "column", SDDS_STRING, &sort_column, 1, 0, NULL) ||
241 !sort_column) {
242 fprintf(stderr, "Invalid -sortWith syntax/values\n");
243 return EXIT_FAILURE;
244 }
245 s_arg[i_arg].n_items = 3;
246 break;
247 default:
248 fprintf(stderr, "Error: unknown switch: %s\n", s_arg[i_arg].list[0]);
249 return EXIT_FAILURE;
250 }
251 } else {
252 if (input == NULL) {
253 input = s_arg[i_arg].list[0];
254 } else if (output == NULL) {
255 output = s_arg[i_arg].list[0];
256 } else {
257 fprintf(stderr, "Too many filenames\n");
258 return EXIT_FAILURE;
259 }
260 }
261 }
262
263 processFilenames("sddssort", &input, &output, pipe_flags, 0, &tmpfile_used);
264
265 if (!SDDS_InitializeInput(&sdds_input, input)) {
266 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
267 return EXIT_FAILURE;
268 }
269
270 if (!SDDS_InitializeOutput(&sdds_output, SDDS_BINARY, 1, NULL, NULL, output)) {
271 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
272 return EXIT_FAILURE;
273 }
274
275 column_name = (char **)SDDS_GetColumnNames(&sdds_input, &columns);
276 parameter_name = (char **)SDDS_GetParameterNames(&sdds_input, &parameters);
277
278 for (i = 0; i < parameters; i++) {
279 if (!SDDS_TransferParameterDefinition(&sdds_output, &sdds_input, parameter_name[i], parameter_name[i])) {
280 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
281 }
282 }
283
284 sort_column_index = malloc(sizeof(*sort_column_index) * columns);
285 for (i = 0; i < columns; i++) {
286 sort_column_index[i] = i;
287 }
288
289 if (sort_file && sort_column) {
290 if (sort_list && sort_lists) {
291 for (i = 0; i < sort_lists; i++) {
292 free(sort_list[i]);
293 }
294 free(sort_list);
295 }
296 sort_lists = 0;
297 sort_list = NULL;
298
299 if (!SDDS_InitializeInput(&sdds_sort, sort_file)) {
300 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
301 }
302
303 if (SDDS_ReadPage(&sdds_sort) < 0) {
304 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
305 }
306
307 if (!(sort_lists = SDDS_CountRowsOfInterest(&sdds_sort))) {
308 fprintf(stderr, "Zero rows found in sortWith file.\n");
309 return EXIT_FAILURE;
310 }
311
312 if (!(sort_list = (char **)SDDS_GetColumn(&sdds_sort, sort_column))) {
313 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
314 }
315
316 if (!SDDS_Terminate(&sdds_sort)) {
317 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
318 }
319 }
320
321 sorted_column = malloc(sizeof(*sorted_column) * columns);
322 count = 0;
323
324 if (sort_list) {
325 for (i = 0; i < sort_lists; i++) {
326 if ((index = match_string(sort_list[i], column_name, columns, EXACT_MATCH)) >= 0) {
327 sorted_column[count] = sort_list[i];
328 count++;
329 }
330 }
331
332 for (i = 0; i < columns; i++) {
333 if (match_string(column_name[i], sort_list, sort_lists, EXACT_MATCH) < 0) {
334 sorted_column[count] = column_name[i];
335 count++;
336 }
337 }
338 } else {
339 qsort((void *)sort_column_index, columns, sizeof(*sort_column_index), compare_strings);
340 for (i = 0; i < columns; i++) {
341 index = sort_column_index[i];
342 sorted_column[i] = column_name[index];
343 }
344 }
345
346 for (i = 0; i < columns; i++) {
347 if (!SDDS_TransferColumnDefinition(&sdds_output, &sdds_input, sorted_column[i], sorted_column[i])) {
348 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
349 }
350 }
351
352 if (!SDDS_WriteLayout(&sdds_output)) {
353 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
354 }
355
356 while (SDDS_ReadPage(&sdds_input) > 0) {
357 rows = SDDS_CountRowsOfInterest(&sdds_input);
358 if (!SDDS_StartPage(&sdds_output, rows)) {
359 fprintf(stderr, "Problem starting output page\n");
360 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
361 }
362
363 if (!SDDS_CopyParameters(&sdds_output, &sdds_input)) {
364 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
365 }
366
367 if (!SDDS_CopyColumns(&sdds_output, &sdds_input)) {
368 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
369 }
370
371 if (!SDDS_WritePage(&sdds_output)) {
372 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
373 }
374 }
375
376 if (!SDDS_Terminate(&sdds_input) || !SDDS_Terminate(&sdds_output)) {
377 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
378 return EXIT_FAILURE;
379 }
380
381 if (tmpfile_used && !replaceFileAndBackUp(input, output)) {
382 return EXIT_FAILURE;
383 }
384
385 if (parameters) {
386 for (i = 0; i < parameters; i++) {
387 free(parameter_name[i]);
388 }
389 free(parameter_name);
390 }
391
392 if (columns) {
393 for (i = 0; i < columns; i++) {
394 free(column_name[i]);
395 }
396 free(column_name);
397 }
398
399 if (sort_lists) {
400 for (i = 0; i < sort_lists; i++) {
401 free(sort_list[i]);
402 }
403 free(sort_list);
404 }
405
406 if (sort_column) {
407 free(sort_column);
408 }
409
410 free(sorted_column);
411 free(sort_column_index);
412 free_scanargs(&s_arg, argc);
413
414 return EXIT_SUCCESS;
415}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyColumns(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:387
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
void * SDDS_GetColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves a copy of the data for a specified column, including only rows marked as "of interest".
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferParameterDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a parameter definition from a source dataset to a target dataset.
char ** SDDS_GetParameterNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all parameters in the SDDS dataset.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
int wild_match(char *string, char *template)
Determine whether one string is a wildcard match for another.
Definition wild_match.c:49