SDDSlib
Loading...
Searching...
No Matches
sddssortcolumn.c
Go to the documentation of this file.
1/**
2 * @file sddssortcolumn.c
3 * @brief This program rearranges the columns of an SDDS input file into a specified order.
4 *
5 * The program supports sorting based on user-specified lists, external files, or BPM order.
6 * Columns can be sorted in increasing or decreasing order. It is a tool for manipulating
7 * and organizing data in SDDS format files.
8 *
9 * ### Key Features:
10 * - Sort columns based on a user-provided list or external file.
11 * - Optionally sort columns by BPM position in a storage ring.
12 * - Supports increasing and decreasing order sorting.
13 * - Handles SDDS input and output file formats.
14 *
15 * ### Usage:
16 * ```
17 * sddssortcolumn [<SDDSinput>] [<SDDSoutput>]
18 * [-pipe=[input][,output]]
19 * [-sortList=<list of columns in order>]
20 * [-decreasing]
21 * [-bpmOrder]
22 * [-sortWith=<filename>,column=<string>]
23 * ```
24 *
25 * ### Options:
26 * - `-pipe`: Use input/output pipes.
27 * - `-sortList=<list>`: Specify the order of column names in a list.
28 * - `-decreasing`: Sort columns in decreasing order.
29 * - `-bpmOrder`: Sort columns by their BPM position.
30 * - `-sortWith=<filename>,column=<string>`: Use an external file to define the sorting order.
31 *
32 * ### Example:
33 * ```
34 * sddssortcolumn input.sdds output.sdds -sortList=Column1,Column2 -decreasing
35 * ```
36 *
37 * @copyright
38 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
39 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
40 *
41 * @license
42 * This file is distributed under the terms of the Software License Agreement
43 * found in the file LICENSE included with this distribution.
44 *
45 * @author H. Shang, R. Soliday
46 */
47
48#include "mdb.h"
49#include "SDDS.h"
50#include "scan.h"
51
52#if defined(_WIN32)
53# include <process.h>
54# define pid_t int
55#else
56# if defined(linux)
57# include <sys/types.h>
58# endif
59# include <unistd.h>
60#endif
61
/*
 * Identifiers for the command-line switches.  Each value doubles as the
 * index of the switch's keyword in option[]; N_OPTIONS is the number of
 * switches and is not itself a switch.
 */
typedef enum {
  SET_PIPE = 0,
  SET_SORTLIST = 1,
  SET_DECREASING = 2,
  SET_BPMORDER = 3,
  SET_SORTWITH = 4,
  N_OPTIONS = 5
} option_type;

/* Switch keywords, keyed by option_type so each name stays tied to its identifier. */
static char *option[N_OPTIONS] = {
  [SET_PIPE] = "pipe",
  [SET_SORTLIST] = "sortList",
  [SET_DECREASING] = "decreasing",
  [SET_BPMORDER] = "bpmOrder",
  [SET_SORTWITH] = "sortWith",
};
79
80static char *usage =
81 "Usage:\n"
82 " sddssortcolumn [<SDDSinput>] [<SDDSoutput>]\n"
83 " [-pipe=[input][,output]]\n"
84 " [-sortList=<list of columns in order>]\n"
85 " [-decreasing]\n"
86 " [-bpmOrder]\n"
87 " [-sortWith=<filename>,column=<string>]\n\n"
88 "Options:\n"
89 " -sortList <list of columns>\n"
90 " Specify the order of column names in a list.\n\n"
91 " -sortWith=<filename>,column=<string>\n"
92 " Sort the columns of the input based on the order defined in the\n"
93 " specified <column> of <filename>. This option overrides any other sorting order.\n\n"
94 " -bpmOrder\n"
95 " Sort the columns by their assumed BPM position in the storage ring.\n\n"
96 " -decreasing\n"
97 " Sort the columns in decreasing order. The default is increasing order.\n\n"
98 "Description:\n"
99 " Rearrange the columns of an SDDS input file into the specified order.\n\n"
100 "Program Information:\n"
101 " Program by Hairong Shang. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
102
/*
 * State shared between main() and the qsort comparator compare_strings(),
 * which receives only two element pointers and so cannot be handed context.
 */
static char **column_name = NULL; /* column names of the input; the comparator indexes into this */
static long increasing = 1;       /* cleared by -decreasing; the comparator negates its result when 0 */
static long bpm_order = 0;        /* set by -bpmOrder; selects sector/suborder comparison */
static int32_t columns = 0;       /* number of entries in column_name */
106
/**
 * @brief Rank a BPM name by the first wildcard pattern it matches.
 *
 * The patterns are tried in the order listed below; the 1-based position of
 * the first one that matches is the rank.  Order matters because a name is
 * ranked by its first match only.
 *
 * @param bpm_name Name to classify.
 * @return 1..17 for the matching pattern, or 18 when no pattern matches.
 */
static long get_bpm_suborder(char *bpm_name) {
  static char *const pattern[] = {
    "*A:P0*", "*A:P1*", "*A:P2*", "*A:P3*", "*A:P4*", "*A:P5*",
    "*B:P5*", "*B:P4*", "*B:P3*", "*B:P2*", "*B:P1*", "*B:P0*",
    "*C:P0*",
    "*BM:P1*", "*BM:P2*",
    "*ID:P1*", "*ID:P2*",
  };
  const long n_patterns = (long)(sizeof(pattern) / sizeof(pattern[0]));
  long rank;

  for (rank = 0; rank < n_patterns; rank++) {
    if (wild_match(bpm_name, pattern[rank]))
      return rank + 1;
  }

  /* Unrecognised names sort after every recognised pattern. */
  return n_patterns + 1;
}
149
150static int compare_strings(const void *vindex1, const void *vindex2) {
151 long index1 = *(long *)vindex1;
152 long index2 = *(long *)vindex2;
153 long comparison, sector1, sector2, subsector1, subsector2;
154
155 if (bpm_order) {
156 if (sscanf(column_name[index1], "S%ld", &sector1) != 1)
157 sector1 = 0;
158 if (sscanf(column_name[index2], "S%ld", &sector2) != 1)
159 sector2 = 0;
160 if (sector1 == 0 && sector2 == 0) {
161 comparison = strcmp(column_name[index1], column_name[index2]);
162 } else {
163 if (sector1 > sector2)
164 comparison = 1;
165 else if (sector1 < sector2)
166 comparison = -1;
167 else {
168 subsector1 = get_bpm_suborder(column_name[index1]);
169 subsector2 = get_bpm_suborder(column_name[index2]);
170 if (subsector1 > subsector2)
171 comparison = 1;
172 else if (subsector1 < subsector2)
173 comparison = -1;
174 else
175 comparison = 0;
176 }
177 }
178 } else {
179 comparison = strcmp(column_name[index1], column_name[index2]);
180 }
181
182 if (!increasing)
183 comparison = -comparison;
184
185 return comparison;
186}
187
188int main(int argc, char **argv) {
189 SDDS_DATASET sdds_input, sdds_output, sdds_sort;
190 char *input = NULL, *output = NULL;
191 char **sort_list = NULL, **parameter_name = NULL;
192 char *sort_file = NULL, *sort_column = NULL, **sorted_column = NULL;
193 long i_arg, *sort_column_index = NULL, index;
194 int64_t i, sort_lists = 0, rows, count;
195 SCANNED_ARG *s_arg;
196 long tmpfile_used = 0;
197 int32_t parameters = 0;
198 unsigned long pipe_flags = 0, dummyflags;
199
201 argc = scanargs(&s_arg, argc, argv);
202 if (argc < 2) {
203 fprintf(stderr, "%s", usage);
204 return EXIT_FAILURE;
205 }
206
207 for (i_arg = 1; i_arg < argc; i_arg++) {
208 if (s_arg[i_arg].arg_type == OPTION) {
209 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
210 case SET_PIPE:
211 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipe_flags)) {
212 fprintf(stderr, "Invalid -pipe syntax\n");
213 return EXIT_FAILURE;
214 }
215 break;
216 case SET_DECREASING:
217 increasing = 0;
218 break;
219 case SET_BPMORDER:
220 bpm_order = 1;
221 break;
222 case SET_SORTLIST:
223 sort_lists = s_arg[i_arg].n_items - 1;
224 sort_list = malloc(sizeof(*sort_list) * sort_lists);
225 for (i = 0; i < sort_lists; i++) {
226 SDDS_CopyString(&sort_list[i], s_arg[i_arg].list[i + 1]);
227 }
228 break;
229 case SET_SORTWITH:
230 if (s_arg[i_arg].n_items != 3) {
231 fprintf(stderr, "Invalid -sortWith option given!\n");
232 return EXIT_FAILURE;
233 }
234 sort_file = s_arg[i_arg].list[1];
235 s_arg[i_arg].n_items = 1;
236 if (!scanItemList(&dummyflags, s_arg[i_arg].list + 2, &s_arg[i_arg].n_items, 0,
237 "column", SDDS_STRING, &sort_column, 1, 0, NULL) ||
238 !sort_column) {
239 fprintf(stderr, "Invalid -sortWith syntax/values\n");
240 return EXIT_FAILURE;
241 }
242 s_arg[i_arg].n_items = 3;
243 break;
244 default:
245 fprintf(stderr, "Error: unknown switch: %s\n", s_arg[i_arg].list[0]);
246 return EXIT_FAILURE;
247 }
248 } else {
249 if (input == NULL) {
250 input = s_arg[i_arg].list[0];
251 } else if (output == NULL) {
252 output = s_arg[i_arg].list[0];
253 } else {
254 fprintf(stderr, "Too many filenames\n");
255 return EXIT_FAILURE;
256 }
257 }
258 }
259
260 processFilenames("sddssort", &input, &output, pipe_flags, 0, &tmpfile_used);
261
262 if (!SDDS_InitializeInput(&sdds_input, input)) {
263 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
264 return EXIT_FAILURE;
265 }
266
267 if (!SDDS_InitializeOutput(&sdds_output, SDDS_BINARY, 1, NULL, NULL, output)) {
268 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
269 return EXIT_FAILURE;
270 }
271
272 column_name = (char **)SDDS_GetColumnNames(&sdds_input, &columns);
273 parameter_name = (char **)SDDS_GetParameterNames(&sdds_input, &parameters);
274
275 for (i = 0; i < parameters; i++) {
276 if (!SDDS_TransferParameterDefinition(&sdds_output, &sdds_input, parameter_name[i], parameter_name[i])) {
277 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
278 }
279 }
280
281 sort_column_index = malloc(sizeof(*sort_column_index) * columns);
282 for (i = 0; i < columns; i++) {
283 sort_column_index[i] = i;
284 }
285
286 if (sort_file && sort_column) {
287 if (sort_list && sort_lists) {
288 for (i = 0; i < sort_lists; i++) {
289 free(sort_list[i]);
290 }
291 free(sort_list);
292 }
293 sort_lists = 0;
294 sort_list = NULL;
295
296 if (!SDDS_InitializeInput(&sdds_sort, sort_file)) {
297 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
298 }
299
300 if (SDDS_ReadPage(&sdds_sort) < 0) {
301 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
302 }
303
304 if (!(sort_lists = SDDS_CountRowsOfInterest(&sdds_sort))) {
305 fprintf(stderr, "Zero rows found in sortWith file.\n");
306 return EXIT_FAILURE;
307 }
308
309 if (!(sort_list = (char **)SDDS_GetColumn(&sdds_sort, sort_column))) {
310 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
311 }
312
313 if (!SDDS_Terminate(&sdds_sort)) {
314 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
315 }
316 }
317
318 sorted_column = malloc(sizeof(*sorted_column) * columns);
319 count = 0;
320
321 if (sort_list) {
322 for (i = 0; i < sort_lists; i++) {
323 if ((index = match_string(sort_list[i], column_name, columns, EXACT_MATCH)) >= 0) {
324 sorted_column[count] = sort_list[i];
325 count++;
326 }
327 }
328
329 for (i = 0; i < columns; i++) {
330 if (match_string(column_name[i], sort_list, sort_lists, EXACT_MATCH) < 0) {
331 sorted_column[count] = column_name[i];
332 count++;
333 }
334 }
335 } else {
336 qsort((void *)sort_column_index, columns, sizeof(*sort_column_index), compare_strings);
337 for (i = 0; i < columns; i++) {
338 index = sort_column_index[i];
339 sorted_column[i] = column_name[index];
340 }
341 }
342
343 for (i = 0; i < columns; i++) {
344 if (!SDDS_TransferColumnDefinition(&sdds_output, &sdds_input, sorted_column[i], sorted_column[i])) {
345 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
346 }
347 }
348
349 if (!SDDS_WriteLayout(&sdds_output)) {
350 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
351 }
352
353 while (SDDS_ReadPage(&sdds_input) > 0) {
354 rows = SDDS_CountRowsOfInterest(&sdds_input);
355 if (!SDDS_StartPage(&sdds_output, rows)) {
356 fprintf(stderr, "Problem starting output page\n");
357 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
358 }
359
360 if (!SDDS_CopyParameters(&sdds_output, &sdds_input)) {
361 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
362 }
363
364 if (!SDDS_CopyColumns(&sdds_output, &sdds_input)) {
365 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
366 }
367
368 if (!SDDS_WritePage(&sdds_output)) {
369 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
370 }
371 }
372
373 if (!SDDS_Terminate(&sdds_input) || !SDDS_Terminate(&sdds_output)) {
374 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
375 return EXIT_FAILURE;
376 }
377
378 if (tmpfile_used && !replaceFileAndBackUp(input, output)) {
379 return EXIT_FAILURE;
380 }
381
382 if (parameters) {
383 for (i = 0; i < parameters; i++) {
384 free(parameter_name[i]);
385 }
386 free(parameter_name);
387 }
388
389 if (columns) {
390 for (i = 0; i < columns; i++) {
391 free(column_name[i]);
392 }
393 free(column_name);
394 }
395
396 if (sort_lists) {
397 for (i = 0; i < sort_lists; i++) {
398 free(sort_list[i]);
399 }
400 free(sort_list);
401 }
402
403 if (sort_column) {
404 free(sort_column);
405 }
406
407 free(sorted_column);
408 free(sort_column_index);
409 free_scanargs(&s_arg, argc);
410
411 return EXIT_SUCCESS;
412}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyColumns(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:387
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
void * SDDS_GetColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves a copy of the data for a specified column, including only rows marked as "of interest".
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferParameterDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a parameter definition from a source dataset to a target dataset.
char ** SDDS_GetParameterNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all parameters in the SDDS dataset.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
int wild_match(char *string, char *template)
Determine whether one string is a wildcard match for another.
Definition wild_match.c:49