SDDSlib
Loading...
Searching...
No Matches
sddssplit.c
Go to the documentation of this file.
1/**
2 * @file sddssplit.c
3 * @brief Utility for splitting SDDS files into multiple files, one page per file.
4 *
5 * This program reads an SDDS (Self Describing Data Set) file and splits its contents
6 * into multiple files, with each page saved as a separate SDDS file. It supports
7 * both binary and ASCII output formats and allows customization of output filenames
8 * using various parameters.
9 *
10 * ### Features:
11 * - Splits an SDDS file into multiple output files based on pages.
12 * - Allows customization of output filenames via rootname, digits, and extension.
13 * - Supports grouping pages using a specified parameter.
14 * - Provides options for setting the output file format (binary or ASCII).
15 * - Supports row-major and column-major order for output.
16 *
 * ### Command-line Options:
 * - `-pipe[=input]`: Read the input from standard input instead of a named file
 *   (requires `-rootname` or `-nameParameter`).
 * - `-binary`, `-ascii`: Specify the output format.
 * - `-digits=<number>`: Number of digits in the output filename indices.
 * - `-rootname=<string>`: Rootname for output filenames.
 * - `-firstPage=<number>`: First page of the input file to include.
 * - `-lastPage=<number>`: Last page of the input file to include.
 * - `-interval=<number>`: Interval between pages to include in output.
 * - `-extension=<string>`: Extension for output files (default is "sdds").
 * - `-groupParameter=<parameterName>`: Parameter for grouping pages into output files.
 * - `-nameParameter=<filenameParameter>`: Parameter for naming output files.
 * - `-offset=<integer>`: Offset for computing output filename indices.
 * - `-majorOrder=row|column`: Select output order (row-major or column-major).
29 *
30 * ### Usage Example:
31 * ```
32 * sddssplit input.sdds -binary -rootname=output -digits=4 -firstPage=1 -interval=2
33 * ```
34 *
35 * @copyright
36 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
37 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
38 *
39 * @license
40 * This file is distributed under the terms of the Software License Agreement
41 * found in the file LICENSE included with this distribution.
42 *
43 * @author M. Borland, C. Saunders, R. Soliday, H. Shang
44 */
45
46#include "mdb.h"
47#include "SDDS.h"
48#include "scan.h"
49
/**
 * Identifiers for the command-line switches understood by sddssplit.
 *
 * The numeric value of each enumerator is the index of the matching keyword
 * in the option[] table, so the two must be kept in the same order.
 * N_OPTIONS is the number of switches and is not itself a switch.
 */
typedef enum {
  SET_BINARY = 0,     /* -binary */
  SET_ASCII,          /* -ascii */
  SET_DIGITS,         /* -digits=<number> */
  SET_ROOTNAME,       /* -rootname=<string> */
  SET_FIRST_PAGE,     /* -firstPage=<number> */
  SET_LAST_PAGE,      /* -lastPage=<number> */
  SET_INTERVAL,       /* -interval=<number> */
  SET_EXTENSION,      /* -extension=<string> */
  SET_PIPE,           /* -pipe[=input] */
  SET_NAMEPARAMETER,  /* -nameParameter=<filenameParameter> */
  SET_OFFSET,         /* -offset=<integer> */
  SET_MAJOR_ORDER,    /* -majorOrder=row|column */
  SET_GROUPPARAMETER, /* -groupParameter=<parameterName> */
  N_OPTIONS
} option_type;
67
68static char *option[N_OPTIONS] = {
69 "binary",
70 "ascii",
71 "digits",
72 "rootname",
73 "firstpage",
74 "lastpage",
75 "interval",
76 "extension",
77 "pipe",
78 "nameparameter",
79 "offset",
80 "majorOrder",
81 "groupparameter"};
82
83static char *USAGE =
84 "sddssplit <inputFile> -pipe[=input]\n"
85 " [-binary | -ascii]\n"
86 " [-digits=<number>]\n"
87 " [-rootname=<string>]\n"
88 " [-firstPage=<number>]\n"
89 " [-lastPage=<number>]\n"
90 " [-interval=<number>]\n"
91 " [-extension=<string>]\n"
92 " [-groupParameter=<parameterName>]\n"
93 " [-nameParameter=<filenameParameter>]\n"
94 " [-offset=<integer>]\n"
95 " [-majorOrder=row|column]\n\n"
96
97 "sddssplit splits an SDDS file into many SDDS files, with each page going to a separate file.\n"
98 "The files are named <rootname><integer>.sdds, where <rootname> is either the filename for\n"
99 "the source file or the specified string, and <integer> is by default <page-number>-<offset>\n"
100 "and is printed to the number of digits given by -digits (3 is the default).\n\n"
101
102 "-binary, -ascii Specifies whether binary (default) or ASCII output is desired.\n"
103 "-rootname Rootname to use for output filenames. Defaults to the source filename.\n"
104 "-digits Number of digits to use in the filenames (3 is default).\n"
105 "-firstPage First page of input file to include in output (1 is default).\n"
106 "-lastPage Last page of input file to include in output (EOF is default).\n"
107 "-interval Interval between pages included in output (1 is default).\n"
108 "-extension Extension for output files (sdds is default).\n"
109 "-groupParameter Parameter of input file to use in grouping pages into output files.\n"
110 "-nameParameter Parameter of input file to use for naming the output files.\n"
111 "-offset Offset of page number to compute index for output filename.\n"
112 "-majorOrder Select row- or column-major order output (default is row).\n\n"
113
114 "Program by Michael Borland. ("__DATE__
115 " "__TIME__
116 ", SVN revision: " SVN_VERSION ")\n";
117
118int main(int argc, char **argv) {
119 SDDS_DATASET sdds_dataset, sdds_orig;
120 long i_arg, offset = 0;
121 SCANNED_ARG *s_arg;
122 char *input = NULL, *rootname = NULL, name[500], format[100], *extension = "sdds";
123 long ascii_output = 0, binary_output = 0, retval, digits = 3;
124 long first_page = 0, last_page = 0, interval = 0;
125 unsigned long pipe_flags = 0, major_order_flag = 0;
126 char *file_parameter = NULL, *name_from_parameter = NULL, *group_parameter_name = NULL;
127 char *last_group_parameter = NULL, *this_group_parameter = NULL;
128 short column_major_order = -1, file_active = 0;
129
131 argc = scanargs(&s_arg, argc, argv);
132 if (argc < 2) {
133 fprintf(stderr, "%s", USAGE);
134 return 1;
135 }
136
137 for (i_arg = 1; i_arg < argc; i_arg++) {
138 if (s_arg[i_arg].arg_type == OPTION) {
139 delete_chars(s_arg[i_arg].list[0], "_");
140 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
141 case SET_MAJOR_ORDER:
142 major_order_flag = 0;
143 s_arg[i_arg].n_items--;
144 if (s_arg[i_arg].n_items > 0 &&
145 !scanItemList(&major_order_flag, s_arg[i_arg].list + 1, &s_arg[i_arg].n_items, 0,
146 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
147 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)) {
148 fprintf(stderr, "Error: Invalid -majorOrder syntax/values\n");
149 return 1;
150 }
151 column_major_order = (major_order_flag & SDDS_COLUMN_MAJOR_ORDER) ? 1 : 0;
152 break;
153 case SET_BINARY:
154 binary_output = 1;
155 ascii_output = 0;
156 break;
157 case SET_ASCII:
158 ascii_output = 1;
159 binary_output = 0;
160 break;
161 case SET_DIGITS:
162 if (s_arg[i_arg].n_items != 2 || sscanf(s_arg[i_arg].list[1], "%ld", &digits) != 1 || digits <= 0) {
163 fprintf(stderr, "Error: Invalid -digits syntax\n");
164 return 1;
165 }
166 break;
167 case SET_ROOTNAME:
168 if (s_arg[i_arg].n_items != 2) {
169 fprintf(stderr, "Error: Invalid -rootname syntax\n");
170 return 1;
171 }
172 rootname = s_arg[i_arg].list[1];
173 break;
174 case SET_FIRST_PAGE:
175 if (s_arg[i_arg].n_items != 2 || sscanf(s_arg[i_arg].list[1], "%ld", &first_page) != 1 || first_page <= 0) {
176 fprintf(stderr, "Error: Invalid -firstPage syntax\n");
177 return 1;
178 }
179 break;
180 case SET_LAST_PAGE:
181 if (s_arg[i_arg].n_items != 2 || sscanf(s_arg[i_arg].list[1], "%ld", &last_page) != 1 || last_page <= 0) {
182 fprintf(stderr, "Error: Invalid -lastPage syntax\n");
183 return 1;
184 }
185 break;
186 case SET_INTERVAL:
187 if (s_arg[i_arg].n_items != 2 || sscanf(s_arg[i_arg].list[1], "%ld", &interval) != 1 || interval <= 0) {
188 fprintf(stderr, "Error: Invalid -interval syntax\n");
189 return 1;
190 }
191 break;
192 case SET_EXTENSION:
193 if (s_arg[i_arg].n_items != 2) {
194 fprintf(stderr, "Error: Invalid -extension syntax\n");
195 return 1;
196 }
197 extension = s_arg[i_arg].list[1];
198 break;
199 case SET_OFFSET:
200 if (s_arg[i_arg].n_items != 2 || sscanf(s_arg[i_arg].list[1], "%ld", &offset) != 1) {
201 fprintf(stderr, "Error: Invalid -offset syntax\n");
202 return 1;
203 }
204 break;
205 case SET_PIPE:
206 pipe_flags = USE_STDIN;
207 break;
208 case SET_NAMEPARAMETER:
209 if (s_arg[i_arg].n_items != 2) {
210 fprintf(stderr, "Error: Invalid -nameParameter syntax\n");
211 return 1;
212 }
213 file_parameter = s_arg[i_arg].list[1];
214 break;
215 case SET_GROUPPARAMETER:
216 if (s_arg[i_arg].n_items != 2) {
217 fprintf(stderr, "Error: Invalid -groupParameter syntax\n");
218 return 1;
219 }
220 group_parameter_name = s_arg[i_arg].list[1];
221 break;
222 default:
223 fprintf(stderr, "Error: Unknown switch: %s\n", s_arg[i_arg].list[0]);
224 fprintf(stderr, "%s", USAGE);
225 return 1;
226 }
227 } else {
228 if (!input) {
229 input = s_arg[i_arg].list[0];
230 } else {
231 fprintf(stderr, "Error: Too many filenames\n");
232 return 1;
233 }
234 }
235 }
236
237 if (!input && !(pipe_flags & USE_STDIN)) {
238 fprintf(stderr, "Error: Missing input filename\n");
239 return 1;
240 }
241
242 if (pipe_flags & USE_STDIN && !file_parameter && !rootname) {
243 fprintf(stderr, "Error: Provide -rootname or -nameParameter with -pipe\n");
244 return 1;
245 }
246
247 if (!rootname && !file_parameter) {
248 if ((rootname = strrchr(input, '.'))) {
249 *rootname = 0;
250 SDDS_CopyString(&rootname, input);
251 input[strlen(input)] = '.';
252 } else {
253 SDDS_CopyString(&rootname, input);
254 }
255 }
256
257 if (first_page && last_page && first_page > last_page) {
258 fprintf(stderr, "Error: firstPage > lastPage\n");
259 return 1;
260 }
261
262 if (!SDDS_InitializeInput(&sdds_orig, input)) {
263 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
264 return 1;
265 }
266
267 if (!extension || SDDS_StringIsBlank(extension)) {
268 extension = NULL;
269 snprintf(format, sizeof(format), "%%s%%0%ldld", digits);
270 } else {
271 snprintf(format, sizeof(format), "%%s%%0%ldld.%s", digits, extension);
272 }
273
274 if (file_parameter &&
275 SDDS_CheckParameter(&sdds_orig, file_parameter, NULL, SDDS_STRING, stderr) != SDDS_CHECK_OKAY) {
276 fprintf(stderr, "Error: Filename parameter not present or wrong type\n");
277 return 1;
278 }
279
280 last_group_parameter = NULL;
281 while ((retval = SDDS_ReadPage(&sdds_orig)) > 0) {
282 if (first_page && retval < first_page) {
283 continue;
284 }
285 if (last_page && retval > last_page) {
286 break;
287 }
288 if (interval) {
289 if (first_page) {
290 if ((retval - first_page) % interval != 0) {
291 continue;
292 }
293 } else if ((retval - 1) % interval != 0) {
294 continue;
295 }
296 }
297 if (file_parameter) {
298 if (!SDDS_GetParameter(&sdds_orig, file_parameter, &name_from_parameter)) {
299 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
300 return 1;
301 }
302 strncpy(name, name_from_parameter, sizeof(name) - 1);
303 name[sizeof(name) - 1] = '\0';
304 free(name_from_parameter);
305 } else {
306 snprintf(name, sizeof(name), format, rootname, retval - offset);
307 }
308 if (group_parameter_name) {
309 if (!SDDS_GetParameterAsString(&sdds_orig, group_parameter_name, &this_group_parameter)) {
310 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
311 return 1;
312 }
313 }
314 if (!group_parameter_name || !last_group_parameter ||
315 (group_parameter_name && strcmp(this_group_parameter, last_group_parameter))) {
316 if (file_active && !SDDS_Terminate(&sdds_dataset)) {
317 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
318 return 1;
319 }
320 file_active = 0;
321 if (!SDDS_InitializeCopy(&sdds_dataset, &sdds_orig, name, "w")) {
322 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
323 return 1;
324 }
325 if ((ascii_output && sdds_dataset.layout.data_mode.mode != SDDS_ASCII) ||
326 (binary_output && sdds_dataset.layout.data_mode.mode != SDDS_BINARY)) {
327 sdds_dataset.layout.data_mode.mode = ascii_output ? SDDS_ASCII : SDDS_BINARY;
328 }
329 sdds_dataset.layout.data_mode.column_major = (column_major_order != -1) ? column_major_order : sdds_orig.layout.data_mode.column_major;
330 if (!SDDS_WriteLayout(&sdds_dataset)) {
331 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
332 return 1;
333 }
334 file_active = 1;
335 }
336 if (group_parameter_name) {
337 free(last_group_parameter);
338 last_group_parameter = this_group_parameter;
339 this_group_parameter = NULL;
340 }
341 if (!SDDS_CopyPage(&sdds_dataset, &sdds_orig) || !SDDS_WritePage(&sdds_dataset)) {
342 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
343 return 1;
344 }
345 }
346 if (retval == 0) {
347 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
348 return 1;
349 }
350 if (!SDDS_Terminate(&sdds_orig)) {
351 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
352 return 1;
353 }
354 return 0;
355}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
char * SDDS_GetParameterAsString(SDDS_DATASET *SDDS_dataset, char *parameter_name, char **memory)
Retrieves the value of a specified parameter as a string from the current data table of an SDDS datas...
void * SDDS_GetParameter(SDDS_DATASET *SDDS_dataset, char *parameter_name, void *memory)
Retrieves the value of a specified parameter from the current data table of a data set.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_StringIsBlank(char *s)
Checks if a string is blank (contains only whitespace characters).
int32_t SDDS_CheckParameter(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a parameter exists in the SDDS dataset with the specified name, units, and type.
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
char * delete_chars(char *s, char *t)
Removes all occurrences of characters found in string t from string s.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.