SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsseparate.c
Go to the documentation of this file.
/**
 * @file sddsseparate.c
 * @brief Reorganizes column data from an SDDS file onto separate pages.
 *
 * @details
 * This program reads an SDDS (Self Describing Data Set) file and reorganizes the column data
 * such that data from different columns ends up on separate pages. The user can specify groups
 * of columns to be combined under a new name, or copy columns across pages.
 *
 * @section Usage
 * ```
 * sddsseparate [<inputfile>] [<outputfile>]
 *              [-pipe=[input][,output]]
 *              [-group=<newName>,<listOfOldNames>]
 *              [-copy=<listOfNames>]
 * ```
 *
 * @section Options
 * | Optional | Description |
 * |----------|-------------|
 * | `-pipe`  | Enables piping for input and/or output. |
 * | `-group` | Groups multiple old column names under a new column name. Data from the old columns will appear on sequential pages under the new name. |
 * | `-copy`  | Specifies columns to be copied across all pages. |
 *
 * @subsection Incompatibilities
 *   - The same column cannot appear in both `-group` and `-copy` lists.
 *
 * @subsection spec_req Specific Requirements
 *   - For `-group`:
 *     - All columns in a group must have consistent data types.
 *
 * @copyright
 *   - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
 *   - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
 *
 * @license
 * This file is distributed under the terms of the Software License Agreement
 * found in the file LICENSE included with this distribution.
 *
 * @authors
 *   - M. Borland
 *   - R. Soliday
 *   - H. Shang
 */
45
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "mdb.h"
#include "SDDS.h"
#include "scan.h"
49
/**
 * One -group request: a set of input columns that are written, one per
 * output page, into a single output column.
 */
typedef struct {
  char *new_name;         /* name of the output column (first item of -group) */
  char **users_old_name;  /* column names/patterns exactly as given on the command line */
  char **old_name;        /* input column names selected by those patterns */
  char *parameter_name;   /* "<new_name>SourceColumn": output parameter naming the source column of each page */
  long users_old_names;   /* number of entries in users_old_name */
  int32_t old_names;      /* number of entries in old_name */
} groups_t;
59
/**
 * Command-line option identifiers.
 *
 * The values double as indices into option_strings, so the two lists must be
 * kept in the same order. N_OPTIONS is the number of options, not an option.
 */
typedef enum {
  SET_GROUP,
  SET_COPY,
  SET_PIPE,
  N_OPTIONS
} option_type_t;
67
68/* List of option strings for argument matching */
69static char *option_strings[N_OPTIONS] = {
70 "group",
71 "copy",
72 "pipe",
73};
74
75/* Usage message for user reference */
76static char *usage_message =
77 "sddsseparate [<inputfile>] [<outputfile>]\n"
78 " [-pipe=[input][,output]]\n"
79 " [-group=<newName>,<listOfOldNames>]\n"
80 " [-copy=<listOfNames>]\n"
81 "Description:\n"
82 " Reorganizes the column data in the input so that data from different\n"
83 " columns ends up on different pages.\n"
84 " For each -group option, a column is created in the output that contains\n"
85 " data from the columns <listOfOldNames> on sequential pages.\n"
86 " Columns named with the -copy option are duplicated on each page.\n"
87 "\n"
88 "Examples:\n"
89 " Group columns A, B, C under a new name 'Group1' and copy column D:\n"
90 " sddsseparate input.sdds output.sdds -group=Group1,A,B,C -copy=D\n"
91 "\n"
92 "Program by Michael Borland.\n"
93 "(" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
94
95int main(int argc, char **argv) {
96 char *input = NULL;
97 char *output = NULL;
98 char **copy_column_name = NULL;
99 char **users_copy_column_name = NULL;
100 groups_t *group = NULL;
101 int32_t copy_columns = 0;
102 long users_copy_columns = 0;
103 long groups_count = 0;
104 long i_arg, i, read_code, items;
105 int64_t rows;
106 unsigned long pipe_flags = 0;
107 SCANNED_ARG *sc_arg = NULL;
108 SDDS_DATASET sdds_in, sdds_out;
109
110 /* Register the program name for error reporting */
112
113 /* Parse command-line arguments */
114 argc = scanargs(&sc_arg, argc, argv);
115 if (argc < 2)
116 bomb(NULL, usage_message);
117
118 /* Initialize variables */
119 group = NULL;
120 copy_column_name = users_copy_column_name = NULL;
121 users_copy_columns = copy_columns = groups_count = 0;
122
123 /* Process arguments */
124 for (i_arg = 1; i_arg < argc; i_arg++) {
125 if (sc_arg[i_arg].arg_type == OPTION) {
126 /* Match options and process accordingly */
127 switch (match_string(sc_arg[i_arg].list[0], option_strings, N_OPTIONS, 0)) {
128 case SET_PIPE:
129 /* Handle pipe options */
130 if (!processPipeOption(sc_arg[i_arg].list + 1, sc_arg[i_arg].n_items - 1, &pipe_flags))
131 SDDS_Bomb("invalid -pipe syntax");
132 break;
133
134 case SET_GROUP:
135 /* Handle group options */
136 items = sc_arg[i_arg].n_items - 1;
137 if (items < 2)
138 SDDS_Bomb("invalid -group syntax");
139 group = SDDS_Realloc(group, sizeof(*group) * (groups_count + 1));
140 if (!group ||
141 !SDDS_CopyString(&group[groups_count].new_name, sc_arg[i_arg].list[1]) ||
142 !(group[groups_count].users_old_name = SDDS_Malloc(sizeof(*group[groups_count].users_old_name) * (group[groups_count].users_old_names = items - 1))) ||
143 !SDDS_CopyStringArray(group[groups_count].users_old_name, sc_arg[i_arg].list + 2, group[groups_count].users_old_names))
144 SDDS_Bomb("memory allocation failure");
145 group[groups_count].old_name = NULL;
146 group[groups_count].old_names = 0;
147 groups_count++;
148 break;
149
150 case SET_COPY:
151 /* Handle copy options */
152 if (users_copy_columns)
153 SDDS_Bomb("give -copy only once");
154 users_copy_columns = sc_arg[i_arg].n_items - 1;
155 if (users_copy_columns < 1)
156 SDDS_Bomb("invalid -copy syntax");
157 users_copy_column_name = SDDS_Malloc(sizeof(*users_copy_column_name) * users_copy_columns);
158 if (!users_copy_column_name ||
159 !SDDS_CopyStringArray(users_copy_column_name, sc_arg[i_arg].list + 1, users_copy_columns))
160 SDDS_Bomb("memory allocation failure");
161 break;
162
163 default:
164 /* Handle unknown options */
165 fprintf(stderr, "error: unknown/ambiguous option: %s\n", sc_arg[i_arg].list[0]);
166 exit(EXIT_FAILURE);
167 break;
168 }
169 } else {
170 /* Process input and output filenames */
171 if (!input)
172 input = sc_arg[i_arg].list[0];
173 else if (!output)
174 output = sc_arg[i_arg].list[0];
175 else
176 SDDS_Bomb("too many filenames seen");
177 }
178 }
179
180 if (groups_count == 0)
181 SDDS_Bomb("no groups defined");
182
183 /* Process filenames and open input file */
184 processFilenames("sddsseparate", &input, &output, pipe_flags, 0, NULL);
185
186 if (!SDDS_InitializeInput(&sdds_in, input))
187 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
188
189 /* Process copy columns */
190 if (users_copy_columns) {
191 SDDS_SetColumnFlags(&sdds_in, 0);
192 for (i = 0; i < users_copy_columns; i++)
193 SDDS_SetColumnsOfInterest(&sdds_in, SDDS_MATCH_STRING, users_copy_column_name[i], SDDS_OR);
194 copy_column_name = SDDS_GetColumnNames(&sdds_in, &copy_columns);
195 if (!copy_column_name || copy_columns == 0)
196 SDDS_Bomb("no match for copy columns");
197 }
198
199 /* Process groups */
200 for (i = 0; i < groups_count; i++) {
201 long j, type = 0;
202 SDDS_SetColumnFlags(&sdds_in, 0);
203
204 for (j = 0; j < group[i].users_old_names; j++)
205 SDDS_SetColumnsOfInterest(&sdds_in, SDDS_MATCH_STRING, group[i].users_old_name[j], SDDS_OR);
206
207 group[i].old_name = SDDS_GetColumnNames(&sdds_in, &group[i].old_names);
208 if (!group[i].old_name) {
209 fprintf(stderr, "No match for group %s (sddsseparate)\n", group[i].new_name);
210 exit(EXIT_FAILURE);
211 }
212
213 if (i > 0 && group[i - 1].old_names != group[i].old_names) {
214 fprintf(stderr, "Group %s comprises %" PRId32 " columns, whereas the last group comprises %" PRId32 " (sddsseparate)\n",
215 group[i].new_name, group[i].old_names, group[i - 1].old_names);
216 exit(EXIT_FAILURE);
217 }
218
219 type = SDDS_GetColumnType(&sdds_in, SDDS_GetColumnIndex(&sdds_in, group[i].old_name[0]));
220 for (j = 1; j < group[i].old_names; j++) {
221 if (type != SDDS_GetColumnType(&sdds_in, SDDS_GetColumnIndex(&sdds_in, group[i].old_name[j]))) {
222 fprintf(stderr, "Inconsistent data types in group %s (sddsseparate)\n", group[i].new_name);
223 fprintf(stderr, "First inconsistent column is %s\n", group[i].old_name[j]);
224 exit(EXIT_FAILURE);
225 }
226 }
227 }
228
229 /* Initialize output file */
230 if (!SDDS_InitializeOutput(&sdds_out, SDDS_BINARY, 0, NULL, NULL, output) ||
231 !SDDS_TransferAllParameterDefinitions(&sdds_out, &sdds_in, 0))
232 SDDS_Bomb("problem initializing output file");
233
234 for (i = 0; i < copy_columns; i++) {
235 if (!SDDS_TransferColumnDefinition(&sdds_out, &sdds_in, copy_column_name[i], NULL))
236 SDDS_Bomb("problem transferring copy column definitions to output file");
237 }
238
239 for (i = 0; i < groups_count; i++) {
240 char *name;
241 if (!SDDS_TransferColumnDefinition(&sdds_out, &sdds_in, group[i].old_name[0], group[i].new_name)) {
242 fprintf(stderr, "Problem transferring column %s as %s to output file (sddsseparate)\n",
243 group[i].old_name[0], group[i].new_name);
244 exit(EXIT_FAILURE);
245 }
246
247 group[i].parameter_name = SDDS_Malloc(sizeof(*name) * (strlen(group[i].new_name) + 100));
248 if (!group[i].parameter_name)
249 SDDS_Bomb("memory allocation failure");
250
251 sprintf(group[i].parameter_name, "%sSourceColumn", group[i].new_name);
252 if (!SDDS_DefineSimpleParameter(&sdds_out, group[i].parameter_name, NULL, SDDS_STRING))
253 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
254 }
255
256 if (!SDDS_WriteLayout(&sdds_out))
257 SDDS_Bomb("problem writing layout to output file");
258
259 /* Process each page */
260 while ((read_code = SDDS_ReadPage(&sdds_in)) > 0) {
261 rows = SDDS_CountRowsOfInterest(&sdds_in);
262 if (rows < 0)
263 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
264 if (rows == 0)
265 continue;
266
267 for (i = 0; i < group[0].old_names; i++) {
268 long ic, ig;
269
270 if (!SDDS_StartPage(&sdds_out, rows) ||
271 !SDDS_CopyParameters(&sdds_out, &sdds_in))
272 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
273
274 for (ic = 0; ic < copy_columns; ic++) {
275 void *data = SDDS_GetInternalColumn(&sdds_in, copy_column_name[ic]);
276 if (!data ||
277 !SDDS_SetColumn(&sdds_out, SDDS_SET_BY_NAME, data, rows, copy_column_name[ic]))
278 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
279 }
280
281 for (ig = 0; ig < groups_count; ig++) {
282 void *data;
283
284 if (!SDDS_SetParameters(&sdds_out, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE,
285 group[ig].parameter_name, group[ig].old_name[i], NULL))
286 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
287
288 data = SDDS_GetInternalColumn(&sdds_in, group[ig].old_name[i]);
289 if (!data ||
290 !SDDS_SetColumn(&sdds_out, SDDS_SET_BY_NAME, data, rows, group[ig].new_name))
291 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
292 }
293
294 if (!SDDS_WritePage(&sdds_out))
295 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
296 }
297 }
298
299 /* Terminate input and output files */
300 if (!SDDS_Terminate(&sdds_in)) {
301 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
302 exit(EXIT_FAILURE);
303 }
304
305 if (!SDDS_Terminate(&sdds_out)) {
306 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
307 exit(EXIT_FAILURE);
308 }
309
310 return 0;
311}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
void * SDDS_GetInternalColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves an internal pointer to the data of a specified column, including all rows.
int32_t SDDS_SetColumnFlags(SDDS_DATASET *SDDS_dataset, int32_t column_flag_value)
Sets the acceptance flags for all columns in the current data table of a data set.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
int32_t SDDS_CopyStringArray(char **target, char **source, int64_t n_strings)
Copies an array of strings from source to target.
void * SDDS_Malloc(size_t size)
Allocates memory of a specified size.
Definition SDDS_utils.c:639
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390