SDDSlib
Loading...
Searching...
No Matches
sddsseparate.c
Go to the documentation of this file.
1/**
2 * @file sddsseparate.c
3 * @brief Reorganizes column data from an SDDS file onto separate pages.
4 *
5 * This program reads an SDDS (Self Describing Data Set) file and reorganizes the column data
6 * such that data from different columns ends up on separate pages. The user can specify groups
7 * of columns to be combined under a new name, or copy columns across pages.
8 *
9 * ### Features:
10 * - Group columns under a common name.
11 * - Copy specified columns across all pages.
12 * - Handle input and output through piping for flexibility.
13 *
14 * ### Usage:
15 * `sddsseparate [<inputfile>] [<outputfile>] [-pipe=[input][,output]] [-group=<newName>,<listOfOldNames>] [-copy=<listOfNames>]`
16 *
17 * ### Options:
18 * - **-pipe**: Enables piping for input and/or output.
19 * - **-group**: Groups multiple old column names under a new column name. Data from the old columns will appear on sequential pages under the new name.
20 * - **-copy**: Specifies columns to be copied across all pages.
21 *
22 * ### Example:
23 * To group columns A, B, C under a new column name "Group1" and copy column D:
24 * `sddsseparate input.sdds output.sdds -group=Group1,A,B,C -copy=D`
25 *
26 * @copyright
27 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
28 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
29 *
30 * @license
31 * This file is distributed under the terms of the Software License Agreement
32 * found in the file LICENSE included with this distribution.
33 *
34 * @authors
35 * - M. Borland
36 * - R. Soliday
37 * - H. Shang
38 */
39
40#include "mdb.h"
41#include "SDDS.h"
42#include "scan.h"
43
/**
 * Bookkeeping for a single -group option.
 *
 * The "users_" members hold what was typed on the command line; the
 * remaining members are filled in later, once the input file's layout
 * is available.
 */
typedef struct {
  /* Name of the output column (first item after -group=). */
  char *new_name;
  /* Column name strings as given by the user (remaining -group items);
   * used as match strings against the input file's columns. */
  char **users_old_name;
  /* Input column names returned by SDDS_GetColumnNames after matching. */
  char **old_name;
  /* Name of the output string parameter, "<new_name>SourceColumn". */
  char *parameter_name;
  /* Number of entries in users_old_name. */
  long users_old_names;
  /* Number of entries in old_name; int32_t because its address is
   * handed to SDDS_GetColumnNames. */
  int32_t old_names;
} groups_t;
53
/*
 * Command-line option identifiers.
 *
 * The numeric value of each enumerator is its index in the option
 * string table searched by match_string(); N_OPTIONS is the number of
 * options and must stay last.
 */
typedef enum {
  SET_GROUP = 0,
  SET_COPY = 1,
  SET_PIPE = 2,
  N_OPTIONS = 3
} option_type_t;
61
62/* List of option strings for argument matching */
63static char *option_strings[N_OPTIONS] = {
64 "group",
65 "copy",
66 "pipe",
67};
68
69/* Usage message for user reference */
70static char *usage_message =
71 "Usage: sddsseparate [<inputfile>] [<outputfile>]\n"
72 " [-pipe=[input][,output]]\n"
73 " [-group=<newName>,<listOfOldNames>] [-copy=<listOfNames>]\n"
74 "\n"
75 "Description:\n"
76 " Reorganizes the column data in the input so that data from different\n"
77 " columns ends up on different pages.\n"
78 " For each -group option, a column is created in the output that contains\n"
79 " data from the columns <listOfOldNames> on sequential pages.\n"
80 " Columns named with the -copy option are duplicated on each page.\n"
81 "\n"
82 "Examples:\n"
83 " Group columns A, B, C under a new name 'Group1' and copy column D:\n"
84 " sddsseparate input.sdds output.sdds -group=Group1,A,B,C -copy=D\n"
85 "\n"
86 "Program by Michael Borland.\n"
87 "(" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
88
89int main(int argc, char **argv) {
90 char *input = NULL;
91 char *output = NULL;
92 char **copy_column_name = NULL;
93 char **users_copy_column_name = NULL;
94 groups_t *group = NULL;
95 int32_t copy_columns = 0;
96 long users_copy_columns = 0;
97 long groups_count = 0;
98 long i_arg, i, read_code, items;
99 int64_t rows;
100 unsigned long pipe_flags = 0;
101 SCANNED_ARG *sc_arg = NULL;
102 SDDS_DATASET sdds_in, sdds_out;
103
104 /* Register the program name for error reporting */
106
107 /* Parse command-line arguments */
108 argc = scanargs(&sc_arg, argc, argv);
109 if (argc < 2)
110 bomb(usage_message, NULL);
111
112 /* Initialize variables */
113 group = NULL;
114 copy_column_name = users_copy_column_name = NULL;
115 users_copy_columns = copy_columns = groups_count = 0;
116
117 /* Process arguments */
118 for (i_arg = 1; i_arg < argc; i_arg++) {
119 if (sc_arg[i_arg].arg_type == OPTION) {
120 /* Match options and process accordingly */
121 switch (match_string(sc_arg[i_arg].list[0], option_strings, N_OPTIONS, 0)) {
122 case SET_PIPE:
123 /* Handle pipe options */
124 if (!processPipeOption(sc_arg[i_arg].list + 1, sc_arg[i_arg].n_items - 1, &pipe_flags))
125 SDDS_Bomb("invalid -pipe syntax");
126 break;
127
128 case SET_GROUP:
129 /* Handle group options */
130 items = sc_arg[i_arg].n_items - 1;
131 if (items < 2)
132 SDDS_Bomb("invalid -group syntax");
133 group = SDDS_Realloc(group, sizeof(*group) * (groups_count + 1));
134 if (!group ||
135 !SDDS_CopyString(&group[groups_count].new_name, sc_arg[i_arg].list[1]) ||
136 !(group[groups_count].users_old_name = SDDS_Malloc(sizeof(*group[groups_count].users_old_name) * (group[groups_count].users_old_names = items - 1))) ||
137 !SDDS_CopyStringArray(group[groups_count].users_old_name, sc_arg[i_arg].list + 2, group[groups_count].users_old_names))
138 SDDS_Bomb("memory allocation failure");
139 group[groups_count].old_name = NULL;
140 group[groups_count].old_names = 0;
141 groups_count++;
142 break;
143
144 case SET_COPY:
145 /* Handle copy options */
146 if (users_copy_columns)
147 SDDS_Bomb("give -copy only once");
148 users_copy_columns = sc_arg[i_arg].n_items - 1;
149 if (users_copy_columns < 1)
150 SDDS_Bomb("invalid -copy syntax");
151 users_copy_column_name = SDDS_Malloc(sizeof(*users_copy_column_name) * users_copy_columns);
152 if (!users_copy_column_name ||
153 !SDDS_CopyStringArray(users_copy_column_name, sc_arg[i_arg].list + 1, users_copy_columns))
154 SDDS_Bomb("memory allocation failure");
155 break;
156
157 default:
158 /* Handle unknown options */
159 fprintf(stderr, "error: unknown/ambiguous option: %s\n", sc_arg[i_arg].list[0]);
160 exit(EXIT_FAILURE);
161 break;
162 }
163 } else {
164 /* Process input and output filenames */
165 if (!input)
166 input = sc_arg[i_arg].list[0];
167 else if (!output)
168 output = sc_arg[i_arg].list[0];
169 else
170 SDDS_Bomb("too many filenames seen");
171 }
172 }
173
174 if (groups_count == 0)
175 SDDS_Bomb("no groups defined");
176
177 /* Process filenames and open input file */
178 processFilenames("sddsseparate", &input, &output, pipe_flags, 0, NULL);
179
180 if (!SDDS_InitializeInput(&sdds_in, input))
181 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
182
183 /* Process copy columns */
184 if (users_copy_columns) {
185 SDDS_SetColumnFlags(&sdds_in, 0);
186 for (i = 0; i < users_copy_columns; i++)
187 SDDS_SetColumnsOfInterest(&sdds_in, SDDS_MATCH_STRING, users_copy_column_name[i], SDDS_OR);
188 copy_column_name = SDDS_GetColumnNames(&sdds_in, &copy_columns);
189 if (!copy_column_name || copy_columns == 0)
190 SDDS_Bomb("no match for copy columns");
191 }
192
193 /* Process groups */
194 for (i = 0; i < groups_count; i++) {
195 long j, type = 0;
196 SDDS_SetColumnFlags(&sdds_in, 0);
197
198 for (j = 0; j < group[i].users_old_names; j++)
199 SDDS_SetColumnsOfInterest(&sdds_in, SDDS_MATCH_STRING, group[i].users_old_name[j], SDDS_OR);
200
201 group[i].old_name = SDDS_GetColumnNames(&sdds_in, &group[i].old_names);
202 if (!group[i].old_name) {
203 fprintf(stderr, "No match for group %s (sddsseparate)\n", group[i].new_name);
204 exit(EXIT_FAILURE);
205 }
206
207 if (i > 0 && group[i - 1].old_names != group[i].old_names) {
208 fprintf(stderr, "Group %s comprises %" PRId32 " columns, whereas the last group comprises %" PRId32 " (sddsseparate)\n",
209 group[i].new_name, group[i].old_names, group[i - 1].old_names);
210 exit(EXIT_FAILURE);
211 }
212
213 type = SDDS_GetColumnType(&sdds_in, SDDS_GetColumnIndex(&sdds_in, group[i].old_name[0]));
214 for (j = 1; j < group[i].old_names; j++) {
215 if (type != SDDS_GetColumnType(&sdds_in, SDDS_GetColumnIndex(&sdds_in, group[i].old_name[j]))) {
216 fprintf(stderr, "Inconsistent data types in group %s (sddsseparate)\n", group[i].new_name);
217 fprintf(stderr, "First inconsistent column is %s\n", group[i].old_name[j]);
218 exit(EXIT_FAILURE);
219 }
220 }
221 }
222
223 /* Initialize output file */
224 if (!SDDS_InitializeOutput(&sdds_out, SDDS_BINARY, 0, NULL, NULL, output) ||
225 !SDDS_TransferAllParameterDefinitions(&sdds_out, &sdds_in, 0))
226 SDDS_Bomb("problem initializing output file");
227
228 for (i = 0; i < copy_columns; i++) {
229 if (!SDDS_TransferColumnDefinition(&sdds_out, &sdds_in, copy_column_name[i], NULL))
230 SDDS_Bomb("problem transferring copy column definitions to output file");
231 }
232
233 for (i = 0; i < groups_count; i++) {
234 char *name;
235 if (!SDDS_TransferColumnDefinition(&sdds_out, &sdds_in, group[i].old_name[0], group[i].new_name)) {
236 fprintf(stderr, "Problem transferring column %s as %s to output file (sddsseparate)\n",
237 group[i].old_name[0], group[i].new_name);
238 exit(EXIT_FAILURE);
239 }
240
241 group[i].parameter_name = SDDS_Malloc(sizeof(*name) * (strlen(group[i].new_name) + 100));
242 if (!group[i].parameter_name)
243 SDDS_Bomb("memory allocation failure");
244
245 sprintf(group[i].parameter_name, "%sSourceColumn", group[i].new_name);
246 if (!SDDS_DefineSimpleParameter(&sdds_out, group[i].parameter_name, NULL, SDDS_STRING))
247 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
248 }
249
250 if (!SDDS_WriteLayout(&sdds_out))
251 SDDS_Bomb("problem writing layout to output file");
252
253 /* Process each page */
254 while ((read_code = SDDS_ReadPage(&sdds_in)) > 0) {
255 rows = SDDS_CountRowsOfInterest(&sdds_in);
256 if (rows < 0)
257 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
258 if (rows == 0)
259 continue;
260
261 for (i = 0; i < group[0].old_names; i++) {
262 long ic, ig;
263
264 if (!SDDS_StartPage(&sdds_out, rows) ||
265 !SDDS_CopyParameters(&sdds_out, &sdds_in))
266 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
267
268 for (ic = 0; ic < copy_columns; ic++) {
269 void *data = SDDS_GetInternalColumn(&sdds_in, copy_column_name[ic]);
270 if (!data ||
271 !SDDS_SetColumn(&sdds_out, SDDS_SET_BY_NAME, data, rows, copy_column_name[ic]))
272 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
273 }
274
275 for (ig = 0; ig < groups_count; ig++) {
276 void *data;
277
278 if (!SDDS_SetParameters(&sdds_out, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE,
279 group[ig].parameter_name, group[ig].old_name[i], NULL))
280 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
281
282 data = SDDS_GetInternalColumn(&sdds_in, group[ig].old_name[i]);
283 if (!data ||
284 !SDDS_SetColumn(&sdds_out, SDDS_SET_BY_NAME, data, rows, group[ig].new_name))
285 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
286 }
287
288 if (!SDDS_WritePage(&sdds_out))
289 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
290 }
291 }
292
293 /* Terminate input and output files */
294 if (!SDDS_Terminate(&sdds_in)) {
295 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
296 exit(EXIT_FAILURE);
297 }
298
299 if (!SDDS_Terminate(&sdds_out)) {
300 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
301 exit(EXIT_FAILURE);
302 }
303
304 return 0;
305}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
void * SDDS_GetInternalColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves an internal pointer to the data of a specified column, including all rows.
int32_t SDDS_SetColumnFlags(SDDS_DATASET *SDDS_dataset, int32_t column_flag_value)
Sets the acceptance flags for all columns in the current data table of a data set.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
int32_t SDDS_CopyStringArray(char **target, char **source, int64_t n_strings)
Copies an array of strings from source to target.
void * SDDS_Malloc(size_t size)
Allocates memory of a specified size.
Definition SDDS_utils.c:639
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390