SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsregroup.c
Go to the documentation of this file.
1/**
2 * @file sddsregroup.c
3 * @brief Regroups SDDS files by reorganizing rows and pages.
4 *
5 * @details
6 * This program processes an SDDS file where each page contains a fixed number
7 * of rows and produces an output file with regrouped pages. Specifically, if the input file
8 * contains m pages with n rows each, the output file will contain n pages with m rows each.
9 * The user can specify which columns from the input file will become parameters in the output file,
10 * and vice versa. Additionally, input parameters can be duplicated as extra columns in the output file.
11 *
12 * @section Usage
13 * ```
14 * sddsregroup [<inputfile>] [<outputfile>]
15 * [-pipe=[input][,output]]
16 * [-newparameters=<oldcolumnname>[,...]]
17 * [-newcolumns=<oldparametername>[,...]]
18 * [-warning]
19 * [-verbose]
20 * [-majorOrder=row|column]
21 * ```
22 *
23 * @section Options
24 * | Optional | Description |
25 * |------------------------|-----------------------------------------------------------------------------|
26 * | `-pipe` | Read input from and/or write output to a pipe. |
27 * | `-newparameters` | Specifies columns from the input file to become parameters in the output file. |
28 * | `-newcolumns` | Specifies parameters from the input file to become columns in the output file. |
29 * | `-warning` | Enables warning messages. |
30 * | `-verbose` | Enables verbose output for detailed information during execution. |
31 * | `-majorOrder` | Specifies the data order of the output file (row-major or column-major). |
32 *
33 * @copyright
34 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
35 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
36 *
37 * @license
38 * This file is distributed under the terms of the Software License Agreement
39 * found in the file LICENSE included with this distribution.
40 *
41 * @authors
42 * M. Borland,
43 * C. Saunders,
44 * R. Soliday,
45 * H. Shang
46 */
47
48#include "mdb.h"
49#include "scan.h"
50#include "match_string.h"
51#include "matlib.h"
52#include "SDDS.h"
53
/*
 * Identifiers for the recognized command-line options.  Each value is the
 * index of the matching keyword in commandline_option[], so the two
 * definitions must be kept in step.
 */
enum option_type {
  CLO_NEWCOLUMNS = 0,
  CLO_NEWPARAMETERS,
  CLO_WARNING,
  CLO_VERBOSE,
  CLO_PIPE,
  CLO_MAJOR_ORDER,
  N_OPTIONS /* count of the options above; must remain the last entry */
};

/* Option keywords, one slot per enum option_type value. */
char *commandline_option[N_OPTIONS] = {
  [CLO_NEWCOLUMNS] = "newcolumns",
  [CLO_NEWPARAMETERS] = "newparameters",
  [CLO_WARNING] = "warning",
  [CLO_VERBOSE] = "verbose",
  [CLO_PIPE] = "pipe",
  [CLO_MAJOR_ORDER] = "majorOrder",
};
73
74static char *USAGE =
75 "sddsregroup [<inputfile>] [<outputfile>]\n"
76 " [-pipe=[input][,output]]\n"
77 " [-newparameters=<oldcolumnname>[,...]]\n"
78 " [-newcolumns=<oldparametername>[,...]]\n"
79 " [-warning]\n"
80 " [-verbose]\n"
81 " [-majorOrder=row|column]\n"
82 "Reorganizes the data in the input file by taking single rows from each page of the input file\n"
83 "to form single pages of the output file.\n\n"
84 "Options:\n"
85 " -pipe=[input][,output] Read input from and/or write output to a pipe.\n"
86 " -newparameters=<col1>[,<col2>,...]\n"
87 " Specify which columns of the input file will become\n"
88 " parameters in the output file. By default, no new parameters\n"
89 " are created, and all columns of the input file are transferred\n"
90 " to the output file.\n"
91 " -newcolumns=<param1>[,<param2>,...]\n"
92 " Specify which parameters of the input file will become\n"
93 " columns in the output file. These columns will be duplicated\n"
94 " across all pages. By default, all parameter values are lost.\n"
95 " -majorOrder=row|column Specify the data order of the output file as row-major or column-major.\n"
96 " -warning Enable warning messages.\n"
97 " -verbose Enable verbose output.\n\n"
98 "Program by Louis Emery, ANL (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
99
100int main(int argc, char **argv) {
101 SCANNED_ARG *s_arg;
102 SDDS_TABLE inputPage, *inputPages, outputPage;
103
104 char *inputfile, *outputfile;
105 char *InputDescription, *InputContents;
106 char *OutputDescription, *OutputContents;
107 char **InputParameters, **InputColumns, **ColToParNames, **ParToColNames;
108 int32_t NInputParameters, NInputColumns;
109 long NColToPar, NParToCol, NColToCol;
110 int64_t NInputRows, NOutputRows;
111 long NInputPages, NOutputPages;
112 long *ColToColInputIndex, *ColToParInputIndex, *ParToColInputIndex;
113 long *ColToColOutputIndex, *ColToParOutputIndex, *ParToColOutputIndex;
114 long pageIncrement = 20;
115
116 long i, i_arg, j;
117 long ipage;
118 int64_t row;
119 long verbose;
120 /* long warning; */
121 unsigned long pipeFlags, majorOrderFlag;
122 long tmpfile_used, noWarnings;
123 short columnMajorOrder = -1;
124
125 inputPages = NULL;
126
128 argc = scanargs(&s_arg, argc, argv);
129 if (argc == 1)
130 bomb(NULL, USAGE);
131
132 inputfile = outputfile = NULL;
133 InputDescription = InputContents = NULL;
134 OutputDescription = OutputContents = NULL;
135 InputParameters = InputColumns = ColToParNames = ParToColNames = NULL;
136 NInputParameters = NInputColumns = NColToPar = NParToCol = NColToCol = 0;
137 NInputRows = NOutputRows = NInputPages = NOutputPages = 0;
138 ColToColInputIndex = ColToParInputIndex = ParToColInputIndex = NULL;
139 ColToColOutputIndex = ColToParOutputIndex = ParToColOutputIndex = NULL;
140
141 verbose = 0;
142 /* warning = 0; */
143 pipeFlags = 0;
144 tmpfile_used = 0;
145 noWarnings = 0;
146 for (i_arg = 1; i_arg < argc; i_arg++) {
147 if (s_arg[i_arg].arg_type == OPTION) {
148 switch (match_string(s_arg[i_arg].list[0], commandline_option, N_OPTIONS, UNIQUE_MATCH)) {
149 case CLO_MAJOR_ORDER:
150 majorOrderFlag = 0;
151 s_arg[i_arg].n_items--;
152 if (s_arg[i_arg].n_items > 0 && (!scanItemList(&majorOrderFlag, s_arg[i_arg].list + 1, &s_arg[i_arg].n_items, 0, "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER, "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
153 SDDS_Bomb("invalid -majorOrder syntax/values");
154 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
155 columnMajorOrder = 1;
156 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
157 columnMajorOrder = 0;
158 break;
159 case CLO_VERBOSE:
160 verbose = 1;
161 break;
162 case CLO_WARNING:
163 /* warning = 1; */
164 break;
165 case CLO_PIPE:
166 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipeFlags))
167 SDDS_Bomb("invalid -pipe syntax");
168 break;
169 case CLO_NEWCOLUMNS:
170 NParToCol = s_arg[i_arg].n_items - 1;
171 if (!NParToCol) {
172 SDDS_Bomb("No old parameter names given");
173 }
174 ParToColNames = (char **)malloc(NParToCol * sizeof(char *));
175 for (i = 0; i < NParToCol; i++) {
176 ParToColNames[i] = s_arg[i_arg].list[i + 1];
177 }
178 break;
179 case CLO_NEWPARAMETERS:
180 NColToPar = s_arg[i_arg].n_items - 1;
181 if (!NColToPar) {
182 SDDS_Bomb("No old column names given");
183 }
184 ColToParNames = (char **)malloc(NColToPar * sizeof(char *));
185 for (i = 0; i < NColToPar; i++) {
186 ColToParNames[i] = s_arg[i_arg].list[i + 1];
187 }
188 break;
189 default:
190 SDDS_Bomb("unrecognized option given");
191 }
192 } else {
193 if (!inputfile)
194 inputfile = s_arg[i_arg].list[0];
195 else if (!outputfile)
196 outputfile = s_arg[i_arg].list[0];
197 else
198 SDDS_Bomb("too many filenames given");
199 }
200 }
201
202 processFilenames("sddsregroup", &inputfile, &outputfile, pipeFlags, noWarnings, &tmpfile_used);
203
204 if (!SDDS_InitializeInput(&inputPage, inputfile))
205 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
206 if (0 < SDDS_ReadTable(&inputPage))
207 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
208 InputColumns = (char **)SDDS_GetColumnNames(&inputPage, &NInputColumns);
209 InputParameters = (char **)SDDS_GetParameterNames(&inputPage, &NInputParameters);
210 InputDescription = InputContents = NULL;
211 if (!SDDS_GetDescription(&inputPage, &InputDescription, &InputContents))
212 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
213 NInputRows = SDDS_CountRowsOfInterest(&inputPage);
214 ColToParInputIndex = (long *)malloc(NColToPar * sizeof(long));
215 ColToParOutputIndex = (long *)malloc(NColToPar * sizeof(long));
216 NColToCol = NInputColumns - NColToPar;
217 ColToColInputIndex = (long *)malloc(NColToCol * sizeof(long));
218 ColToColOutputIndex = (long *)malloc(NColToCol * sizeof(long));
219 ParToColInputIndex = (long *)malloc(NParToCol * sizeof(long));
220 ParToColOutputIndex = (long *)malloc(NParToCol * sizeof(long));
221
222 /*******************************\
223 * Check existence of selected *
224 * columns and parameters *
225 \*******************************/
226 for (i = 0; i < NColToPar; i++) {
227 switch (SDDS_CheckColumn(&inputPage, ColToParNames[i], NULL, 0, NULL)) {
228 case SDDS_CHECK_NONEXISTENT:
229 fprintf(stderr, "Error: Input file doesn't contain column %s.\n", ColToParNames[i]);
230 exit(EXIT_FAILURE);
231 }
232 }
233 for (i = 0; i < NParToCol; i++) {
234 switch (SDDS_CheckParameter(&inputPage, ParToColNames[i], NULL, 0, NULL)) {
235 case SDDS_CHECK_NONEXISTENT:
236 fprintf(stderr, "Error: Input file doesn't contain parameter %s.\n", ParToColNames[i]);
237 exit(EXIT_FAILURE);
238 }
239 }
240
241 /*****************************************\
242 * Make copies of pages of the input file *
243 \*****************************************/
244 NInputPages = 0;
245 if (verbose) {
246 init_stats();
247 }
248
249 do {
250 if (!NInputPages) {
251 inputPages = (SDDS_TABLE *)malloc(pageIncrement * sizeof(SDDS_TABLE));
252 } else if (!(NInputPages % pageIncrement)) {
253 inputPages = (SDDS_TABLE *)realloc(inputPages, (NInputPages + pageIncrement) * sizeof(SDDS_TABLE));
254 }
255 if (NInputRows != SDDS_CountRowsOfInterest(&inputPage)) {
256 fprintf(stderr, "Error: Number of rows in pages are not all equal.\n");
257 exit(EXIT_FAILURE);
258 }
259 if (!SDDS_InitializeCopy(&inputPages[NInputPages], &inputPage, NULL, "m"))
260 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
261 if (!SDDS_CopyTable(&inputPages[NInputPages], &inputPage))
262 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
263 if (verbose) {
264 fprintf(stderr, "Reading page %ld...\n", NInputPages);
265 }
266
267 NInputPages++;
268 } while (0 < SDDS_ReadTable(&inputPage));
269
270 if (!SDDS_Terminate(&inputPage))
271 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
272
273 if (InputDescription) {
274 OutputDescription = (char *)malloc((strlen(InputDescription) + strlen(", regrouped") + 1) * sizeof(char));
275 OutputDescription = strcat(strcpy(OutputDescription, InputDescription), ", regrouped");
276 } else {
277 OutputDescription = (char *)malloc((strlen("File regrouped") + strlen(inputfile ? inputfile : "from pipe") + 1) * sizeof(char));
278 sprintf(OutputDescription, "File %s regrouped", inputfile ? inputfile : "from pipe");
279 }
280 if (InputContents) {
281 OutputContents = (char *)malloc((strlen(InputContents) + strlen(", regrouped") + 1) * sizeof(char));
282 OutputContents = strcat(strcpy(OutputContents, InputContents), ", regrouped");
283 } else {
284 OutputContents = (char *)malloc((strlen("File regrouped") + strlen(inputfile ? inputfile : "from pipe") + 1) * sizeof(char));
285 sprintf(OutputContents, "File %s regrouped", inputfile ? inputfile : "from pipe");
286 }
287 if (!SDDS_InitializeOutput(&outputPage, SDDS_BINARY, 0, OutputDescription, OutputContents, outputfile))
288 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
289 if (columnMajorOrder != -1)
290 outputPage.layout.data_mode.column_major = columnMajorOrder;
291 else
292 outputPage.layout.data_mode.column_major = inputPage.layout.data_mode.column_major;
293 /*************************************************\
294 * Define columns and parameters and store indices *
295 \*************************************************/
296
297 /******************************************************\
298 * Selected input columns are transferred to parameters *
299 \******************************************************/
300 for (i = 0; i < NColToPar; i++) {
301 if (!SDDS_DefineParameterLikeColumn(&outputPage, &inputPages[0], ColToParNames[i], ColToParNames[i]))
302 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
303 ColToParInputIndex[i] = SDDS_GetColumnIndex(&inputPages[0], ColToParNames[i]);
304 ColToParOutputIndex[i] = SDDS_GetParameterIndex(&outputPage, ColToParNames[i]);
305 }
306 /****************************************************\
307 * Selected input parameters are transferred to columns *
308 \****************************************************/
309 for (i = 0; i < NParToCol; i++) {
310 if (!SDDS_DefineColumnLikeParameter(&outputPage, &inputPages[0], ParToColNames[i], ParToColNames[i]))
311 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
312 ParToColInputIndex[i] = SDDS_GetParameterIndex(&inputPages[0], ParToColNames[i]);
313 ParToColOutputIndex[i] = SDDS_GetColumnIndex(&outputPage, ParToColNames[i]);
314 }
315 /***********************************\
316 * Columns are transferred to columns *
317 \***********************************/
318 j = 0;
319 for (i = 0; i < NInputColumns; i++) {
320 if (0 > match_string(InputColumns[i], ColToParNames, NColToPar, EXACT_MATCH)) {
321 if (0 > SDDS_TransferColumnDefinition(&outputPage, &inputPages[0], InputColumns[i], InputColumns[i]))
322 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
323 ColToColInputIndex[j] = SDDS_GetColumnIndex(&inputPages[0], InputColumns[i]);
324 ColToColOutputIndex[j] = SDDS_GetColumnIndex(&outputPage, InputColumns[i]);
325 j++;
326 }
327 }
328 if (j != NColToCol)
329 SDDS_Bomb("Error: Something went wrong with counting the columns. Report to author.");
330 if (!SDDS_WriteLayout(&outputPage))
331 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
332 /*******************************\
333 * Fill pages in the output file *
334 \*******************************/
335 NOutputPages = NInputRows;
336 NOutputRows = NInputPages;
337 for (ipage = 0; ipage < NOutputPages; ipage++) {
338 if (verbose)
339 fprintf(stderr, "Starting page %ld...\n", ipage);
340 SDDS_StartTable(&outputPage, NOutputRows);
341 /* Set parameters */
342 for (i = 0; i < NColToPar; i++) {
343 if (!SDDS_SetParameters(&outputPage, SDDS_SET_BY_INDEX | SDDS_PASS_BY_REFERENCE, ColToParOutputIndex[i], SDDS_GetValueByAbsIndex(&inputPages[0], ColToParInputIndex[i], ipage, NULL), -1))
344 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
345 }
346 /* Set columns */
347 for (i = 0; i < NParToCol; i++) {
348 /* Transfer parameters of input file to columns of output */
349 for (row = 0; row < NOutputRows; row++) {
350 if (!SDDS_SetRowValues(&outputPage, SDDS_SET_BY_INDEX | SDDS_PASS_BY_REFERENCE, row, ParToColOutputIndex[i], SDDS_GetParameter(&inputPages[row], InputParameters[ParToColInputIndex[i]], NULL), -1))
351 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
352 }
353 }
354 for (i = 0; i < NColToCol; i++) {
355 for (row = 0; row < NOutputRows; row++) {
356 if (!SDDS_SetRowValues(&outputPage, SDDS_SET_BY_INDEX | SDDS_PASS_BY_REFERENCE, row, ColToColOutputIndex[i], SDDS_GetValueByAbsIndex(&inputPages[row], ColToColInputIndex[i], ipage, NULL), -1))
357 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
358 }
359 }
360
361 if (!SDDS_WriteTable(&outputPage))
362 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
363 }
364
365 for (i = 0; i < NInputPages; i++) {
366 if (!SDDS_Terminate(&inputPages[i]))
367 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
368 }
369 if (inputPages)
370 free(inputPages);
371 if (!SDDS_Terminate(&outputPage))
372 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
373
374 if (tmpfile_used && !replaceFileAndBackUp(inputfile, outputfile))
375 exit(EXIT_FAILURE);
376 return EXIT_SUCCESS;
377}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
void * SDDS_GetParameter(SDDS_DATASET *SDDS_dataset, char *parameter_name, void *memory)
Retrieves the value of a specified parameter from the current data table of a data set.
void * SDDS_GetValueByAbsIndex(SDDS_DATASET *SDDS_dataset, int32_t column_index, int64_t row_index, void *memory)
Retrieves the value from a specified column and absolute row index, optionally storing it in provided...
int32_t SDDS_GetDescription(SDDS_DATASET *SDDS_dataset, char **text, char **contents)
Retrieves the text and contents descriptions from an SDDS dataset.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_DefineParameterLikeColumn(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a parameter in the target dataset based on a column definition from the source dataset.
int32_t SDDS_DefineColumnLikeParameter(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a column in the target dataset based on a parameter definition from the source dataset.
char ** SDDS_GetParameterNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all parameters in the SDDS dataset.
int32_t SDDS_GetParameterIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named parameter in the SDDS dataset.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CheckParameter(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a parameter exists in the SDDS dataset with the specified name, units, and type.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.