SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsregroup.c File Reference

Detailed Description

Regroups SDDS files by reorganizing rows and pages.

This program processes an SDDS file where each page contains a fixed number of rows and produces an output file with regrouped pages. Specifically, if the input file contains m pages with n rows each, the output file will contain n pages with m rows each. The user can specify which columns from the input file will become parameters in the output file, and vice versa. Additionally, input parameters can be duplicated as extra columns in the output file.

Usage

sddsregroup [<inputfile>] [<outputfile>]
[-pipe=[input][,output]]
[-newparameters=<oldcolumnname>[,...]]
[-newcolumns=<oldparametername>[,...]]
[-warning]
[-verbose]
[-majorOrder=row|column]

Options

Optional Description
-pipe Read input from and/or write output to a pipe.
-newparameters Specifies columns from the input file to become parameters in the output file. The parameter value for the k-th output page is taken from row k of the first input page.
-newcolumns Specifies parameters from the input file to become columns in the output file.
-warning Accepted on the command line, but currently has no effect: the code that would enable warning messages is commented out.
-verbose Enables verbose output for detailed information during execution.
-majorOrder Specifies the data order of the output file (row-major or column-major).
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Authors
M. Borland, C. Saunders, R. Soliday, H. Shang

Definition in file sddsregroup.c.

#include "mdb.h"
#include "scan.h"
#include "match_string.h"
#include "matlib.h"
#include "SDDS.h"

Go to the source code of this file.

Functions

int main (int argc, char **argv)
 

Function Documentation

◆ main()

int main ( int argc,
char ** argv )

Definition at line 100 of file sddsregroup.c.

100 {
101 SCANNED_ARG *s_arg;
102 SDDS_TABLE inputPage, *inputPages, outputPage;
103
104 char *inputfile, *outputfile;
105 char *InputDescription, *InputContents;
106 char *OutputDescription, *OutputContents;
107 char **InputParameters, **InputColumns, **ColToParNames, **ParToColNames;
108 int32_t NInputParameters, NInputColumns;
109 long NColToPar, NParToCol, NColToCol;
110 int64_t NInputRows, NOutputRows;
111 long NInputPages, NOutputPages;
112 long *ColToColInputIndex, *ColToParInputIndex, *ParToColInputIndex;
113 long *ColToColOutputIndex, *ColToParOutputIndex, *ParToColOutputIndex;
114 long pageIncrement = 20;
115
116 long i, i_arg, j;
117 long ipage;
118 int64_t row;
119 long verbose;
120 /* long warning; */
121 unsigned long pipeFlags, majorOrderFlag;
122 long tmpfile_used, noWarnings;
123 short columnMajorOrder = -1;
124
125 inputPages = NULL;
126
128 argc = scanargs(&s_arg, argc, argv);
129 if (argc == 1)
130 bomb(NULL, USAGE);
131
132 inputfile = outputfile = NULL;
133 InputDescription = InputContents = NULL;
134 OutputDescription = OutputContents = NULL;
135 InputParameters = InputColumns = ColToParNames = ParToColNames = NULL;
136 NInputParameters = NInputColumns = NColToPar = NParToCol = NColToCol = 0;
137 NInputRows = NOutputRows = NInputPages = NOutputPages = 0;
138 ColToColInputIndex = ColToParInputIndex = ParToColInputIndex = NULL;
139 ColToColOutputIndex = ColToParOutputIndex = ParToColOutputIndex = NULL;
140
141 verbose = 0;
142 /* warning = 0; */
143 pipeFlags = 0;
144 tmpfile_used = 0;
145 noWarnings = 0;
146 for (i_arg = 1; i_arg < argc; i_arg++) {
147 if (s_arg[i_arg].arg_type == OPTION) {
148 switch (match_string(s_arg[i_arg].list[0], commandline_option, N_OPTIONS, UNIQUE_MATCH)) {
149 case CLO_MAJOR_ORDER:
150 majorOrderFlag = 0;
151 s_arg[i_arg].n_items--;
152 if (s_arg[i_arg].n_items > 0 && (!scanItemList(&majorOrderFlag, s_arg[i_arg].list + 1, &s_arg[i_arg].n_items, 0, "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER, "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
153 SDDS_Bomb("invalid -majorOrder syntax/values");
154 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
155 columnMajorOrder = 1;
156 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
157 columnMajorOrder = 0;
158 break;
159 case CLO_VERBOSE:
160 verbose = 1;
161 break;
162 case CLO_WARNING:
163 /* warning = 1; */
164 break;
165 case CLO_PIPE:
166 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipeFlags))
167 SDDS_Bomb("invalid -pipe syntax");
168 break;
169 case CLO_NEWCOLUMNS:
170 NParToCol = s_arg[i_arg].n_items - 1;
171 if (!NParToCol) {
172 SDDS_Bomb("No old parameter names given");
173 }
174 ParToColNames = (char **)malloc(NParToCol * sizeof(char *));
175 for (i = 0; i < NParToCol; i++) {
176 ParToColNames[i] = s_arg[i_arg].list[i + 1];
177 }
178 break;
179 case CLO_NEWPARAMETERS:
180 NColToPar = s_arg[i_arg].n_items - 1;
181 if (!NColToPar) {
182 SDDS_Bomb("No old column names given");
183 }
184 ColToParNames = (char **)malloc(NColToPar * sizeof(char *));
185 for (i = 0; i < NColToPar; i++) {
186 ColToParNames[i] = s_arg[i_arg].list[i + 1];
187 }
188 break;
189 default:
190 SDDS_Bomb("unrecognized option given");
191 }
192 } else {
193 if (!inputfile)
194 inputfile = s_arg[i_arg].list[0];
195 else if (!outputfile)
196 outputfile = s_arg[i_arg].list[0];
197 else
198 SDDS_Bomb("too many filenames given");
199 }
200 }
201
202 processFilenames("sddsregroup", &inputfile, &outputfile, pipeFlags, noWarnings, &tmpfile_used);
203
204 if (!SDDS_InitializeInput(&inputPage, inputfile))
205 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
206 if (0 < SDDS_ReadTable(&inputPage))
207 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
208 InputColumns = (char **)SDDS_GetColumnNames(&inputPage, &NInputColumns);
209 InputParameters = (char **)SDDS_GetParameterNames(&inputPage, &NInputParameters);
210 InputDescription = InputContents = NULL;
211 if (!SDDS_GetDescription(&inputPage, &InputDescription, &InputContents))
212 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
213 NInputRows = SDDS_CountRowsOfInterest(&inputPage);
214 ColToParInputIndex = (long *)malloc(NColToPar * sizeof(long));
215 ColToParOutputIndex = (long *)malloc(NColToPar * sizeof(long));
216 NColToCol = NInputColumns - NColToPar;
217 ColToColInputIndex = (long *)malloc(NColToCol * sizeof(long));
218 ColToColOutputIndex = (long *)malloc(NColToCol * sizeof(long));
219 ParToColInputIndex = (long *)malloc(NParToCol * sizeof(long));
220 ParToColOutputIndex = (long *)malloc(NParToCol * sizeof(long));
221
222 /*******************************\
223 * Check existence of selected *
224 * columns and parameters *
225 \*******************************/
226 for (i = 0; i < NColToPar; i++) {
227 switch (SDDS_CheckColumn(&inputPage, ColToParNames[i], NULL, 0, NULL)) {
228 case SDDS_CHECK_NONEXISTENT:
229 fprintf(stderr, "Error: Input file doesn't contain column %s.\n", ColToParNames[i]);
230 exit(EXIT_FAILURE);
231 }
232 }
233 for (i = 0; i < NParToCol; i++) {
234 switch (SDDS_CheckParameter(&inputPage, ParToColNames[i], NULL, 0, NULL)) {
235 case SDDS_CHECK_NONEXISTENT:
236 fprintf(stderr, "Error: Input file doesn't contain parameter %s.\n", ParToColNames[i]);
237 exit(EXIT_FAILURE);
238 }
239 }
240
241 /*****************************************\
242 * Make copies of pages of the input file *
243 \*****************************************/
244 NInputPages = 0;
245 if (verbose) {
246 init_stats();
247 }
248
249 do {
250 if (!NInputPages) {
251 inputPages = (SDDS_TABLE *)malloc(pageIncrement * sizeof(SDDS_TABLE));
252 } else if (!(NInputPages % pageIncrement)) {
253 inputPages = (SDDS_TABLE *)realloc(inputPages, (NInputPages + pageIncrement) * sizeof(SDDS_TABLE));
254 }
255 if (NInputRows != SDDS_CountRowsOfInterest(&inputPage)) {
256 fprintf(stderr, "Error: Number of rows in pages are not all equal.\n");
257 exit(EXIT_FAILURE);
258 }
259 if (!SDDS_InitializeCopy(&inputPages[NInputPages], &inputPage, NULL, "m"))
260 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
261 if (!SDDS_CopyTable(&inputPages[NInputPages], &inputPage))
262 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
263 if (verbose) {
264 fprintf(stderr, "Reading page %ld...\n", NInputPages);
265 }
266
267 NInputPages++;
268 } while (0 < SDDS_ReadTable(&inputPage));
269
270 if (!SDDS_Terminate(&inputPage))
271 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
272
273 if (InputDescription) {
274 OutputDescription = (char *)malloc((strlen(InputDescription) + strlen(", regrouped") + 1) * sizeof(char));
275 OutputDescription = strcat(strcpy(OutputDescription, InputDescription), ", regrouped");
276 } else {
277 OutputDescription = (char *)malloc((strlen("File regrouped") + strlen(inputfile ? inputfile : "from pipe") + 1) * sizeof(char));
278 sprintf(OutputDescription, "File %s regrouped", inputfile ? inputfile : "from pipe");
279 }
280 if (InputContents) {
281 OutputContents = (char *)malloc((strlen(InputContents) + strlen(", regrouped") + 1) * sizeof(char));
282 OutputContents = strcat(strcpy(OutputContents, InputContents), ", regrouped");
283 } else {
284 OutputContents = (char *)malloc((strlen("File regrouped") + strlen(inputfile ? inputfile : "from pipe") + 1) * sizeof(char));
285 sprintf(OutputContents, "File %s regrouped", inputfile ? inputfile : "from pipe");
286 }
287 if (!SDDS_InitializeOutput(&outputPage, SDDS_BINARY, 0, OutputDescription, OutputContents, outputfile))
288 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
289 if (columnMajorOrder != -1)
290 outputPage.layout.data_mode.column_major = columnMajorOrder;
291 else
292 outputPage.layout.data_mode.column_major = inputPage.layout.data_mode.column_major;
293 /*************************************************\
294 * Define columns and parameters and store indices *
295 \*************************************************/
296
297 /******************************************************\
298 * Selected input columns are transferred to parameters *
299 \******************************************************/
300 for (i = 0; i < NColToPar; i++) {
301 if (!SDDS_DefineParameterLikeColumn(&outputPage, &inputPages[0], ColToParNames[i], ColToParNames[i]))
302 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
303 ColToParInputIndex[i] = SDDS_GetColumnIndex(&inputPages[0], ColToParNames[i]);
304 ColToParOutputIndex[i] = SDDS_GetParameterIndex(&outputPage, ColToParNames[i]);
305 }
306 /****************************************************\
307 * Selected input parameters are transferred to columns *
308 \****************************************************/
309 for (i = 0; i < NParToCol; i++) {
310 if (!SDDS_DefineColumnLikeParameter(&outputPage, &inputPages[0], ParToColNames[i], ParToColNames[i]))
311 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
312 ParToColInputIndex[i] = SDDS_GetParameterIndex(&inputPages[0], ParToColNames[i]);
313 ParToColOutputIndex[i] = SDDS_GetColumnIndex(&outputPage, ParToColNames[i]);
314 }
315 /***********************************\
316 * Columns are transferred to columns *
317 \***********************************/
318 j = 0;
319 for (i = 0; i < NInputColumns; i++) {
320 if (0 > match_string(InputColumns[i], ColToParNames, NColToPar, EXACT_MATCH)) {
321 if (0 > SDDS_TransferColumnDefinition(&outputPage, &inputPages[0], InputColumns[i], InputColumns[i]))
322 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
323 ColToColInputIndex[j] = SDDS_GetColumnIndex(&inputPages[0], InputColumns[i]);
324 ColToColOutputIndex[j] = SDDS_GetColumnIndex(&outputPage, InputColumns[i]);
325 j++;
326 }
327 }
328 if (j != NColToCol)
329 SDDS_Bomb("Error: Something went wrong with counting the columns. Report to author.");
330 if (!SDDS_WriteLayout(&outputPage))
331 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
332 /*******************************\
333 * Fill pages in the output file *
334 \*******************************/
335 NOutputPages = NInputRows;
336 NOutputRows = NInputPages;
337 for (ipage = 0; ipage < NOutputPages; ipage++) {
338 if (verbose)
339 fprintf(stderr, "Starting page %ld...\n", ipage);
340 SDDS_StartTable(&outputPage, NOutputRows);
341 /* Set parameters */
342 for (i = 0; i < NColToPar; i++) {
343 if (!SDDS_SetParameters(&outputPage, SDDS_SET_BY_INDEX | SDDS_PASS_BY_REFERENCE, ColToParOutputIndex[i], SDDS_GetValueByAbsIndex(&inputPages[0], ColToParInputIndex[i], ipage, NULL), -1))
344 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
345 }
346 /* Set columns */
347 for (i = 0; i < NParToCol; i++) {
348 /* Transfer parameters of input file to columns of output */
349 for (row = 0; row < NOutputRows; row++) {
350 if (!SDDS_SetRowValues(&outputPage, SDDS_SET_BY_INDEX | SDDS_PASS_BY_REFERENCE, row, ParToColOutputIndex[i], SDDS_GetParameter(&inputPages[row], InputParameters[ParToColInputIndex[i]], NULL), -1))
351 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
352 }
353 }
354 for (i = 0; i < NColToCol; i++) {
355 for (row = 0; row < NOutputRows; row++) {
356 if (!SDDS_SetRowValues(&outputPage, SDDS_SET_BY_INDEX | SDDS_PASS_BY_REFERENCE, row, ColToColOutputIndex[i], SDDS_GetValueByAbsIndex(&inputPages[row], ColToColInputIndex[i], ipage, NULL), -1))
357 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
358 }
359 }
360
361 if (!SDDS_WriteTable(&outputPage))
362 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
363 }
364
365 for (i = 0; i < NInputPages; i++) {
366 if (!SDDS_Terminate(&inputPages[i]))
367 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
368 }
369 if (inputPages)
370 free(inputPages);
371 if (!SDDS_Terminate(&outputPage))
372 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
373
374 if (tmpfile_used && !replaceFileAndBackUp(inputfile, outputfile))
375 exit(EXIT_FAILURE);
376 return EXIT_SUCCESS;
377}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
void * SDDS_GetParameter(SDDS_DATASET *SDDS_dataset, char *parameter_name, void *memory)
Retrieves the value of a specified parameter from the current data table of a data set.
void * SDDS_GetValueByAbsIndex(SDDS_DATASET *SDDS_dataset, int32_t column_index, int64_t row_index, void *memory)
Retrieves the value from a specified column and absolute row index, optionally storing it in provided...
int32_t SDDS_GetDescription(SDDS_DATASET *SDDS_dataset, char **text, char **contents)
Retrieves the text and contents descriptions from an SDDS dataset.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_DefineParameterLikeColumn(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a parameter in the target dataset based on a column definition from the source dataset.
int32_t SDDS_DefineColumnLikeParameter(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a column in the target dataset based on a parameter definition from the source dataset.
char ** SDDS_GetParameterNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all parameters in the SDDS dataset.
int32_t SDDS_GetParameterIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named parameter in the SDDS dataset.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CheckParameter(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a parameter exists in the SDDS dataset with the specified name, units, and type.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.