SDDSlib
Loading...
Searching...
No Matches
sddsregroup.c File Reference

Regroups SDDS files by reorganizing rows and pages. More...

#include "mdb.h"
#include "scan.h"
#include "match_string.h"
#include "matlib.h"
#include "SDDS.h"

Go to the source code of this file.

Enumerations

enum  option_type {
  CLO_NEWCOLUMNS , CLO_NEWPARAMETERS , CLO_WARNING , CLO_VERBOSE ,
  CLO_PIPE , CLO_MAJOR_ORDER , N_OPTIONS
}
 

Functions

int main (int argc, char **argv)
 

Variables

char * commandline_option [N_OPTIONS]
 
static char * USAGE
 

Detailed Description

Regroups SDDS files by reorganizing rows and pages.

This program processes an SDDS file in which every page contains the same number of rows and produces an output file with regrouped pages. If the input file contains m pages with n rows each, the output file will contain n pages with m rows each. The program exits with an error if the pages do not all have the same number of rows. Users can specify which columns from the input file will become parameters in the output file and vice versa. Additionally, parameters from the input file can be duplicated as extra columns in the output file.

Features

  • Transforms rows into pages and vice versa.
  • Allows selection of input columns to be converted to output parameters.
  • Supports selection of input parameters to be converted to output columns.
  • Outputs data in either row-major or column-major order as specified.
  • Supports reading input from and/or writing output to a pipe via the -pipe option.

Usage

sddsregroup <inputfile> <outputfile> [OPTIONS]

Options

  • -pipe=[input][,output]
    Read input from and/or write output to a pipe.
  • -newparameters=<col1>[,<col2>,...]
    Specifies columns from the input file to become parameters in the output file.
  • -newcolumns=<param1>[,<param2>,...]
    Specifies parameters from the input file to become columns in the output file.
  • -majorOrder=row|column
    Specifies the data order of the output file (row-major or column-major).
  • -warning
    Accepted, but currently has no effect: the assignment that would set the warning flag is commented out in main().
  • -verbose
    Enables verbose output for detailed information during execution.
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Author
M. Borland, C. Saunders, R. Soliday, H. Shang

Definition in file sddsregroup.c.

Enumeration Type Documentation

◆ option_type

enum option_type

Definition at line 54 of file sddsregroup.c.

54 {
55 CLO_NEWCOLUMNS,
56 CLO_NEWPARAMETERS,
57 CLO_WARNING,
58 CLO_VERBOSE,
59 CLO_PIPE,
60 CLO_MAJOR_ORDER,
61 N_OPTIONS
62};

Function Documentation

◆ main()

int main ( int argc,
char ** argv )

Definition at line 93 of file sddsregroup.c.

93 {
94 SCANNED_ARG *s_arg;
95 SDDS_TABLE inputPage, *inputPages, outputPage;
96
97 char *inputfile, *outputfile;
98 char *InputDescription, *InputContents;
99 char *OutputDescription, *OutputContents;
100 char **InputParameters, **InputColumns, **ColToParNames, **ParToColNames;
101 int32_t NInputParameters, NInputColumns;
102 long NColToPar, NParToCol, NColToCol;
103 int64_t NInputRows, NOutputRows;
104 long NInputPages, NOutputPages;
105 long *ColToColInputIndex, *ColToParInputIndex, *ParToColInputIndex;
106 long *ColToColOutputIndex, *ColToParOutputIndex, *ParToColOutputIndex;
107 long pageIncrement = 20;
108
109 long i, i_arg, j;
110 long ipage;
111 int64_t row;
112 long verbose;
113 /* long warning; */
114 unsigned long pipeFlags, majorOrderFlag;
115 long tmpfile_used, noWarnings;
116 short columnMajorOrder = -1;
117
118 inputPages = NULL;
119
121 argc = scanargs(&s_arg, argc, argv);
122 if (argc == 1)
123 bomb(NULL, USAGE);
124
125 inputfile = outputfile = NULL;
126 InputDescription = InputContents = NULL;
127 OutputDescription = OutputContents = NULL;
128 InputParameters = InputColumns = ColToParNames = ParToColNames = NULL;
129 NInputParameters = NInputColumns = NColToPar = NParToCol = NColToCol = 0;
130 NInputRows = NOutputRows = NInputPages = NOutputPages = 0;
131 ColToColInputIndex = ColToParInputIndex = ParToColInputIndex = NULL;
132 ColToColOutputIndex = ColToParOutputIndex = ParToColOutputIndex = NULL;
133
134 verbose = 0;
135 /* warning = 0; */
136 pipeFlags = 0;
137 tmpfile_used = 0;
138 noWarnings = 0;
139 for (i_arg = 1; i_arg < argc; i_arg++) {
140 if (s_arg[i_arg].arg_type == OPTION) {
141 switch (match_string(s_arg[i_arg].list[0], commandline_option, N_OPTIONS, UNIQUE_MATCH)) {
142 case CLO_MAJOR_ORDER:
143 majorOrderFlag = 0;
144 s_arg[i_arg].n_items--;
145 if (s_arg[i_arg].n_items > 0 && (!scanItemList(&majorOrderFlag, s_arg[i_arg].list + 1, &s_arg[i_arg].n_items, 0, "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER, "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
146 SDDS_Bomb("invalid -majorOrder syntax/values");
147 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
148 columnMajorOrder = 1;
149 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
150 columnMajorOrder = 0;
151 break;
152 case CLO_VERBOSE:
153 verbose = 1;
154 break;
155 case CLO_WARNING:
156 /* warning = 1; */
157 break;
158 case CLO_PIPE:
159 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipeFlags))
160 SDDS_Bomb("invalid -pipe syntax");
161 break;
162 case CLO_NEWCOLUMNS:
163 NParToCol = s_arg[i_arg].n_items - 1;
164 if (!NParToCol) {
165 SDDS_Bomb("No old parameter names given");
166 }
167 ParToColNames = (char **)malloc(NParToCol * sizeof(char *));
168 for (i = 0; i < NParToCol; i++) {
169 ParToColNames[i] = s_arg[i_arg].list[i + 1];
170 }
171 break;
172 case CLO_NEWPARAMETERS:
173 NColToPar = s_arg[i_arg].n_items - 1;
174 if (!NColToPar) {
175 SDDS_Bomb("No old column names given");
176 }
177 ColToParNames = (char **)malloc(NColToPar * sizeof(char *));
178 for (i = 0; i < NColToPar; i++) {
179 ColToParNames[i] = s_arg[i_arg].list[i + 1];
180 }
181 break;
182 default:
183 SDDS_Bomb("unrecognized option given");
184 }
185 } else {
186 if (!inputfile)
187 inputfile = s_arg[i_arg].list[0];
188 else if (!outputfile)
189 outputfile = s_arg[i_arg].list[0];
190 else
191 SDDS_Bomb("too many filenames given");
192 }
193 }
194
195 processFilenames("sddsregroup", &inputfile, &outputfile, pipeFlags, noWarnings, &tmpfile_used);
196
197 if (!SDDS_InitializeInput(&inputPage, inputfile))
198 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
199 if (0 < SDDS_ReadTable(&inputPage))
200 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
201 InputColumns = (char **)SDDS_GetColumnNames(&inputPage, &NInputColumns);
202 InputParameters = (char **)SDDS_GetParameterNames(&inputPage, &NInputParameters);
203 InputDescription = InputContents = NULL;
204 if (!SDDS_GetDescription(&inputPage, &InputDescription, &InputContents))
205 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
206 NInputRows = SDDS_CountRowsOfInterest(&inputPage);
207 ColToParInputIndex = (long *)malloc(NColToPar * sizeof(long));
208 ColToParOutputIndex = (long *)malloc(NColToPar * sizeof(long));
209 NColToCol = NInputColumns - NColToPar;
210 ColToColInputIndex = (long *)malloc(NColToCol * sizeof(long));
211 ColToColOutputIndex = (long *)malloc(NColToCol * sizeof(long));
212 ParToColInputIndex = (long *)malloc(NParToCol * sizeof(long));
213 ParToColOutputIndex = (long *)malloc(NParToCol * sizeof(long));
214
215 /*******************************\
216 * Check existence of selected *
217 * columns and parameters *
218 \*******************************/
219 for (i = 0; i < NColToPar; i++) {
220 switch (SDDS_CheckColumn(&inputPage, ColToParNames[i], NULL, 0, NULL)) {
221 case SDDS_CHECK_NONEXISTENT:
222 fprintf(stderr, "Error: Input file doesn't contain column %s.\n", ColToParNames[i]);
223 exit(EXIT_FAILURE);
224 }
225 }
226 for (i = 0; i < NParToCol; i++) {
227 switch (SDDS_CheckParameter(&inputPage, ParToColNames[i], NULL, 0, NULL)) {
228 case SDDS_CHECK_NONEXISTENT:
229 fprintf(stderr, "Error: Input file doesn't contain parameter %s.\n", ParToColNames[i]);
230 exit(EXIT_FAILURE);
231 }
232 }
233
234 /*****************************************\
235 * Make copies of pages of the input file *
236 \*****************************************/
237 NInputPages = 0;
238 if (verbose) {
239 init_stats();
240 }
241
242 do {
243 if (!NInputPages) {
244 inputPages = (SDDS_TABLE *)malloc(pageIncrement * sizeof(SDDS_TABLE));
245 } else if (!(NInputPages % pageIncrement)) {
246 inputPages = (SDDS_TABLE *)realloc(inputPages, (NInputPages + pageIncrement) * sizeof(SDDS_TABLE));
247 }
248 if (NInputRows != SDDS_CountRowsOfInterest(&inputPage)) {
249 fprintf(stderr, "Error: Number of rows in pages are not all equal.\n");
250 exit(EXIT_FAILURE);
251 }
252 if (!SDDS_InitializeCopy(&inputPages[NInputPages], &inputPage, NULL, "m"))
253 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
254 if (!SDDS_CopyTable(&inputPages[NInputPages], &inputPage))
255 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
256 if (verbose) {
257 fprintf(stderr, "Reading page %ld...\n", NInputPages);
258 }
259
260 NInputPages++;
261 } while (0 < SDDS_ReadTable(&inputPage));
262
263 if (!SDDS_Terminate(&inputPage))
264 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
265
266 if (InputDescription) {
267 OutputDescription = (char *)malloc((strlen(InputDescription) + strlen(", regrouped") + 1) * sizeof(char));
268 OutputDescription = strcat(strcpy(OutputDescription, InputDescription), ", regrouped");
269 } else {
270 OutputDescription = (char *)malloc((strlen("File regrouped") + strlen(inputfile ? inputfile : "from pipe") + 1) * sizeof(char));
271 sprintf(OutputDescription, "File %s regrouped", inputfile ? inputfile : "from pipe");
272 }
273 if (InputContents) {
274 OutputContents = (char *)malloc((strlen(InputContents) + strlen(", regrouped") + 1) * sizeof(char));
275 OutputContents = strcat(strcpy(OutputContents, InputContents), ", regrouped");
276 } else {
277 OutputContents = (char *)malloc((strlen("File regrouped") + strlen(inputfile ? inputfile : "from pipe") + 1) * sizeof(char));
278 sprintf(OutputContents, "File %s regrouped", inputfile ? inputfile : "from pipe");
279 }
280 if (!SDDS_InitializeOutput(&outputPage, SDDS_BINARY, 0, OutputDescription, OutputContents, outputfile))
281 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
282 if (columnMajorOrder != -1)
283 outputPage.layout.data_mode.column_major = columnMajorOrder;
284 else
285 outputPage.layout.data_mode.column_major = inputPage.layout.data_mode.column_major;
286 /*************************************************\
287 * Define columns and parameters and store indices *
288 \*************************************************/
289
290 /******************************************************\
291 * Selected input columns are transferred to parameters *
292 \******************************************************/
293 for (i = 0; i < NColToPar; i++) {
294 if (!SDDS_DefineParameterLikeColumn(&outputPage, &inputPages[0], ColToParNames[i], ColToParNames[i]))
295 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
296 ColToParInputIndex[i] = SDDS_GetColumnIndex(&inputPages[0], ColToParNames[i]);
297 ColToParOutputIndex[i] = SDDS_GetParameterIndex(&outputPage, ColToParNames[i]);
298 }
299 /****************************************************\
300 * Selected input parameters are transferred to columns *
301 \****************************************************/
302 for (i = 0; i < NParToCol; i++) {
303 if (!SDDS_DefineColumnLikeParameter(&outputPage, &inputPages[0], ParToColNames[i], ParToColNames[i]))
304 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
305 ParToColInputIndex[i] = SDDS_GetParameterIndex(&inputPages[0], ParToColNames[i]);
306 ParToColOutputIndex[i] = SDDS_GetColumnIndex(&outputPage, ParToColNames[i]);
307 }
308 /***********************************\
309 * Columns are transferred to columns *
310 \***********************************/
311 j = 0;
312 for (i = 0; i < NInputColumns; i++) {
313 if (0 > match_string(InputColumns[i], ColToParNames, NColToPar, EXACT_MATCH)) {
314 if (0 > SDDS_TransferColumnDefinition(&outputPage, &inputPages[0], InputColumns[i], InputColumns[i]))
315 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
316 ColToColInputIndex[j] = SDDS_GetColumnIndex(&inputPages[0], InputColumns[i]);
317 ColToColOutputIndex[j] = SDDS_GetColumnIndex(&outputPage, InputColumns[i]);
318 j++;
319 }
320 }
321 if (j != NColToCol)
322 SDDS_Bomb("Error: Something went wrong with counting the columns. Report to author.");
323 if (!SDDS_WriteLayout(&outputPage))
324 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
325 /*******************************\
326 * Fill pages in the output file *
327 \*******************************/
328 NOutputPages = NInputRows;
329 NOutputRows = NInputPages;
330 for (ipage = 0; ipage < NOutputPages; ipage++) {
331 if (verbose)
332 fprintf(stderr, "Starting page %ld...\n", ipage);
333 SDDS_StartTable(&outputPage, NOutputRows);
334 /* Set parameters */
335 for (i = 0; i < NColToPar; i++) {
336 if (!SDDS_SetParameters(&outputPage, SDDS_SET_BY_INDEX | SDDS_PASS_BY_REFERENCE, ColToParOutputIndex[i], SDDS_GetValueByAbsIndex(&inputPages[0], ColToParInputIndex[i], ipage, NULL), -1))
337 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
338 }
339 /* Set columns */
340 for (i = 0; i < NParToCol; i++) {
341 /* Transfer parameters of input file to columns of output */
342 for (row = 0; row < NOutputRows; row++) {
343 if (!SDDS_SetRowValues(&outputPage, SDDS_SET_BY_INDEX | SDDS_PASS_BY_REFERENCE, row, ParToColOutputIndex[i], SDDS_GetParameter(&inputPages[row], InputParameters[ParToColInputIndex[i]], NULL), -1))
344 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
345 }
346 }
347 for (i = 0; i < NColToCol; i++) {
348 for (row = 0; row < NOutputRows; row++) {
349 if (!SDDS_SetRowValues(&outputPage, SDDS_SET_BY_INDEX | SDDS_PASS_BY_REFERENCE, row, ColToColOutputIndex[i], SDDS_GetValueByAbsIndex(&inputPages[row], ColToColInputIndex[i], ipage, NULL), -1))
350 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
351 }
352 }
353
354 if (!SDDS_WriteTable(&outputPage))
355 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
356 }
357
358 for (i = 0; i < NInputPages; i++) {
359 if (!SDDS_Terminate(&inputPages[i]))
360 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
361 }
362 if (inputPages)
363 free(inputPages);
364 if (!SDDS_Terminate(&outputPage))
365 SDDS_PrintErrors(stdout, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
366
367 if (tmpfile_used && !replaceFileAndBackUp(inputfile, outputfile))
368 exit(EXIT_FAILURE);
369 return EXIT_SUCCESS;
370}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
void * SDDS_GetParameter(SDDS_DATASET *SDDS_dataset, char *parameter_name, void *memory)
Retrieves the value of a specified parameter from the current data table of a data set.
void * SDDS_GetValueByAbsIndex(SDDS_DATASET *SDDS_dataset, int32_t column_index, int64_t row_index, void *memory)
Retrieves the value from a specified column and absolute row index, optionally storing it in provided...
int32_t SDDS_GetDescription(SDDS_DATASET *SDDS_dataset, char **text, char **contents)
Retrieves the text and contents descriptions from an SDDS dataset.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_DefineParameterLikeColumn(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a parameter in the target dataset based on a column definition from the source dataset.
int32_t SDDS_DefineColumnLikeParameter(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a column in the target dataset based on a parameter definition from the source dataset.
char ** SDDS_GetParameterNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all parameters in the SDDS dataset.
int32_t SDDS_GetParameterIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named parameter in the SDDS dataset.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CheckParameter(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a parameter exists in the SDDS dataset with the specified name, units, and type.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.

Variable Documentation

◆ commandline_option

char* commandline_option[N_OPTIONS]
Initial value:
= {
"newcolumns",
"newparameters",
"warning",
"verbose",
"pipe",
"majorOrder",
}

Definition at line 64 of file sddsregroup.c.

64 {
65 "newcolumns",
66 "newparameters",
67 "warning",
68 "verbose",
69 "pipe",
70 "majorOrder",
71};

◆ USAGE

char* USAGE
static
Initial value:
=
"Usage: sddsregroup <inputfile> <outputfile> [OPTIONS]\n\n"
"Reorganizes the data in the input file by taking single rows from each page of the input file\n"
"to form single pages of the output file.\n\n"
"Options:\n"
" -pipe=[input][,output] Read input from and/or write output to a pipe.\n"
" -newparameters=<col1>[,<col2>,...]\n"
" Specify which columns of the input file will become\n"
" parameters in the output file. By default, no new parameters\n"
" are created, and all columns of the input file are transferred\n"
" to the output file.\n"
" -newcolumns=<param1>[,<param2>,...]\n"
" Specify which parameters of the input file will become\n"
" columns in the output file. These columns will be duplicated\n"
" across all pages. By default, all parameter values are lost.\n"
" -majorOrder=row|column Specify the data order of the output file as row-major or column-major.\n"
" -warning Enable warning messages.\n"
" -verbose Enable verbose output.\n\n"
"Program by Louis Emery, ANL (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n"

Definition at line 73 of file sddsregroup.c.