SDDSlib
Loading...
Searching...
No Matches
sddsexpand.c
Go to the documentation of this file.
/**
 * @file sddsexpand.c
 * @brief A program for converting SDDS column data into parameters in a new SDDS file.
 *
 * This program reads an SDDS (Self Describing Data Set) file and creates a new SDDS file where
 * columns in the input file are converted to parameters. Each page in the output file corresponds
 * to a row in the input file. This functionality is effectively the inverse of the sddscollapse
 * program. The program handles various options including piping, warning suppression, and setting
 * the major order for data storage.
 *
 * ### Key Features:
 * - Converts column data to parameters in the output file.
 * - Supports row-major and column-major data storage.
 * - Provides options for piping and warning suppression.
 * - Handles multiple pages of input data.
 *
 * ### Usage:
 * ```
 * sddsexpand [-pipe=[input][,output]] [<SDDSinputfile>] [<SDDSoutputfile>]
 *            [-noWarnings] [-majorOrder=row|column]
 * ```
 *
 * ### Example:
 * Convert the data in `input.sdds` to parameters and write to `output.sdds`:
 * ```
 * sddsexpand input.sdds output.sdds
 * ```
 *
 * ### Options:
 * - `-pipe=[input][,output]` : Use piping for input and/or output.
 * - `-noWarnings`            : Suppress warnings.
 * - `-majorOrder=row|column` : Specify the major order of data storage.
 *
 * ### Notes:
 * - All columns in the input file are converted to parameters in the output file.
 * - For each row of each page in the input file, a new page is created in the output file.
 *
 * @copyright
 *   - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
 *   - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
 *
 * @license
 * This file is distributed under the terms of the Software License Agreement
 * found in the file LICENSE included with this distribution.
 *
 * @author M. Borland, R. Soliday, H. Shang
 */
48
49#include "mdb.h"
50#include "scan.h"
51#include "SDDS.h"
52
/**
 * @brief Enumeration for command-line options.
 *
 * main() switches on the result of match_string() over option[] using these
 * constants as the case labels, so the enumerator order must stay in step
 * with the order of the strings in option[] below.
 */
typedef enum {
  SET_PIPE,        /* Option for piping input and output */
  SET_NOWARNINGS,  /* Option to suppress warnings */
  SET_MAJOR_ORDER, /* Option to set row/column major order */
  N_OPTIONS        /* Number of options; also the length of option[] */
} OptionType;

/* Array of command-line option strings, one entry per OptionType value
 * (excluding N_OPTIONS), in the same order as the enumerators. */
static char *option[N_OPTIONS] = {
  "pipe",
  "nowarnings",
  "majorOrder",
};
67
/* SVN_VERSION is spliced into the usage text below. It is expected to be
 * supplied by the build or a project header; fall back to a placeholder so
 * the string literal still compiles when it is not. */
#ifndef SVN_VERSION
#  define SVN_VERSION "unknown"
#endif

/* Program usage message, printed via bomb() when no arguments are given. */
static char *USAGE =
  "sddsexpand [-pipe=[input][,output]] [<SDDSinputfile>] [<SDDSoutputfile>]\n"
  "[-noWarnings] [-majorOrder=row|column] \n\n"
  "sddsexpand is the partial inverse of sddscollapse.\n"
  "All columns of the input file are turned into parameters in the output file.\n"
  "For each row of each page in the input file, sddsexpand emits a new page\n"
  "with parameter values equal to the column values for that page and row.\n\n"
  "Program by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
77
#define ROW_INCREMENT (100) /* Increment size for row allocation */
79
/**
 * @brief Describes one input item (a column or a parameter) that is written
 *        to the output file as a parameter.
 *
 * main() keeps one array of these for the input columns and one for the
 * input parameters.
 */
typedef struct {
  char *name;    /* Name of the data source in the input file */
  long size;     /* Size in bytes of the data type, as reported by SDDS_GetTypeSize() */
  long index;    /* Index of the corresponding parameter in the output data set */
  short do_copy; /* Non-zero if this item should be copied to the output */
} SourceData;
87
88int main(int argc, char **argv) {
89 SDDS_DATASET SDDS_input, SDDS_output;
90 char *inputfile = NULL, *outputfile = NULL;
91 int64_t rows, irow;
92 long i, no_warnings = 0;
93 SCANNED_ARG *s_arg;
94 unsigned long pipe_flags = 0, major_order_flag;
95 void **data = NULL;
96 SourceData *column_source = NULL, *parameter_source = NULL;
97 int32_t column_sources, parameter_sources;
98 char **name;
99 char buffer[32];
100 short column_major_order = -1;
101
102 /* Register the program name for error reporting */
104
105 /* Parse command-line arguments */
106 argc = scanargs(&s_arg, argc, argv);
107 if (argc < 2)
108 bomb(NULL, USAGE);
109
110 /* Process each argument */
111 for (int i_arg = 1; i_arg < argc; i_arg++) {
112 if (s_arg[i_arg].arg_type == OPTION) {
113 /* Match the argument to known options */
114 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
115 case SET_MAJOR_ORDER:
116 major_order_flag = 0;
117 s_arg[i_arg].n_items--;
118 if (s_arg[i_arg].n_items > 0 && (!scanItemList(&major_order_flag, s_arg[i_arg].list + 1, &s_arg[i_arg].n_items, 0,
119 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
120 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL))) {
121 SDDS_Bomb("invalid -majorOrder syntax/values");
122 }
123 column_major_order = (major_order_flag & SDDS_COLUMN_MAJOR_ORDER) ? 1 : 0;
124 break;
125 case SET_PIPE:
126 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipe_flags))
127 SDDS_Bomb("invalid -pipe syntax");
128 break;
129 case SET_NOWARNINGS:
130 no_warnings = 1;
131 break;
132 default:
133 fprintf(stderr, "error: unknown switch: %s\n", s_arg[i_arg].list[0]);
134 exit(EXIT_FAILURE);
135 }
136 } else {
137 /* Handle positional arguments */
138 if (!inputfile)
139 inputfile = s_arg[i_arg].list[0];
140 else if (!outputfile)
141 outputfile = s_arg[i_arg].list[0];
142 else
143 SDDS_Bomb("too many filenames");
144 }
145 }
146
147 /* Handle filenames and initialize input and output datasets */
148 processFilenames("sddsexpand", &inputfile, &outputfile, pipe_flags, no_warnings, NULL);
149
150 if (!SDDS_InitializeInput(&SDDS_input, inputfile) ||
151 !SDDS_InitializeOutput(&SDDS_output, SDDS_BINARY, 1, NULL, NULL, outputfile)) {
152 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
153 }
154
155 /* Set major order for output dataset */
156 SDDS_output.layout.data_mode.column_major = (column_major_order != -1) ? column_major_order : SDDS_input.layout.data_mode.column_major;
157
158 /* Retrieve column names from input dataset */
159 if (!(name = SDDS_GetColumnNames(&SDDS_input, &column_sources)) ||
160 !(column_source = SDDS_Malloc(sizeof(*column_source) * column_sources))) {
161 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
162 }
163
164 /* Define parameters based on input columns */
165 for (i = 0; i < column_sources; i++) {
166 column_source[i].name = name[i];
167 if (!SDDS_DefineParameterLikeColumn(&SDDS_output, &SDDS_input, column_source[i].name, NULL) ||
168 (column_source[i].index = SDDS_GetParameterIndex(&SDDS_output, column_source[i].name)) < 0) {
169 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
170 }
171 column_source[i].size = SDDS_GetTypeSize(SDDS_GetParameterType(&SDDS_output, column_source[i].index));
172 }
173
174 /* Retrieve parameter names from input dataset */
175 if (!(name = SDDS_GetParameterNames(&SDDS_input, &parameter_sources)) ||
176 !(parameter_source = SDDS_Malloc(sizeof(*parameter_source) * parameter_sources))) {
177 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
178 }
179
180 /* Handle parameter definitions and potential name conflicts */
181 for (i = 0; i < parameter_sources; i++) {
182 parameter_source[i].name = name[i];
183 if (SDDS_GetParameterIndex(&SDDS_output, parameter_source[i].name) >= 0) {
184 if (!no_warnings)
185 fprintf(stderr, "Warning (sddsexpand): name %s used for parameter and column in input file. Column data used.\n", parameter_source[i].name);
186 parameter_source[i].do_copy = 0;
187 continue;
188 }
189 parameter_source[i].do_copy = 1;
190 if (!SDDS_TransferParameterDefinition(&SDDS_output, &SDDS_input, parameter_source[i].name, NULL) ||
191 (parameter_source[i].index = SDDS_GetParameterIndex(&SDDS_output, parameter_source[i].name)) < 0) {
192 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
193 }
194 parameter_source[i].size = SDDS_GetTypeSize(SDDS_GetParameterType(&SDDS_output, parameter_source[i].index));
195 }
196
197 /* Write layout to output dataset and allocate memory for column data */
198 if (!SDDS_WriteLayout(&SDDS_output) ||
199 !(data = SDDS_Malloc(sizeof(*data) * column_sources))) {
200 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
201 }
202
203 /* Process each page of the input dataset */
204 while (SDDS_ReadPage(&SDDS_input) > 0) {
205 if ((rows = SDDS_RowCount(&SDDS_input)) < 0)
206 continue;
207
208 /* Retrieve data for each column */
209 for (i = 0; i < column_sources; i++) {
210 if (!(data[i] = SDDS_GetInternalColumn(&SDDS_input, column_source[i].name))) {
211 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
212 }
213 }
214
215 /* Process each row of the current page */
216 for (irow = 0; irow < rows; irow++) {
217 if (!SDDS_StartPage(&SDDS_output, 0)) {
218 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
219 }
220
221 /* Set parameter values in the output dataset */
222 for (i = 0; i < parameter_sources; i++) {
223 if (!parameter_source[i].do_copy)
224 continue;
225 if (!SDDS_GetParameter(&SDDS_input, parameter_source[i].name, buffer) ||
226 !SDDS_SetParameters(&SDDS_output, SDDS_SET_BY_INDEX | SDDS_PASS_BY_REFERENCE, parameter_source[i].index, buffer, -1)) {
227 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
228 }
229 }
230
231 /* Set column values as parameters in the output dataset */
232 for (i = 0; i < column_sources; i++) {
233 if (!SDDS_SetParameters(&SDDS_output, SDDS_SET_BY_INDEX | SDDS_PASS_BY_REFERENCE, column_source[i].index, (((char *)data[i]) + irow * column_source[i].size), -1)) {
234 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
235 }
236 }
237
238 /* Write the current page to the output dataset */
239 if (!SDDS_WritePage(&SDDS_output)) {
240 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
241 }
242 }
243 }
244
245 /* Terminate the input and output datasets */
246 if (!SDDS_Terminate(&SDDS_input) || !SDDS_Terminate(&SDDS_output)) {
247 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
248 exit(EXIT_FAILURE);
249 }
250
251 /* Free allocated memory */
252 free(column_source);
253 free(parameter_source);
254 return 0;
255}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
void * SDDS_GetInternalColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves an internal pointer to the data of a specified column, including all rows.
void * SDDS_GetParameter(SDDS_DATASET *SDDS_dataset, char *parameter_name, void *memory)
Retrieves the value of a specified parameter from the current data table of a data set.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameterLikeColumn(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a parameter in the target dataset based on a column definition from the source dataset.
int32_t SDDS_TransferParameterDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a parameter definition from a source dataset to a target dataset.
int32_t SDDS_GetParameterType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a parameter in the SDDS dataset by its index.
char ** SDDS_GetParameterNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all parameters in the SDDS dataset.
int32_t SDDS_GetParameterIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named parameter in the SDDS dataset.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void * SDDS_Malloc(size_t size)
Allocates memory of a specified size.
Definition SDDS_utils.c:639
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_GetTypeSize(int32_t type)
Retrieves the size in bytes of a specified SDDS data type.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
OptionType
Enumeration for command-line options.