SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsexpand.c File Reference

Detailed Description

Converts SDDS column data into parameters in a new SDDS file.

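This program reads an SDDS (Self Describing Data Set) file and creates a new SDDS file in which every column of the input file becomes a parameter. Each page in the output file corresponds to one row of the input file, so an input page with N rows produces N output pages. Parameters of the input file are copied to every output page generated from their page; if a parameter and a column share a name, the column data is used and a warning is printed unless -noWarnings is given. This functionality is effectively the inverse of the sddscollapse program. The program supports piping, warning suppression, and selection of the major order used for data storage.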

Usage

sddsexpand [<SDDSinputfile>] [<SDDSoutputfile>]
[-pipe=[input][,output]]
[-noWarnings]
[-majorOrder=row|column]

Options

Option        Description
-pipe         Use piping for input and/or output.
-noWarnings   Suppress warnings during processing.
-majorOrder   Specify the major order for data storage (row or column); defaults to the major order of the input file.
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Author
M. Borland, R. Soliday, H. Shang

Definition in file sddsexpand.c.

#include "mdb.h"
#include "scan.h"
#include "SDDS.h"

Go to the source code of this file.

Functions

int main (int argc, char **argv)
 

Function Documentation

◆ main()

int main ( int argc,
char ** argv )

Definition at line 80 of file sddsexpand.c.

80 {
81 SDDS_DATASET SDDS_input, SDDS_output;
82 char *inputfile = NULL, *outputfile = NULL;
83 int64_t rows, irow;
84 long i, no_warnings = 0;
85 SCANNED_ARG *s_arg;
86 unsigned long pipe_flags = 0, major_order_flag;
87 void **data = NULL;
88 SourceData *column_source = NULL, *parameter_source = NULL;
89 int32_t column_sources, parameter_sources;
90 char **name;
91 char buffer[32];
92 short column_major_order = -1;
93
94 /* Register the program name for error reporting */
96
97 /* Parse command-line arguments */
98 argc = scanargs(&s_arg, argc, argv);
99 if (argc < 2)
100 bomb(NULL, USAGE);
101
102 /* Process each argument */
103 for (int i_arg = 1; i_arg < argc; i_arg++) {
104 if (s_arg[i_arg].arg_type == OPTION) {
105 /* Match the argument to known options */
106 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
107 case SET_MAJOR_ORDER:
108 major_order_flag = 0;
109 s_arg[i_arg].n_items--;
110 if (s_arg[i_arg].n_items > 0 && (!scanItemList(&major_order_flag, s_arg[i_arg].list + 1, &s_arg[i_arg].n_items, 0,
111 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
112 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL))) {
113 SDDS_Bomb("invalid -majorOrder syntax/values");
114 }
115 column_major_order = (major_order_flag & SDDS_COLUMN_MAJOR_ORDER) ? 1 : 0;
116 break;
117 case SET_PIPE:
118 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipe_flags))
119 SDDS_Bomb("invalid -pipe syntax");
120 break;
121 case SET_NOWARNINGS:
122 no_warnings = 1;
123 break;
124 default:
125 fprintf(stderr, "error: unknown switch: %s\n", s_arg[i_arg].list[0]);
126 exit(EXIT_FAILURE);
127 }
128 } else {
129 /* Handle positional arguments */
130 if (!inputfile)
131 inputfile = s_arg[i_arg].list[0];
132 else if (!outputfile)
133 outputfile = s_arg[i_arg].list[0];
134 else
135 SDDS_Bomb("too many filenames");
136 }
137 }
138
139 /* Handle filenames and initialize input and output datasets */
140 processFilenames("sddsexpand", &inputfile, &outputfile, pipe_flags, no_warnings, NULL);
141
142 if (!SDDS_InitializeInput(&SDDS_input, inputfile) ||
143 !SDDS_InitializeOutput(&SDDS_output, SDDS_BINARY, 1, NULL, NULL, outputfile)) {
144 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
145 }
146
147 /* Set major order for output dataset */
148 SDDS_output.layout.data_mode.column_major = (column_major_order != -1) ? column_major_order : SDDS_input.layout.data_mode.column_major;
149
150 /* Retrieve column names from input dataset */
151 if (!(name = SDDS_GetColumnNames(&SDDS_input, &column_sources)) ||
152 !(column_source = SDDS_Malloc(sizeof(*column_source) * column_sources))) {
153 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
154 }
155
156 /* Define parameters based on input columns */
157 for (i = 0; i < column_sources; i++) {
158 column_source[i].name = name[i];
159 if (!SDDS_DefineParameterLikeColumn(&SDDS_output, &SDDS_input, column_source[i].name, NULL) ||
160 (column_source[i].index = SDDS_GetParameterIndex(&SDDS_output, column_source[i].name)) < 0) {
161 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
162 }
163 column_source[i].size = SDDS_GetTypeSize(SDDS_GetParameterType(&SDDS_output, column_source[i].index));
164 }
165
166 /* Retrieve parameter names from input dataset */
167 if (!(name = SDDS_GetParameterNames(&SDDS_input, &parameter_sources)) ||
168 !(parameter_source = SDDS_Malloc(sizeof(*parameter_source) * parameter_sources))) {
169 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
170 }
171
172 /* Handle parameter definitions and potential name conflicts */
173 for (i = 0; i < parameter_sources; i++) {
174 parameter_source[i].name = name[i];
175 if (SDDS_GetParameterIndex(&SDDS_output, parameter_source[i].name) >= 0) {
176 if (!no_warnings)
177 fprintf(stderr, "Warning (sddsexpand): name %s used for parameter and column in input file. Column data used.\n", parameter_source[i].name);
178 parameter_source[i].do_copy = 0;
179 continue;
180 }
181 parameter_source[i].do_copy = 1;
182 if (!SDDS_TransferParameterDefinition(&SDDS_output, &SDDS_input, parameter_source[i].name, NULL) ||
183 (parameter_source[i].index = SDDS_GetParameterIndex(&SDDS_output, parameter_source[i].name)) < 0) {
184 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
185 }
186 parameter_source[i].size = SDDS_GetTypeSize(SDDS_GetParameterType(&SDDS_output, parameter_source[i].index));
187 }
188
189 /* Write layout to output dataset and allocate memory for column data */
190 if (!SDDS_WriteLayout(&SDDS_output) ||
191 !(data = SDDS_Malloc(sizeof(*data) * column_sources))) {
192 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
193 }
194
195 /* Process each page of the input dataset */
196 while (SDDS_ReadPage(&SDDS_input) > 0) {
197 if ((rows = SDDS_RowCount(&SDDS_input)) < 0)
198 continue;
199
200 /* Retrieve data for each column */
201 for (i = 0; i < column_sources; i++) {
202 if (!(data[i] = SDDS_GetInternalColumn(&SDDS_input, column_source[i].name))) {
203 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
204 }
205 }
206
207 /* Process each row of the current page */
208 for (irow = 0; irow < rows; irow++) {
209 if (!SDDS_StartPage(&SDDS_output, 0)) {
210 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
211 }
212
213 /* Set parameter values in the output dataset */
214 for (i = 0; i < parameter_sources; i++) {
215 if (!parameter_source[i].do_copy)
216 continue;
217 if (!SDDS_GetParameter(&SDDS_input, parameter_source[i].name, buffer) ||
218 !SDDS_SetParameters(&SDDS_output, SDDS_SET_BY_INDEX | SDDS_PASS_BY_REFERENCE, parameter_source[i].index, buffer, -1)) {
219 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
220 }
221 }
222
223 /* Set column values as parameters in the output dataset */
224 for (i = 0; i < column_sources; i++) {
225 if (!SDDS_SetParameters(&SDDS_output, SDDS_SET_BY_INDEX | SDDS_PASS_BY_REFERENCE, column_source[i].index, (((char *)data[i]) + irow * column_source[i].size), -1)) {
226 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
227 }
228 }
229
230 /* Write the current page to the output dataset */
231 if (!SDDS_WritePage(&SDDS_output)) {
232 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
233 }
234 }
235 }
236
237 /* Terminate the input and output datasets */
238 if (!SDDS_Terminate(&SDDS_input) || !SDDS_Terminate(&SDDS_output)) {
239 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
240 exit(EXIT_FAILURE);
241 }
242
243 /* Free allocated memory */
244 free(column_source);
245 free(parameter_source);
246 return 0;
247}
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
void * SDDS_GetInternalColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves an internal pointer to the data of a specified column, including all rows.
void * SDDS_GetParameter(SDDS_DATASET *SDDS_dataset, char *parameter_name, void *memory)
Retrieves the value of a specified parameter from the current data table of a data set.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameterLikeColumn(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a parameter in the target dataset based on a column definition from the source dataset.
int32_t SDDS_TransferParameterDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a parameter definition from a source dataset to a target dataset.
int32_t SDDS_GetParameterType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a parameter in the SDDS dataset by its index.
char ** SDDS_GetParameterNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all parameters in the SDDS dataset.
int32_t SDDS_GetParameterIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named parameter in the SDDS dataset.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void * SDDS_Malloc(size_t size)
Allocates memory of a specified size.
Definition SDDS_utils.c:639
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_GetTypeSize(int32_t type)
Retrieves the size in bytes of a specified SDDS data type.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.