SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsseparate.c File Reference

Detailed Description

Reorganizes column data from an SDDS file onto separate pages.

This program reads an SDDS (Self Describing Data Set) file and reorganizes the column data such that data from different columns ends up on separate pages. The user can specify groups of columns to be combined under a new name, or copy columns across pages.

Usage

sddsseparate [<inputfile>] [<outputfile>]
[-pipe=[input][,output]]
[-group=<newName>,<listOfOldNames>]
[-copy=<listOfNames>]

Options

Optional Description
-pipe Enables piping for input and/or output.
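-group Groups multiple old column names under a new column name. Data from the old columns will appear on sequential pages under the new name. Although listed here with the optional arguments, at least one -group option must be given; the program exits with "no groups defined" otherwise. The option may be repeated to define several groups.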
-copy Specifies columns to be copied across all pages. This option may be given only once.

Incompatibilities

  • The same column cannot appear in both -group and -copy lists.

Specific Requirements

  • For -group:
    • All columns in a group must have consistent data types, and every group must match the same number of columns.
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Authors
  • M. Borland
  • R. Soliday
  • H. Shang

Definition in file sddsseparate.c.

#include "mdb.h"
#include "SDDS.h"
#include "scan.h"

Go to the source code of this file.

Functions

int main (int argc, char **argv)
 

Function Documentation

◆ main()

int main ( int argc,
char ** argv )

Definition at line 95 of file sddsseparate.c.

95 {
96 char *input = NULL;
97 char *output = NULL;
98 char **copy_column_name = NULL;
99 char **users_copy_column_name = NULL;
100 groups_t *group = NULL;
101 int32_t copy_columns = 0;
102 long users_copy_columns = 0;
103 long groups_count = 0;
104 long i_arg, i, read_code, items;
105 int64_t rows;
106 unsigned long pipe_flags = 0;
107 SCANNED_ARG *sc_arg = NULL;
108 SDDS_DATASET sdds_in, sdds_out;
109
110 /* Register the program name for error reporting */
112
113 /* Parse command-line arguments */
114 argc = scanargs(&sc_arg, argc, argv);
115 if (argc < 2)
116 bomb(NULL, usage_message);
117
118 /* Initialize variables */
119 group = NULL;
120 copy_column_name = users_copy_column_name = NULL;
121 users_copy_columns = copy_columns = groups_count = 0;
122
123 /* Process arguments */
124 for (i_arg = 1; i_arg < argc; i_arg++) {
125 if (sc_arg[i_arg].arg_type == OPTION) {
126 /* Match options and process accordingly */
127 switch (match_string(sc_arg[i_arg].list[0], option_strings, N_OPTIONS, 0)) {
128 case SET_PIPE:
129 /* Handle pipe options */
130 if (!processPipeOption(sc_arg[i_arg].list + 1, sc_arg[i_arg].n_items - 1, &pipe_flags))
131 SDDS_Bomb("invalid -pipe syntax");
132 break;
133
134 case SET_GROUP:
135 /* Handle group options */
136 items = sc_arg[i_arg].n_items - 1;
137 if (items < 2)
138 SDDS_Bomb("invalid -group syntax");
139 group = SDDS_Realloc(group, sizeof(*group) * (groups_count + 1));
140 if (!group ||
141 !SDDS_CopyString(&group[groups_count].new_name, sc_arg[i_arg].list[1]) ||
142 !(group[groups_count].users_old_name = SDDS_Malloc(sizeof(*group[groups_count].users_old_name) * (group[groups_count].users_old_names = items - 1))) ||
143 !SDDS_CopyStringArray(group[groups_count].users_old_name, sc_arg[i_arg].list + 2, group[groups_count].users_old_names))
144 SDDS_Bomb("memory allocation failure");
145 group[groups_count].old_name = NULL;
146 group[groups_count].old_names = 0;
147 groups_count++;
148 break;
149
150 case SET_COPY:
151 /* Handle copy options */
152 if (users_copy_columns)
153 SDDS_Bomb("give -copy only once");
154 users_copy_columns = sc_arg[i_arg].n_items - 1;
155 if (users_copy_columns < 1)
156 SDDS_Bomb("invalid -copy syntax");
157 users_copy_column_name = SDDS_Malloc(sizeof(*users_copy_column_name) * users_copy_columns);
158 if (!users_copy_column_name ||
159 !SDDS_CopyStringArray(users_copy_column_name, sc_arg[i_arg].list + 1, users_copy_columns))
160 SDDS_Bomb("memory allocation failure");
161 break;
162
163 default:
164 /* Handle unknown options */
165 fprintf(stderr, "error: unknown/ambiguous option: %s\n", sc_arg[i_arg].list[0]);
166 exit(EXIT_FAILURE);
167 break;
168 }
169 } else {
170 /* Process input and output filenames */
171 if (!input)
172 input = sc_arg[i_arg].list[0];
173 else if (!output)
174 output = sc_arg[i_arg].list[0];
175 else
176 SDDS_Bomb("too many filenames seen");
177 }
178 }
179
180 if (groups_count == 0)
181 SDDS_Bomb("no groups defined");
182
183 /* Process filenames and open input file */
184 processFilenames("sddsseparate", &input, &output, pipe_flags, 0, NULL);
185
186 if (!SDDS_InitializeInput(&sdds_in, input))
187 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
188
189 /* Process copy columns */
190 if (users_copy_columns) {
191 SDDS_SetColumnFlags(&sdds_in, 0);
192 for (i = 0; i < users_copy_columns; i++)
193 SDDS_SetColumnsOfInterest(&sdds_in, SDDS_MATCH_STRING, users_copy_column_name[i], SDDS_OR);
194 copy_column_name = SDDS_GetColumnNames(&sdds_in, &copy_columns);
195 if (!copy_column_name || copy_columns == 0)
196 SDDS_Bomb("no match for copy columns");
197 }
198
199 /* Process groups */
200 for (i = 0; i < groups_count; i++) {
201 long j, type = 0;
202 SDDS_SetColumnFlags(&sdds_in, 0);
203
204 for (j = 0; j < group[i].users_old_names; j++)
205 SDDS_SetColumnsOfInterest(&sdds_in, SDDS_MATCH_STRING, group[i].users_old_name[j], SDDS_OR);
206
207 group[i].old_name = SDDS_GetColumnNames(&sdds_in, &group[i].old_names);
208 if (!group[i].old_name) {
209 fprintf(stderr, "No match for group %s (sddsseparate)\n", group[i].new_name);
210 exit(EXIT_FAILURE);
211 }
212
213 if (i > 0 && group[i - 1].old_names != group[i].old_names) {
214 fprintf(stderr, "Group %s comprises %" PRId32 " columns, whereas the last group comprises %" PRId32 " (sddsseparate)\n",
215 group[i].new_name, group[i].old_names, group[i - 1].old_names);
216 exit(EXIT_FAILURE);
217 }
218
219 type = SDDS_GetColumnType(&sdds_in, SDDS_GetColumnIndex(&sdds_in, group[i].old_name[0]));
220 for (j = 1; j < group[i].old_names; j++) {
221 if (type != SDDS_GetColumnType(&sdds_in, SDDS_GetColumnIndex(&sdds_in, group[i].old_name[j]))) {
222 fprintf(stderr, "Inconsistent data types in group %s (sddsseparate)\n", group[i].new_name);
223 fprintf(stderr, "First inconsistent column is %s\n", group[i].old_name[j]);
224 exit(EXIT_FAILURE);
225 }
226 }
227 }
228
229 /* Initialize output file */
230 if (!SDDS_InitializeOutput(&sdds_out, SDDS_BINARY, 0, NULL, NULL, output) ||
231 !SDDS_TransferAllParameterDefinitions(&sdds_out, &sdds_in, 0))
232 SDDS_Bomb("problem initializing output file");
233
234 for (i = 0; i < copy_columns; i++) {
235 if (!SDDS_TransferColumnDefinition(&sdds_out, &sdds_in, copy_column_name[i], NULL))
236 SDDS_Bomb("problem transferring copy column definitions to output file");
237 }
238
239 for (i = 0; i < groups_count; i++) {
240 char *name;
241 if (!SDDS_TransferColumnDefinition(&sdds_out, &sdds_in, group[i].old_name[0], group[i].new_name)) {
242 fprintf(stderr, "Problem transferring column %s as %s to output file (sddsseparate)\n",
243 group[i].old_name[0], group[i].new_name);
244 exit(EXIT_FAILURE);
245 }
246
247 group[i].parameter_name = SDDS_Malloc(sizeof(*name) * (strlen(group[i].new_name) + 100));
248 if (!group[i].parameter_name)
249 SDDS_Bomb("memory allocation failure");
250
251 sprintf(group[i].parameter_name, "%sSourceColumn", group[i].new_name);
252 if (!SDDS_DefineSimpleParameter(&sdds_out, group[i].parameter_name, NULL, SDDS_STRING))
253 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
254 }
255
256 if (!SDDS_WriteLayout(&sdds_out))
257 SDDS_Bomb("problem writing layout to output file");
258
259 /* Process each page */
260 while ((read_code = SDDS_ReadPage(&sdds_in)) > 0) {
261 rows = SDDS_CountRowsOfInterest(&sdds_in);
262 if (rows < 0)
263 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
264 if (rows == 0)
265 continue;
266
267 for (i = 0; i < group[0].old_names; i++) {
268 long ic, ig;
269
270 if (!SDDS_StartPage(&sdds_out, rows) ||
271 !SDDS_CopyParameters(&sdds_out, &sdds_in))
272 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
273
274 for (ic = 0; ic < copy_columns; ic++) {
275 void *data = SDDS_GetInternalColumn(&sdds_in, copy_column_name[ic]);
276 if (!data ||
277 !SDDS_SetColumn(&sdds_out, SDDS_SET_BY_NAME, data, rows, copy_column_name[ic]))
278 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
279 }
280
281 for (ig = 0; ig < groups_count; ig++) {
282 void *data;
283
284 if (!SDDS_SetParameters(&sdds_out, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE,
285 group[ig].parameter_name, group[ig].old_name[i], NULL))
286 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
287
288 data = SDDS_GetInternalColumn(&sdds_in, group[ig].old_name[i]);
289 if (!data ||
290 !SDDS_SetColumn(&sdds_out, SDDS_SET_BY_NAME, data, rows, group[ig].new_name))
291 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
292 }
293
294 if (!SDDS_WritePage(&sdds_out))
295 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
296 }
297 }
298
299 /* Terminate input and output files */
300 if (!SDDS_Terminate(&sdds_in)) {
301 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
302 exit(EXIT_FAILURE);
303 }
304
305 if (!SDDS_Terminate(&sdds_out)) {
306 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
307 exit(EXIT_FAILURE);
308 }
309
310 return 0;
311}
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
void * SDDS_GetInternalColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves an internal pointer to the data of a specified column, including all rows.
int32_t SDDS_SetColumnFlags(SDDS_DATASET *SDDS_dataset, int32_t column_flag_value)
Sets the acceptance flags for all columns in the current data table of a data set.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
int32_t SDDS_CopyStringArray(char **target, char **source, int64_t n_strings)
Copies an array of strings from source to target.
void * SDDS_Malloc(size_t size)
Allocates memory of a specified size.
Definition SDDS_utils.c:639
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390