SDDSlib
Loading...
Searching...
No Matches
sdds2headlessdata.c
Go to the documentation of this file.
1/**
2 * @file sdds2headlessdata.c
3 * @brief Converts SDDS files into binary data without headers.
4 *
5 * This program reads an SDDS file, extracts specified columns, and writes the data
6 * to a binary file in either row-major or column-major order. The program supports
7 * flexible options for specifying columns, output order, and piping data.
8 *
9 * ## Usage:
10 *
11 * sdds2headlessdata <input> <output>
12 * [-order={rowMajor|columnMajor}] [-pipe=in|out]
13 * [-column=<name>]
14 *
15 * - **-order:** Specifies the output order:
16 * - `rowMajor` (default): Each row consists of one element from each column.
17 * - `columnMajor`: Each column is written entirely in one row.
18 * - **-column:** Specifies the columns to include in the output.
19 * - **-pipe:** Allows piping data into or out of the program.
20 *
21 * ## Options:
22 * - The program requires at least one column to be specified with `-column`.
23 * - Input and output file names can be specified, or data can be piped.
24 *
25 * ## Example:
26 *
27 * sdds2headlessdata input.sdds output.bin -order=rowMajor -column=x -column=y
28 *
29 * @copyright
30 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
31 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
32 *
33 * @license
34 * This file is distributed under the terms of the Software License Agreement
35 * found in the file LICENSE included with this distribution.
36 *
37 * @author R. Soliday
38 */
39
40#include "mdb.h"
41#include "SDDS.h"
42#include "scan.h"
43
44#ifdef _WIN32
45# include <fcntl.h>
46# include <io.h>
47#endif
48
/* Output layouts selectable with -order. Each macro value is the index of
 * the matching keyword in order_names, and ORDERS is the table length. */
#define ROW_ORDER 0
#define COLUMN_ORDER 1
#define ORDERS 2
static char *order_names[ORDERS] = {
  [ROW_ORDER] = "rowMajor",
  [COLUMN_ORDER] = "columnMajor",
};
56
/* Command-line switches understood by main(). Each enumerator is the index
 * of its keyword in option[]; N_OPTIONS is the number of switches. */
enum option_type {
  SET_COLUMN = 0,
  SET_PIPE,
  SET_ORDER,
  N_OPTIONS
};

/* Switch keywords, laid out so that option[SET_x] is the text for SET_x. */
static char *option[N_OPTIONS] = {
  [SET_COLUMN] = "column",
  [SET_PIPE] = "pipe",
  [SET_ORDER] = "order",
};
70
71static char *usage =
72 "sdds2headlessdata <input> <output>\n"
73 "[-order={rowMajor|columnMajor}] [-pipe=in|out]\n"
74 "[-column=<name>]\n\n"
75 "-order: Row major order is the default. Each row consists of one element\n"
76 " from each column. In column major order, each column is written entirely\n"
77 " on one row.\n"
78 "-column: Provide the columns whose data are to be written.\n\n"
79 "Program by Hairong Shang.\n"
80 "Link date: " __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION "\n";
81
82int main(int argc, char **argv) {
83 FILE *file_id;
84 SDDS_FILEBUFFER *f_buffer = NULL;
85
86 SDDS_DATASET sdds_dataset, sdds_dummy;
87 SCANNED_ARG *s_arg;
88 long j, i_arg, retval, page_number = 0, size, column_order = 0;
89 int64_t i, rows = 0;
90 char *input = NULL, *output = NULL;
91 unsigned long pipe_flags = 0;
92 long no_warnings = 0, tmpfile_used = 0;
93
94 long *column_type = NULL, *column_index = NULL;
95 void **column_data = NULL;
96 char **column = NULL, **column_match = NULL;
97 int32_t column_matches = 0;
98 int32_t columns = 0;
99
101 argc = scanargs(&s_arg, argc, argv);
102 if (argc < 3) {
103 bomb(NULL, usage);
104 }
105
106 for (i_arg = 1; i_arg < argc; i_arg++) {
107 if (s_arg[i_arg].arg_type == OPTION) {
108 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
109 case SET_ORDER:
110 if (s_arg[i_arg].n_items != 2) {
111 SDDS_Bomb("invalid -order syntax");
112 }
113 switch (match_string(s_arg[i_arg].list[1], order_names, ORDERS, 0)) {
114 case ROW_ORDER:
115 column_order = 0;
116 break;
117 case COLUMN_ORDER:
118 column_order = 1;
119 break;
120 default:
121 SDDS_Bomb("invalid -order syntax");
122 break;
123 }
124 break;
125 case SET_COLUMN:
126 if ((s_arg[i_arg].n_items < 2)) {
127 SDDS_Bomb("invalid -column syntax");
128 }
129 column_matches = s_arg[i_arg].n_items - 1;
130 column_match = tmalloc(sizeof(*column_match) * column_matches);
131 for (i = 0; i < column_matches; i++) {
132 column_match[i] = s_arg[i_arg].list[i + 1];
133 }
134 break;
135 case SET_PIPE:
136 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipe_flags)) {
137 fprintf(stderr, "Error (%s): invalid -pipe syntax\n", argv[0]);
138 return 1;
139 }
140 break;
141 default:
142 fprintf(stderr, "error: unknown switch: %s\n", s_arg[i_arg].list[0]);
143 exit(1);
144 }
145 } else {
146 if (input == NULL) {
147 input = s_arg[i_arg].list[0];
148 } else if (output == NULL) {
149 output = s_arg[i_arg].list[0];
150 } else {
151 fprintf(stderr, "too many filenames");
152 exit(1);
153 }
154 }
155 }
156
157 processFilenames("sdds2headlessdata", &input, &output, pipe_flags, no_warnings, &tmpfile_used);
158
159 if (!column_matches) {
160 SDDS_Bomb("you must specify -column options");
161 }
162
163 if (!SDDS_InitializeInput(&sdds_dataset, input)) {
164 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
165 exit(1);
166 }
167
168 column = getMatchingSDDSNames(&sdds_dataset, column_match, column_matches, &columns, SDDS_MATCH_COLUMN);
169 if (!columns) {
170 SDDS_Bomb("No columns found in the input file.");
171 }
172
173 column_type = tmalloc(sizeof(*column_type) * columns);
174 column_index = tmalloc(sizeof(*column_index) * columns);
175 column_data = tmalloc(sizeof(*column_data) * columns);
176 for (i = 0; i < columns; i++) {
177 if ((column_index[i] = SDDS_GetColumnIndex(&sdds_dataset, column[i])) < 0) {
178 fprintf(stderr, "error: column %s does not exist\n", column[i]);
179 exit(1);
180 }
181 if ((column_type[i] = SDDS_GetColumnType(&sdds_dataset, column_index[i])) <= 0) {
182 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
183 exit(1);
184 }
185 }
186
187 if (!output) {
188#ifdef _WIN32
189 if (_setmode(_fileno(stdout), _O_BINARY) == -1) {
190 fprintf(stderr, "error: unable to set stdout to binary mode\n");
191 exit(1);
192 }
193#endif
194 file_id = stdout;
195 } else {
196 file_id = fopen(output, "wb");
197 }
198 if (file_id == NULL) {
199 fprintf(stderr, "unable to open output file for writing\n");
200 exit(1);
201 }
202
203 f_buffer = &sdds_dummy.fBuffer;
204 f_buffer->buffer = NULL;
205 if (!f_buffer->buffer) {
206 if (!(f_buffer->buffer = f_buffer->data = SDDS_Malloc(sizeof(char) * SDDS_FILEBUFFER_SIZE))) {
207 fprintf(stderr, "Unable to do buffered read--allocation failure\n");
208 exit(1);
209 }
210 f_buffer->bufferSize = SDDS_FILEBUFFER_SIZE;
211 f_buffer->bytesLeft = SDDS_FILEBUFFER_SIZE;
212 }
213
214 retval = -1;
215
216 while (retval != page_number && (retval = SDDS_ReadPage(&sdds_dataset)) > 0) {
217 if (page_number && retval != page_number) {
218 continue;
219 }
220 if ((rows = SDDS_CountRowsOfInterest(&sdds_dataset)) < 0) {
221 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
222 exit(1);
223 }
224 if (rows) {
225 if (column_order) {
226 for (j = 0; j < columns; j++) {
227 if (column_type[j] == SDDS_STRING) {
228 for (i = 0; i < rows; i++) {
229 if (!SDDS_WriteBinaryString(*((char **)sdds_dataset.data[column_index[j]] + i), file_id, f_buffer)) {
230 fprintf(stderr, "Unable to write rows--failure writing string\n");
231 exit(1);
232 }
233 }
234 } else {
235 size = SDDS_type_size[column_type[j] - 1];
236 for (i = 0; i < rows; i++) {
237 if (!SDDS_BufferedWrite((char *)sdds_dataset.data[column_index[j]] + i * size, size, file_id, f_buffer)) {
238 fprintf(stderr, "Unable to write rows--failure writing string\n");
239 exit(1);
240 }
241 }
242 }
243 }
244 } else {
245 for (i = 0; i < rows; i++) {
246 for (j = 0; j < columns; j++) {
247 if (column_type[j] == SDDS_STRING) {
248 if (!SDDS_WriteBinaryString(*((char **)sdds_dataset.data[column_index[j]] + i), file_id, f_buffer)) {
249 fprintf(stderr, "Unable to write rows--failure writing string\n");
250 exit(1);
251 }
252 } else {
253 size = SDDS_type_size[column_type[j] - 1];
254 if (!SDDS_BufferedWrite((char *)sdds_dataset.data[column_index[j]] + i * size, size, file_id, f_buffer)) {
255 fprintf(stderr, "Unable to write rows--failure writing string\n");
256 exit(1);
257 }
258 }
259 }
260 }
261 }
262 }
263 if (retval == 0) {
264 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
265 exit(1);
266 }
267 }
268 if (!SDDS_FlushBuffer(file_id, f_buffer)) {
269 SDDS_SetError("Unable to write page--buffer flushing problem (SDDS_WriteBinaryPage)");
270 return 0;
271 }
272 fclose(file_id);
273
274 if (!SDDS_Terminate(&sdds_dataset)) {
275 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
276 exit(1);
277 }
278 exit(0);
279}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_BufferedWrite(void *target, int64_t targetSize, FILE *fp, SDDS_FILEBUFFER *fBuffer)
int32_t SDDS_WriteBinaryString(char *string, FILE *fp, SDDS_FILEBUFFER *fBuffer)
Writes a binary string to a file with buffering.
int32_t SDDS_FlushBuffer(FILE *fp, SDDS_FILEBUFFER *fBuffer)
int32_t SDDS_type_size[SDDS_NUM_TYPES]
Array of sizes for each supported data type.
Definition SDDS_data.c:62
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
char ** getMatchingSDDSNames(SDDS_DATASET *dataset, char **matchName, int32_t matches, int32_t *names, short type)
Retrieves an array of matching SDDS entity names based on specified criteria.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void * SDDS_Malloc(size_t size)
Allocates memory of a specified size.
Definition SDDS_utils.c:639
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390