SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sdds2headlessdata.c
Go to the documentation of this file.
1/**
2 * @file sdds2headlessdata.c
3 * @brief Converts SDDS files into binary data without headers.
4 *
5 * @details
6 * This program reads an SDDS (Self Describing Data Set) file, extracts specified columns,
7 * and writes the data to a binary file in either row-major or column-major order. It supports
8 * flexible options for specifying columns, output order, and piping data. At least one column
9 * must be specified using `-column`. The program ensures compatibility of options and processes
10 * the input accordingly.
11 *
12 * @section Usage
13 * ```
14 * sdds2headlessdata [<input>] [<output>]
15 * [-pipe=in|out]
16 * -column=<name>[,<name>...]
17 * [-order={rowMajor|columnMajor}]
18 * ```
19 *
20 * @section Options
21 * | Required | Description |
22 * |---------------------------------------|---------------------------------------------------------------------------------------|
23 * | `-column` | Specifies the columns to include in the output. |
24 *
25 * | Optional | Description |
26 * |---------------|-----------------------------------------------------------------------------|
27 * | `-pipe` | Allows piping data into or out of the program. |
28 * | `-order` | Specifies the output order for column data, either row-major or column-major order. |
29 *
30 * @copyright
31 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
32 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
33 *
34 * @license
35 * This file is distributed under the terms of the Software License Agreement
36 * found in the file LICENSE included with this distribution.
37 *
38 * @author
39 * R. Soliday
40 */
41
42#include "mdb.h"
43#include "SDDS.h"
44#include "scan.h"
45
46#ifdef _WIN32
47# include <fcntl.h>
48# include <io.h>
49#endif
50
/* Output layouts selectable with -order; each value doubles as the index
 * of its keyword in order_names[]. */
#define ROW_ORDER 0
#define COLUMN_ORDER 1
#define ORDERS 2

/* Keywords accepted by -order, positioned by the layout they select. */
static char *order_names[ORDERS] = {
  [ROW_ORDER] = "rowMajor",
  [COLUMN_ORDER] = "columnMajor",
};
58
/* Command-line switches understood by the program. Each enumerator is the
 * index of its keyword in option[]; N_OPTIONS is the number of switches. */
enum option_type {
  SET_COLUMN,
  SET_PIPE,
  SET_ORDER,
  N_OPTIONS
};

/* Switch keywords, positioned by the enumerator they correspond to. */
static char *option[N_OPTIONS] = {
  [SET_COLUMN] = "column",
  [SET_PIPE] = "pipe",
  [SET_ORDER] = "order",
};
72
/* Fallback so the usage text still builds when the build system does not
 * supply a revision string. */
#ifndef SVN_VERSION
#  define SVN_VERSION "unknown"
#endif

/* Help text printed when too few arguments are given. The -column synopsis
 * shows the comma-separated list form, since every item after the keyword
 * is taken as a column name. */
static char *usage =
  "sdds2headlessdata [<input>] [<output>]\n"
  " [-pipe=in|out]\n"
  " -column=<name>[,<name>...]\n"
  " [-order={rowMajor|columnMajor}] \n"
  "Options:\n"
  "-order: Row major order is the default. Each row consists of one element\n"
  " from each column. In column major order, each column is written entirely\n"
  " on one row.\n"
  "-column: Provide the columns whose data are to be written.\n\n"
  "Program by Hairong Shang.\n"
  "Link date: " __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION "\n";
85
86int main(int argc, char **argv) {
87 FILE *file_id;
88 SDDS_FILEBUFFER *f_buffer = NULL;
89
90 SDDS_DATASET sdds_dataset, sdds_dummy;
91 SCANNED_ARG *s_arg;
92 long j, i_arg, retval, page_number = 0, size, column_order = 0;
93 int64_t i, rows = 0;
94 char *input = NULL, *output = NULL;
95 unsigned long pipe_flags = 0;
96 long no_warnings = 0, tmpfile_used = 0;
97
98 long *column_type = NULL, *column_index = NULL;
99 void **column_data = NULL;
100 char **column = NULL, **column_match = NULL;
101 int32_t column_matches = 0;
102 int32_t columns = 0;
103
105 argc = scanargs(&s_arg, argc, argv);
106 if (argc < 3) {
107 bomb(NULL, usage);
108 }
109
110 for (i_arg = 1; i_arg < argc; i_arg++) {
111 if (s_arg[i_arg].arg_type == OPTION) {
112 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
113 case SET_ORDER:
114 if (s_arg[i_arg].n_items != 2) {
115 SDDS_Bomb("invalid -order syntax");
116 }
117 switch (match_string(s_arg[i_arg].list[1], order_names, ORDERS, 0)) {
118 case ROW_ORDER:
119 column_order = 0;
120 break;
121 case COLUMN_ORDER:
122 column_order = 1;
123 break;
124 default:
125 SDDS_Bomb("invalid -order syntax");
126 break;
127 }
128 break;
129 case SET_COLUMN:
130 if ((s_arg[i_arg].n_items < 2)) {
131 SDDS_Bomb("invalid -column syntax");
132 }
133 column_matches = s_arg[i_arg].n_items - 1;
134 column_match = tmalloc(sizeof(*column_match) * column_matches);
135 for (i = 0; i < column_matches; i++) {
136 column_match[i] = s_arg[i_arg].list[i + 1];
137 }
138 break;
139 case SET_PIPE:
140 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipe_flags)) {
141 fprintf(stderr, "Error (%s): invalid -pipe syntax\n", argv[0]);
142 return 1;
143 }
144 break;
145 default:
146 fprintf(stderr, "error: unknown switch: %s\n", s_arg[i_arg].list[0]);
147 exit(1);
148 }
149 } else {
150 if (input == NULL) {
151 input = s_arg[i_arg].list[0];
152 } else if (output == NULL) {
153 output = s_arg[i_arg].list[0];
154 } else {
155 fprintf(stderr, "too many filenames");
156 exit(1);
157 }
158 }
159 }
160
161 processFilenames("sdds2headlessdata", &input, &output, pipe_flags, no_warnings, &tmpfile_used);
162
163 if (!column_matches) {
164 SDDS_Bomb("you must specify -column options");
165 }
166
167 if (!SDDS_InitializeInput(&sdds_dataset, input)) {
168 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
169 exit(1);
170 }
171
172 column = getMatchingSDDSNames(&sdds_dataset, column_match, column_matches, &columns, SDDS_MATCH_COLUMN);
173 if (!columns) {
174 SDDS_Bomb("No columns found in the input file.");
175 }
176
177 column_type = tmalloc(sizeof(*column_type) * columns);
178 column_index = tmalloc(sizeof(*column_index) * columns);
179 column_data = tmalloc(sizeof(*column_data) * columns);
180 for (i = 0; i < columns; i++) {
181 if ((column_index[i] = SDDS_GetColumnIndex(&sdds_dataset, column[i])) < 0) {
182 fprintf(stderr, "error: column %s does not exist\n", column[i]);
183 exit(1);
184 }
185 if ((column_type[i] = SDDS_GetColumnType(&sdds_dataset, column_index[i])) <= 0) {
186 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
187 exit(1);
188 }
189 }
190
191 if (!output) {
192#ifdef _WIN32
193 if (_setmode(_fileno(stdout), _O_BINARY) == -1) {
194 fprintf(stderr, "error: unable to set stdout to binary mode\n");
195 exit(1);
196 }
197#endif
198 file_id = stdout;
199 } else {
200 file_id = fopen(output, "wb");
201 }
202 if (file_id == NULL) {
203 fprintf(stderr, "unable to open output file for writing\n");
204 exit(1);
205 }
206
207 f_buffer = &sdds_dummy.fBuffer;
208 f_buffer->buffer = NULL;
209 if (!f_buffer->buffer) {
210 if (!(f_buffer->buffer = f_buffer->data = SDDS_Malloc(sizeof(char) * SDDS_FILEBUFFER_SIZE))) {
211 fprintf(stderr, "Unable to do buffered read--allocation failure\n");
212 exit(1);
213 }
214 f_buffer->bufferSize = SDDS_FILEBUFFER_SIZE;
215 f_buffer->bytesLeft = SDDS_FILEBUFFER_SIZE;
216 }
217
218 retval = -1;
219
220 while (retval != page_number && (retval = SDDS_ReadPage(&sdds_dataset)) > 0) {
221 if (page_number && retval != page_number) {
222 continue;
223 }
224 if ((rows = SDDS_CountRowsOfInterest(&sdds_dataset)) < 0) {
225 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
226 exit(1);
227 }
228 if (rows) {
229 if (column_order) {
230 for (j = 0; j < columns; j++) {
231 if (column_type[j] == SDDS_STRING) {
232 for (i = 0; i < rows; i++) {
233 if (!SDDS_WriteBinaryString(*((char **)sdds_dataset.data[column_index[j]] + i), file_id, f_buffer)) {
234 fprintf(stderr, "Unable to write rows--failure writing string\n");
235 exit(1);
236 }
237 }
238 } else {
239 size = SDDS_type_size[column_type[j] - 1];
240 for (i = 0; i < rows; i++) {
241 if (!SDDS_BufferedWrite((char *)sdds_dataset.data[column_index[j]] + i * size, size, file_id, f_buffer)) {
242 fprintf(stderr, "Unable to write rows--failure writing string\n");
243 exit(1);
244 }
245 }
246 }
247 }
248 } else {
249 for (i = 0; i < rows; i++) {
250 for (j = 0; j < columns; j++) {
251 if (column_type[j] == SDDS_STRING) {
252 if (!SDDS_WriteBinaryString(*((char **)sdds_dataset.data[column_index[j]] + i), file_id, f_buffer)) {
253 fprintf(stderr, "Unable to write rows--failure writing string\n");
254 exit(1);
255 }
256 } else {
257 size = SDDS_type_size[column_type[j] - 1];
258 if (!SDDS_BufferedWrite((char *)sdds_dataset.data[column_index[j]] + i * size, size, file_id, f_buffer)) {
259 fprintf(stderr, "Unable to write rows--failure writing string\n");
260 exit(1);
261 }
262 }
263 }
264 }
265 }
266 }
267 if (retval == 0) {
268 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
269 exit(1);
270 }
271 }
272 if (!SDDS_FlushBuffer(file_id, f_buffer)) {
273 SDDS_SetError("Unable to write page--buffer flushing problem (SDDS_WriteBinaryPage)");
274 return 0;
275 }
276 fclose(file_id);
277
278 if (!SDDS_Terminate(&sdds_dataset)) {
279 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
280 exit(1);
281 }
282 exit(0);
283}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_BufferedWrite(void *target, int64_t targetSize, FILE *fp, SDDS_FILEBUFFER *fBuffer)
int32_t SDDS_WriteBinaryString(char *string, FILE *fp, SDDS_FILEBUFFER *fBuffer)
Writes a binary string to a file with buffering.
int32_t SDDS_FlushBuffer(FILE *fp, SDDS_FILEBUFFER *fBuffer)
int32_t SDDS_type_size[SDDS_NUM_TYPES]
Array of sizes for each supported data type.
Definition SDDS_data.c:62
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
char ** getMatchingSDDSNames(SDDS_DATASET *dataset, char **matchName, int32_t matches, int32_t *names, short type)
Retrieves an array of matching SDDS entity names based on specified criteria.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void * SDDS_Malloc(size_t size)
Allocates memory of a specified size.
Definition SDDS_utils.c:639
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390