SDDSlib
Loading...
Searching...
No Matches
sddsbinarystring.c
Go to the documentation of this file.
1/**
2 * @file sddsbinarystring.c
3 * @brief Program for converting integer type columns into binary string representations.
4 *
5 * This program reads integer columns from an SDDS (Self-Describing Data Set) file and converts
6 * their values into binary string representations. The resulting binary strings are written into
7 * new columns appended to the output SDDS file. This program supports piping for input and output
8 * as well as wildcard matching for column names.
9 *
10 * ### Features:
11 * - Converts integer columns into binary string columns.
12 * - Supports piping for input and output.
13 * - Handles 16-bit, 32-bit, and 64-bit integers, both signed and unsigned.
14 * - Can process column names using wildcards.
15 *
16 * ### Usage:
17 * sddsbinarystring [<source-file>] [<target-file>] [-pipe=[input][,output]] [-column=<list of column names>]
18 *
19 * @details
20 * - If no column names are specified, all integer columns will be converted.
21 * - Binary string columns are named as `<originalColumnName>BinaryString`.
22 * - Input and output files can be specified or provided via pipe.
23 *
24 * @copyright
25 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
26 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
27 *
28 * @license
29 * This file is distributed under the terms of the Software License Agreement
30 * found in the file LICENSE included with this distribution.
31 *
32 * @author H. Shang, L. Emery, R. Soliday
33 */
34
35#include "mdb.h"
36#include "SDDS.h"
37#include "scan.h"
38#include "SDDSutils.h"
39#include "ctype.h"
40
41/**
42 * @brief Enumeration for command-line options.
43 */
typedef enum {
  SET_COLUMN, /* -column=<list of column names>; index into option[] */
  SET_PIPE,   /* -pipe=[input][,output]; index into option[] */
  N_OPTIONS   /* number of recognized options; sizes option[] */
} OptionType;
49
50/**
51 * @brief String representation of the command-line options.
52 */
53char *option[N_OPTIONS] =
54 {
55 "column",
56 "pipe",
57 };
58
59/**
60 * @brief Usage message for the program.
61 */
62char *usage =
63 "Usage: sddsbinarystring [<source-file>] [<target-file>]\n"
64 " [-pipe=[input][,output]]\n"
65 " [-column=<list of column names>]\n\n"
66 "Description:\n"
67 "sddsbinarystring converts integer columns into binary string representations.\n"
68 "Binary string columns are appended as <oldColumnName>BinaryString.\n\n"
69 "Options:\n"
70 "-column List of columns to convert. Wildcards are accepted.\n"
71 "-pipe Use pipes for input and/or output.\n\n"
72 "Author: Hairong (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
73
74int main(int argc, char **argv) {
75 SDDS_DATASET sdds_dataset, sdds_orig;
76 long i, j, i_arg;
77 SCANNED_ARG *s_arg;
78 long tmpfile_used, no_warnings, integer_columns, digits;
79 int32_t column_matches;
80 long index, type;
81 int64_t i_row, rows;
82 void *data;
83 int32_t columns;
84 short *integer_type;
85 char *input, *output, **column_name, **column_match, **integer_column;
86 char buff[1024], **binary_string;
87 unsigned long pipe_flags;
88
89 /* Initialize variables */
90 tmpfile_used = 0;
91 pipe_flags = 0;
92 input = output = NULL;
93 data = NULL;
94 no_warnings = 0;
95
96 binary_string = NULL;
97 buff[0] = 0;
98
99 columns = column_matches = integer_columns = 0;
100 column_name = column_match = integer_column = NULL;
101 integer_type = NULL;
102
103 /* Register the program name for error handling */
105 argc = scanargs(&s_arg, argc, argv);
106 if (argc < 2)
107 bomb(NULL, usage);
108
109 /* Parse command-line arguments */
110 for (i_arg = 1; i_arg < argc; i_arg++) {
111 if (s_arg[i_arg].arg_type == OPTION) {
112 delete_chars(s_arg[i_arg].list[0], "_");
113 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
114 case SET_COLUMN:
115 if (s_arg[i_arg].n_items < 2)
116 SDDS_Bomb("invalid -columns syntax");
117 column_match =
118 tmalloc(sizeof(*column_match) *
119 (column_matches = s_arg[i_arg].n_items - 1));
120 for (i = 0; i < column_matches; i++)
121 column_match[i] = s_arg[i_arg].list[i + 1];
122 break;
123 case SET_PIPE:
124 if (!processPipeOption(s_arg[i_arg].list + 1,
125 s_arg[i_arg].n_items - 1, &pipe_flags))
126 SDDS_Bomb("invalid -pipe syntax");
127 break;
128 default:
129 fprintf(stderr, "Error (%s): unknown switch: %s\n",
130 argv[0], s_arg[i_arg].list[0]);
131 exit(1);
132 break;
133 }
134 } else {
135 if (input == NULL)
136 input = s_arg[i_arg].list[0];
137 else if (output == NULL)
138 output = s_arg[i_arg].list[0];
139 else
140 SDDS_Bomb("too many filenames");
141 }
142 }
143
144 /* Process input and output file names */
145 processFilenames("sddsbinarystring", &input, &output, pipe_flags,
146 no_warnings, &tmpfile_used);
147
148#ifdef DEBUG
149 fprintf(stderr, "Initializing input and output files.\n");
150#endif
151 /* Initialize the SDDS input */
152 if (!SDDS_InitializeInput(&sdds_orig, input)) {
153 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
154 exit(1);
155 }
156
157 /* Retrieve column names based on user input */
158 if (!column_match)
159 column_name = SDDS_GetColumnNames(&sdds_orig, &columns);
160 else {
161 column_name =
162 getMatchingSDDSNames(&sdds_orig, column_match, column_matches,
163 &columns, SDDS_MATCH_COLUMN);
164 free(column_match);
165 }
166
167 /* Identify integer columns for conversion */
168 for (i = 0; i < columns; i++) {
169 index = SDDS_GetColumnIndex(&sdds_orig, column_name[i]);
170 type = SDDS_GetColumnType(&sdds_orig, index);
171 if (type == SDDS_SHORT || type == SDDS_USHORT || type == SDDS_LONG || type == SDDS_ULONG || type == SDDS_LONG64 || type == SDDS_ULONG64) {
172 integer_column =
173 SDDS_Realloc(integer_column,
174 sizeof(*integer_column) * (integer_columns + 1));
175 integer_type =
176 SDDS_Realloc(integer_type,
177 sizeof(*integer_type) * (integer_columns + 1));
178 integer_column[integer_columns] = NULL;
179
180 SDDS_CopyString(&integer_column[integer_columns], column_name[i]);
181 integer_type[integer_columns] = type;
182 integer_columns++;
183 }
184 }
185 SDDS_FreeStringArray(column_name, columns);
186 free(column_name);
187
188 /* Exit if no integer columns are found */
189 if (!integer_columns) {
190 fprintf(stderr, "There are no integer columns in %s for converting.\n",
191 input);
192 exit(1);
193 }
194
195 /* Initialize the SDDS output */
196 if (!SDDS_InitializeCopy(&sdds_dataset, &sdds_orig, output, "w")) {
197 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
198 exit(1);
199 }
200
201 /* Define new columns for binary strings */
202 for (i = 0; i < integer_columns; i++) {
203 sprintf(buff, "%sBinaryString", integer_column[i]);
204 if (!SDDS_DefineSimpleColumn(&sdds_dataset, buff, NULL, SDDS_STRING))
205 SDDS_PrintErrors(stderr,
206 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
207 }
208 if (!SDDS_WriteLayout(&sdds_dataset))
209 SDDS_PrintErrors(stderr,
210 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
211
212#ifdef DEBUG
213 fprintf(stderr, "Reading integer data from input file.\n");
214#endif
215 /* Read and process each page */
216 while (SDDS_ReadPage(&sdds_orig) > 0) {
217 if ((rows = SDDS_CountRowsOfInterest(&sdds_orig)) < 0)
218 continue;
219 binary_string = malloc(sizeof(*binary_string) * rows);
220 if (!SDDS_CopyPage(&sdds_dataset, &sdds_orig))
221 SDDS_PrintErrors(stderr,
222 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
223
224 /* Process each integer column */
225 for (i = 0; i < integer_columns; i++) {
226 if (!(data = SDDS_GetInternalColumn(&sdds_orig, integer_column[i])))
227 SDDS_PrintErrors(stderr,
228 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
229
230 /* Determine the number of bits for binary conversion */
231 if (integer_type[i] == SDDS_SHORT || integer_type[i] == SDDS_USHORT)
232 digits = 16;
233 else if (integer_type[i] == SDDS_LONG || integer_type[i] == SDDS_ULONG)
234 digits = 32;
235 else
236 digits = 64;
237
238 /* Convert each value to binary string */
239 for (i_row = 0; i_row < rows; i_row++) {
240 binary_string[i_row] = NULL;
241 binary_string[i_row] =
242 (char *)malloc(sizeof(**binary_string) * (digits + 1));
243
244 for (j = 0; j < digits; j++) {
245 if (integer_type[i] == SDDS_SHORT)
246 binary_string[i_row][digits - 1 - j] = (((short*)data)[i_row] >> j & 0x1) ? '1' : '0';
247 else if (integer_type[i] == SDDS_USHORT)
248 binary_string[i_row][digits - 1 - j] = (((unsigned short*)data)[i_row] >> j & 0x1) ? '1' : '0';
249 else if (integer_type[i] == SDDS_LONG)
250 binary_string[i_row][digits - 1 - j] = (((int32_t*)data)[i_row] >> j & 0x1) ? '1' : '0';
251 else if (integer_type[i] == SDDS_ULONG)
252 binary_string[i_row][digits - 1 - j] = (((uint32_t*)data)[i_row] >> j & 0x1) ? '1' : '0';
253 else if (integer_type[i] == SDDS_LONG64)
254 binary_string[i_row][digits - 1 - j] = (((int64_t*)data)[i_row] >> j & 0x1) ? '1' : '0';
255 else if (integer_type[i] == SDDS_ULONG64)
256 binary_string[i_row][digits - 1 - j] = (((uint64_t*)data)[i_row] >> j & 0x1) ? '1' : '0';
257 }
258 binary_string[i_row][digits] = 0;
259 }
260
261 /* Add binary string column to the dataset */
262 sprintf(buff, "%sBinaryString", integer_column[i]);
263 if (!SDDS_SetColumn(&sdds_dataset, SDDS_BY_NAME, binary_string,
264 rows, buff))
265 SDDS_PrintErrors(stderr,
266 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
267 SDDS_FreeStringArray(binary_string, rows);
268 }
269 free(binary_string);
270 binary_string = NULL;
271
272 /* Write the processed page */
273 if (!SDDS_WritePage(&sdds_dataset))
274 SDDS_PrintErrors(stderr,
275 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
276 }
277
278 /* Finalize and clean up */
279 if (!SDDS_Terminate(&sdds_orig) || !SDDS_Terminate(&sdds_dataset))
280 SDDS_PrintErrors(stderr,
281 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
282 if (tmpfile_used && !replaceFileAndBackUp(input, output))
283 exit(1);
284 SDDS_FreeStringArray(integer_column, integer_columns);
285 free(integer_column);
286 free_scanargs(&s_arg, argc);
287 return 0;
288}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
void * SDDS_GetInternalColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves an internal pointer to the data of a specified column, including all rows.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_FreeStringArray(char **string, int64_t strings)
Frees an array of strings by deallocating each individual string.
char ** getMatchingSDDSNames(SDDS_DATASET *dataset, char **matchName, int32_t matches, int32_t *names, short type)
Retrieves an array of matching SDDS entity names based on specified criteria.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_ULONG
Identifier for the unsigned 32-bit integer data type.
Definition SDDStypes.h:67
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_ULONG64
Identifier for the unsigned 64-bit integer data type.
Definition SDDStypes.h:55
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_SHORT
Identifier for the signed short integer data type.
Definition SDDStypes.h:73
#define SDDS_USHORT
Identifier for the unsigned short integer data type.
Definition SDDStypes.h:79
#define SDDS_LONG64
Identifier for the signed 64-bit integer data type.
Definition SDDStypes.h:49
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
char * delete_chars(char *s, char *t)
Removes all occurrences of characters found in string t from string s.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
char * option[N_OPTIONS]
String representation of the command-line options.
OptionType
Enumeration for command-line options.
char * usage
Usage message for the program.