SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsbinarystring.c File Reference

Detailed Description

Converts integer type columns from SDDS files into binary string representations.

This program reads integer columns from an SDDS (Self-Describing Data Set) file and converts their values into binary string representations. The resulting binary strings are appended as new columns in the output SDDS file. If no column names are specified, all integer columns will be converted. The binary string columns are named as <originalColumnName>BinaryString.

Usage

sddsbinarystring [<source-file>] [<target-file>]
[-pipe=[input][,output]]
[-column=<list of column names>]

Options

Required Description
-column Specifies the list of columns to convert. Wildcards are allowed. This option may be omitted, in which case all integer columns in the source file are converted.
Optional Description
-pipe Enables piping for input and/or output.

Specific Requirements

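  • For -column, the specified columns must exist in the source file. Only columns of an integer type (e.g., SHORT, USHORT, LONG, ULONG, LONG64, ULONG64) are converted; selected columns of any other type are skipped, and the program exits with an error if no integer column remains.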
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Author
H. Shang, L. Emery, R. Soliday

Definition in file sddsbinarystring.c.

#include "mdb.h"
#include "SDDS.h"
#include "scan.h"
#include "SDDSutils.h"
#include "ctype.h"

Go to the source code of this file.

Functions

int main (int argc, char **argv)
 

Function Documentation

◆ main()

int main ( int argc,
char ** argv )

Definition at line 73 of file sddsbinarystring.c.

73 {
74 SDDS_DATASET sdds_dataset, sdds_orig;
75 long i, j, i_arg;
76 SCANNED_ARG *s_arg;
77 long tmpfile_used, no_warnings, integer_columns, digits;
78 int32_t column_matches;
79 long index, type;
80 int64_t i_row, rows;
81 void *data;
82 int32_t columns;
83 short *integer_type;
84 char *input, *output, **column_name, **column_match, **integer_column;
85 char buff[1024], **binary_string;
86 unsigned long pipe_flags;
87
88 /* Initialize variables */
89 tmpfile_used = 0;
90 pipe_flags = 0;
91 input = output = NULL;
92 data = NULL;
93 no_warnings = 0;
94
95 binary_string = NULL;
96 buff[0] = 0;
97
98 columns = column_matches = integer_columns = 0;
99 column_name = column_match = integer_column = NULL;
100 integer_type = NULL;
101
102 /* Register the program name for error handling */
104 argc = scanargs(&s_arg, argc, argv);
105 if (argc < 2)
106 bomb(NULL, usage);
107
108 /* Parse command-line arguments */
109 for (i_arg = 1; i_arg < argc; i_arg++) {
110 if (s_arg[i_arg].arg_type == OPTION) {
111 delete_chars(s_arg[i_arg].list[0], "_");
112 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
113 case SET_COLUMN:
114 if (s_arg[i_arg].n_items < 2)
115 SDDS_Bomb("invalid -columns syntax");
116 column_match =
117 tmalloc(sizeof(*column_match) *
118 (column_matches = s_arg[i_arg].n_items - 1));
119 for (i = 0; i < column_matches; i++)
120 column_match[i] = s_arg[i_arg].list[i + 1];
121 break;
122 case SET_PIPE:
123 if (!processPipeOption(s_arg[i_arg].list + 1,
124 s_arg[i_arg].n_items - 1, &pipe_flags))
125 SDDS_Bomb("invalid -pipe syntax");
126 break;
127 default:
128 fprintf(stderr, "Error (%s): unknown switch: %s\n",
129 argv[0], s_arg[i_arg].list[0]);
130 exit(1);
131 break;
132 }
133 } else {
134 if (input == NULL)
135 input = s_arg[i_arg].list[0];
136 else if (output == NULL)
137 output = s_arg[i_arg].list[0];
138 else
139 SDDS_Bomb("too many filenames");
140 }
141 }
142
143 /* Process input and output file names */
144 processFilenames("sddsbinarystring", &input, &output, pipe_flags,
145 no_warnings, &tmpfile_used);
146
147#ifdef DEBUG
148 fprintf(stderr, "Initializing input and output files.\n");
149#endif
150 /* Initialize the SDDS input */
151 if (!SDDS_InitializeInput(&sdds_orig, input)) {
152 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
153 exit(1);
154 }
155
156 /* Retrieve column names based on user input */
157 if (!column_match)
158 column_name = SDDS_GetColumnNames(&sdds_orig, &columns);
159 else {
160 column_name =
161 getMatchingSDDSNames(&sdds_orig, column_match, column_matches,
162 &columns, SDDS_MATCH_COLUMN);
163 free(column_match);
164 }
165
166 /* Identify integer columns for conversion */
167 for (i = 0; i < columns; i++) {
168 index = SDDS_GetColumnIndex(&sdds_orig, column_name[i]);
169 type = SDDS_GetColumnType(&sdds_orig, index);
170 if (type == SDDS_SHORT || type == SDDS_USHORT || type == SDDS_LONG || type == SDDS_ULONG || type == SDDS_LONG64 || type == SDDS_ULONG64) {
171 integer_column =
172 SDDS_Realloc(integer_column,
173 sizeof(*integer_column) * (integer_columns + 1));
174 integer_type =
175 SDDS_Realloc(integer_type,
176 sizeof(*integer_type) * (integer_columns + 1));
177 integer_column[integer_columns] = NULL;
178
179 SDDS_CopyString(&integer_column[integer_columns], column_name[i]);
180 integer_type[integer_columns] = type;
181 integer_columns++;
182 }
183 }
184 SDDS_FreeStringArray(column_name, columns);
185 free(column_name);
186
187 /* Exit if no integer columns are found */
188 if (!integer_columns) {
189 fprintf(stderr, "There are no integer columns in %s for converting.\n",
190 input);
191 exit(1);
192 }
193
194 /* Initialize the SDDS output */
195 if (!SDDS_InitializeCopy(&sdds_dataset, &sdds_orig, output, "w")) {
196 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
197 exit(1);
198 }
199
200 /* Define new columns for binary strings */
201 for (i = 0; i < integer_columns; i++) {
202 sprintf(buff, "%sBinaryString", integer_column[i]);
203 if (!SDDS_DefineSimpleColumn(&sdds_dataset, buff, NULL, SDDS_STRING))
204 SDDS_PrintErrors(stderr,
205 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
206 }
207 if (!SDDS_WriteLayout(&sdds_dataset))
208 SDDS_PrintErrors(stderr,
209 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
210
211#ifdef DEBUG
212 fprintf(stderr, "Reading integer data from input file.\n");
213#endif
214 /* Read and process each page */
215 while (SDDS_ReadPage(&sdds_orig) > 0) {
216 if ((rows = SDDS_CountRowsOfInterest(&sdds_orig)) < 0)
217 continue;
218 binary_string = malloc(sizeof(*binary_string) * rows);
219 if (!SDDS_CopyPage(&sdds_dataset, &sdds_orig))
220 SDDS_PrintErrors(stderr,
221 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
222
223 /* Process each integer column */
224 for (i = 0; i < integer_columns; i++) {
225 if (!(data = SDDS_GetInternalColumn(&sdds_orig, integer_column[i])))
226 SDDS_PrintErrors(stderr,
227 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
228
229 /* Determine the number of bits for binary conversion */
230 if (integer_type[i] == SDDS_SHORT || integer_type[i] == SDDS_USHORT)
231 digits = 16;
232 else if (integer_type[i] == SDDS_LONG || integer_type[i] == SDDS_ULONG)
233 digits = 32;
234 else
235 digits = 64;
236
237 /* Convert each value to binary string */
238 for (i_row = 0; i_row < rows; i_row++) {
239 binary_string[i_row] = NULL;
240 binary_string[i_row] =
241 (char *)malloc(sizeof(**binary_string) * (digits + 1));
242
243 for (j = 0; j < digits; j++) {
244 if (integer_type[i] == SDDS_SHORT)
245 binary_string[i_row][digits - 1 - j] = (((short*)data)[i_row] >> j & 0x1) ? '1' : '0';
246 else if (integer_type[i] == SDDS_USHORT)
247 binary_string[i_row][digits - 1 - j] = (((unsigned short*)data)[i_row] >> j & 0x1) ? '1' : '0';
248 else if (integer_type[i] == SDDS_LONG)
249 binary_string[i_row][digits - 1 - j] = (((int32_t*)data)[i_row] >> j & 0x1) ? '1' : '0';
250 else if (integer_type[i] == SDDS_ULONG)
251 binary_string[i_row][digits - 1 - j] = (((uint32_t*)data)[i_row] >> j & 0x1) ? '1' : '0';
252 else if (integer_type[i] == SDDS_LONG64)
253 binary_string[i_row][digits - 1 - j] = (((int64_t*)data)[i_row] >> j & 0x1) ? '1' : '0';
254 else if (integer_type[i] == SDDS_ULONG64)
255 binary_string[i_row][digits - 1 - j] = (((uint64_t*)data)[i_row] >> j & 0x1) ? '1' : '0';
256 }
257 binary_string[i_row][digits] = 0;
258 }
259
260 /* Add binary string column to the dataset */
261 sprintf(buff, "%sBinaryString", integer_column[i]);
262 if (!SDDS_SetColumn(&sdds_dataset, SDDS_BY_NAME, binary_string,
263 rows, buff))
264 SDDS_PrintErrors(stderr,
265 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
266 SDDS_FreeStringArray(binary_string, rows);
267 }
268 free(binary_string);
269 binary_string = NULL;
270
271 /* Write the processed page */
272 if (!SDDS_WritePage(&sdds_dataset))
273 SDDS_PrintErrors(stderr,
274 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
275 }
276
277 /* Finalize and clean up */
278 if (!SDDS_Terminate(&sdds_orig) || !SDDS_Terminate(&sdds_dataset))
279 SDDS_PrintErrors(stderr,
280 SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
281 if (tmpfile_used && !replaceFileAndBackUp(input, output))
282 exit(1);
283 SDDS_FreeStringArray(integer_column, integer_columns);
284 free(integer_column);
285 free_scanargs(&s_arg, argc);
286 return 0;
287}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
void * SDDS_GetInternalColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves an internal pointer to the data of a specified column, including all rows.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_FreeStringArray(char **string, int64_t strings)
Frees an array of strings by deallocating each individual string.
char ** getMatchingSDDSNames(SDDS_DATASET *dataset, char **matchName, int32_t matches, int32_t *names, short type)
Retrieves an array of matching SDDS entity names based on specified criteria.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_ULONG
Identifier for the unsigned 32-bit integer data type.
Definition SDDStypes.h:67
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_ULONG64
Identifier for the unsigned 64-bit integer data type.
Definition SDDStypes.h:55
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_SHORT
Identifier for the signed short integer data type.
Definition SDDStypes.h:73
#define SDDS_USHORT
Identifier for the unsigned short integer data type.
Definition SDDStypes.h:79
#define SDDS_LONG64
Identifier for the signed 64-bit integer data type.
Definition SDDStypes.h:49
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
char * delete_chars(char *s, char *t)
Removes all occurrences of characters found in string t from string s.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584