/*
 * Option keyword table and usage text.
 *
 * option is the keyword array that main() hands to match_string() to
 * identify each command-line option. The string literal that follows is the
 * help text: a synopsis of every option, then one description per option.
 * NOTE(review): this is presumably the USAGE text that main() passes to
 * bomb() - confirm against the declaration.
 *
 * The -delimiters synopsis and its description both use the same
 * placeholder form, start=<char>,end=<char>.
 */
87char *option[N_OPTIONS] = {
104 " csv2sdds [<inputFile>] [<outputFile>]\n"
105 " [-pipe[=in][,out]]\n"
108 " [-maxRows=<number>]\n"
109 " [-schfile=<filename>] \n"
110 " [-skiplines=<number>]\n"
111 " [-delimiters=start=<char>,end=<char>] \n"
112 " [-separator=<char>]\n"
113 " [-columnData=name=<name>,type=<type>,units=<units>...]\n"
114 " [-uselabels[=units]] \n"
115 " [-majorOrder=row|column]\n"
116 " [-fillIn=<zero|last>]\n"
118 " -pipe[=in][,out] SDDS toolkit pipe option.\n"
119 " -asciiOutput Requests SDDS ASCII output. Default is binary.\n"
120 " -spanLines Ignore line breaks in parsing the input data.\n"
121 " -maxRows=<number> Maximum number of rows to expect in input.\n"
122 " -schfile=<filename> Specifies the SCH file that describes the columns.\n"
123 " -skiplines=<number> Skip the first <number> lines of the input file.\n"
124 " -delimiters=start=<char>,end=<char> Specifies the delimiter characters that bracket fields.\n"
125 " The default is '\"' for both start and end delimiters.\n"
126 " -separator=<char> Specifies the separator character between fields. The default is ','.\n"
127 " -columnData=name=<name>,type=<type>,units=<units>...\n"
128 " Specifies column data details. Must be provided in the order\n"
129 " corresponding to the data columns in the input file.\n"
130 " -uselabels[=units] Defines column names and optionally units from the file headers.\n"
131 " -majorOrder=row|column Specifies the output file major order. Choose between row-major or column-major.\n"
132 " -fillIn=<zero|last> Use '0' or the last value for empty cells. The default is '0'.\n\n"
134 " Converts Comma Separated Values (CSV) data to the SDDS format.\n"
135 " Program by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")\n";
/*
 * Reads the SCH file named by file.
 * In the definition further down, the column table behind columnData is
 * grown for each Field line, and the Separator and Delimiter characters are
 * stored through separator, startDelim and endDelim.
 * main() stores the return value as its column count and treats zero as a
 * failure.
 */
143long ParseSchFile(
char *file,
COLUMN_DATA **columnData,
char *separator,
char *startDelim,
char *endDelim);
145 long asciiOutput,
short columnMajorOrder);
/*
 * Extracts the next field from s into buffer, using separator as the field
 * separator and startDelim/endDelim as the bracketing delimiters.
 * Callers assign the return value back to their scan pointer to continue
 * with the following field.
 * No buffer size is passed, so the caller must supply a buffer large enough
 * for the longest field.
 */
146char *getToken(
char *s,
char separator,
char startDelim,
char endDelim,
char *buffer);
/*
 * Tokenizes one input line (ptr) with getToken() and stores one value per
 * column into row number rows of SDDSout through SDDS_SetRowValues().
 * columnData supplies the type and output column index of each of the
 * columns entries. fillInZero and spanLines are consulted when a token
 * carries no data (see the definition).
 */
147void writeOneRowToOutputFile(
SDDS_DATASET *SDDSout,
char *ptr,
char separator,
char startDelim,
char endDelim,
148 long spanLines,
COLUMN_DATA *columnData,
long columns, int64_t rows,
short fillInZero);
/* Converts the characters of ptr to lower case in place using tolower(). */
149void lowerstring(
char *ptr);
/*
 * Program entry point.
 *
 * Visible flow: scan the command line, handle each option, check that
 * exactly one source of column definitions was given, open the input file,
 * obtain column definitions (SCH file or header labels), set up the SDDS
 * output, then read the input line by line and write each data line as one
 * output row.
 *
 * NOTE(review): many statements of this function are not visible here; the
 * comments below describe only the statements that are.
 */
151int main(
int argc,
char **argv) {
153 char *input, *output, *schFile;
155 SCANNED_ARG *scanned;
157 int64_t rows, maxRows;
158 long asciiOutput, columns, spanLines, skipLines = 0, lines;
159 short columnlabels = 0, unitlabels = 0, uselabels = 0;
161 char separator, startDelim, endDelim;
162 char s[10240], *ptr, *typeName, *unitsName;
163 unsigned long dummyFlags, pipeFlags, majorOrderFlag, fillInFlag;
164 short columnMajorOrder = 0, fillInZero = 1;
/* argc is overwritten with the return value of scanargs() and then bounds the option loop. */
167 argc =
scanargs(&scanned, argc, argv);
171 input = output = schFile = NULL;
172 asciiOutput = spanLines = columns = 0;
180 for (iArg = 1; iArg < argc; iArg++) {
181 if (scanned[iArg].arg_type == OPTION) {
182 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
/* -majorOrder: drop the option keyword from the item count, then accept row or column. */
183 case SET_MAJOR_ORDER:
185 scanned[iArg].n_items--;
186 if (scanned[iArg].n_items > 0 &&
187 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
188 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
189 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
190 SDDS_Bomb(
"invalid -majorOrder syntax/values");
191 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
192 columnMajorOrder = 1;
193 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
194 columnMajorOrder = 0;
196 case SET_ASCIIOUTPUT:
/* The statements from here to the fillInFlag tests parse -fillIn: zero maps to flag 0x0001, last to 0x0002. */
201 scanned[iArg].n_items--;
202 if (scanned[iArg].n_items > 0 &&
203 (!
scanItemList(&fillInFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
204 "zero", -1, NULL, 0, 0x0001UL,
205 "last", -1, NULL, 0, 0x0002UL, NULL)))
206 SDDS_Bomb(
"invalid -fillIn syntax/values");
207 if (fillInFlag & 0x0001UL)
209 else if (fillInFlag & 0x0002UL)
213 if (!(scanned[iArg].n_items -= 1) ||
214 !
scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
219 scanned[iArg].n_items++;
222 if (scanned[iArg].n_items != 2 || strlen(scanned[iArg].list[1]) < 1)
/* Only the first character of the option value is used as the separator. */
225 separator = scanned[iArg].list[1][0];
/* Grow the column table by one entry and clear its name and units before scanning the option items. */
228 columnData =
SDDS_Realloc(columnData,
sizeof(*columnData) * (columns + 1));
229 columnData[columns].name = NULL;
230 columnData[columns].units = NULL;
233 if (!(scanned[iArg].n_items -= 1) ||
234 !
scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
235 "name",
SDDS_STRING, &(columnData[columns].name), 1, 0,
238 !columnData[columns].name ||
239 !strlen(columnData[columns].name) ||
243 scanned[iArg].n_items++;
244 columnData[columns].units = unitsName;
248 if (scanned[iArg].n_items != 2)
/* schFile points into the scanned argument list; no copy is made. */
250 schFile = scanned[iArg].list[1];
252 fprintf(stderr,
"File not found: %s (csv2sdds)\n", schFile);
257 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
264 if (scanned[iArg].n_items != 2 ||
265 strlen(scanned[iArg].list[1]) < 1 ||
266 sscanf(scanned[iArg].list[1],
"%" SCNd64, &maxRows) != 1 ||
271 if (scanned[iArg].n_items != 2 ||
272 strlen(scanned[iArg].list[1]) < 1 ||
273 sscanf(scanned[iArg].list[1],
"%ld", &skipLines) != 1 ||
278 if (scanned[iArg].n_items > 2)
282 if (scanned[iArg].n_items == 2)
286 bomb(
"Invalid option encountered.", USAGE);
/* Non-option arguments: the first is the input file name, the second the output file name. */
291 input = scanned[iArg].list[0];
293 output = scanned[iArg].list[0];
295 bomb(
"Too many filenames provided.", USAGE);
/* Exactly one source of column definitions is allowed: -columnData, -schFile, or -uselabels. */
300 if (!columns && !schFile && !columnlabels)
301 SDDS_Bomb(
"Specify at least one of -columnData, -schFile, or -uselabels options.");
302 if (columns && schFile)
303 SDDS_Bomb(
"Specify either -columnData options or -schFile option, not both.");
304 if (columns && columnlabels)
305 SDDS_Bomb(
"Specify either -columnData options or -uselabels option, not both.");
306 if (schFile && columnlabels)
307 SDDS_Bomb(
"Specify either -schFile option or -uselabels option, not both.");
313 if (!(fpi = fopen(input,
"r")))
314 SDDS_Bomb(
"Problem opening input file.");
/* If no columns are defined yet, ParseSchFile() supplies them; a zero count is fatal. */
320 if (!columns && !(columns = ParseSchFile(schFile, &columnData, &separator, &startDelim, &endDelim)))
321 SDDS_Bomb(
"Problem reading or parsing SCH file.");
323 SetUpOutputFile(&SDDSout, input, output, columnData, columns, asciiOutput, columnMajorOrder);
/* Read the input one line at a time into the fixed 10240-byte buffer s. */
331 while (fgets(s,
sizeof(s), fpi)) {
/*
 * Loops while the last character of s has a code below 27 (loop body not
 * visible here).
 * NOTE(review): where char is signed, bytes of 0x80 and above also compare
 * below 27 - confirm this is intended.
 */
334 while ((i = strlen(s)) && s[i - 1] < 27)
337 if (strlen(s) == 0 && (skipLines && (lines > skipLines)))
341 fprintf(stderr,
"line: >%s<\n", ptr);
/* Label line (-uselabels): each token becomes the name of a newly appended column. */
343 if (columnlabels && (!skipLines || (lines > skipLines))) {
347 ptr = getToken(ptr, separator, startDelim, endDelim, t);
350 columnData =
SDDS_Realloc(columnData,
sizeof(*columnData) * (columns + 1));
351 columnData[columns].name = malloc(strlen(t) + 1);
353 sprintf(columnData[columns].name,
"%s", t);
354 columnData[columns].units = NULL;
360 }
/* Units line: each token becomes the units string of the corresponding existing column. */
else if (unitlabels && (!skipLines || (lines > skipLines))) {
363 for (i = 0; i < columns; i++) {
364 ptr = getToken(ptr, separator, startDelim, endDelim, t);
367 columnData[i].units = malloc(strlen(t) + 1);
368 sprintf(columnData[i].units,
"%s", t);
/*
 * Each token is probed with a floating-point scan; what is done with the
 * outcome is not visible here. The output file is set up afterwards.
 */
379 for (i = 0; i < columns; i++) {
380 tmpPtr = getToken(tmpPtr, separator, startDelim, endDelim, t);
383 if (sscanf(t,
"%lf", &tD) == 1)
386 SetUpOutputFile(&SDDSout, input, output, columnData, columns, asciiOutput, columnMajorOrder);
/* Lines past the skip count are written as output row number rows. */
391 if (!skipLines || (lines > skipLines)) {
392 writeOneRowToOutputFile(&SDDSout, ptr, separator, startDelim, endDelim, spanLines, columnData, columns, rows, fillInZero);
/* NOTE(review): presumably handles the table filling up as rows approaches maxRows; the body is not visible - confirm. */
395 if (rows >= maxRows - 1) {
/*
 * ParseSchFile: reads the SCH description file line by line and dispatches
 * on the tag name of each line.
 *
 *   file        path of the SCH file, opened for reading
 *   columnData  column table, grown by one entry for each Field line
 *   separator   receives the Separator character
 *   startDelim  receives the Delimiter character
 *   endDelim    receives the same Delimiter character
 *
 * Filetype must be Delimited and CharSet must be ascii. A null separator or
 * delimiter, an unreadable Field index, or a gap in the Field numbering
 * terminates the program through SDDS_Bomb(). Unknown tags only produce a
 * warning on stderr.
 *
 * main() stores the return value as its column count and treats zero as a
 * failure.
 * NOTE(review): several statements of this function are not visible here;
 * the comments describe only the statements that are.
 */
412long ParseSchFile(
char *file,
COLUMN_DATA **columnData,
char *separator,
char *startDelim,
char *endDelim) {
414 char s[10240], *ptr, *ptr0;
415 long l, fieldIndex, lastFieldIndex, columns;
417 if (!(fp = fopen(file,
"r"))) {
423 while (fgets(s,
sizeof(s), fp)) {
/*
 * Loops while the last character of s has a code below 27 (loop body not
 * visible here).
 * NOTE(review): where char is signed, bytes of 0x80 and above also compare
 * below 27 - confirm this is intended.
 */
424 while ((l = strlen(s)) && s[l - 1] < 27)
/* Locate the = that separates tag and value (handling of lines without one is not visible here). */
428 if (!(ptr = strchr(s,
'=')))
/*
 * s is compared as a bare tag name and ptr as the value, so the line has
 * presumably been split at the = sign in statements not visible here -
 * confirm.
 */
431 if (strcmp(s,
"Filetype") == 0) {
432 if (strcmp(ptr,
"Delimited"))
433 SDDS_Bomb(
"Require Filetype = Delimited in SCH file.");
434 }
else if (strcmp(s,
"Separator") == 0) {
/* Only the first character of the value is used. */
435 if (!(*separator = *ptr))
436 SDDS_Bomb(
"Null separator in SCH file.");
437 }
else if (strcmp(s,
"Delimiter") == 0) {
/* One character serves as both the start and the end delimiter. */
438 if (!(*endDelim = *startDelim = *ptr))
439 SDDS_Bomb(
"Null delimiter in SCH file.");
440 }
else if (strcmp(s,
"CharSet") == 0) {
441 if (strcmp(ptr,
"ascii"))
442 SDDS_Bomb(
"Require CharSet = ascii in SCH file.");
443 }
else if (strncmp(s,
"Field", strlen(
"Field")) == 0) {
/*
 * sscanf returns EOF, which is nonzero, when the input ends before any
 * digits are read. Require exactly one conversion so that fieldIndex is
 * never used without having been set.
 */
444 if (sscanf(s,
"Field%ld", &fieldIndex) != 1)
445 SDDS_Bomb(
"Error scanning field index in SCH file.");
/*
 * Field indices must increase by exactly one from line to line.
 * NOTE(review): the initial values of lastFieldIndex and columns are not
 * visible here.
 */
446 if (fieldIndex - lastFieldIndex != 1)
447 SDDS_Bomb(
"Gap or nonmonotonicity in field index values.");
448 lastFieldIndex = fieldIndex;
449 *columnData =
SDDS_Realloc(*columnData,
sizeof(**columnData) * (columns + 1));
/* The field value is comma separated; the visible statements search for a comma twice. */
452 if (!(ptr = strchr(ptr0,
',')))
456 (*columnData)[columns].units = NULL;
458 if (!(ptr = strchr(ptr0,
',')))
/* Compare the type keyword against the supported names; the resulting assignments are not visible here. */
463 if (strcmp(ptr0,
"string") == 0)
465 else if (strcmp(ptr0,
"char") == 0)
467 else if (strcmp(ptr0,
"float") == 0)
469 else if (strcmp(ptr0,
"double") == 0)
472 fprintf(stderr,
"Unknown type '%s' given to '%s'\n", ptr0, (*columnData)[columns].name);
477 fprintf(stderr,
"Warning: unknown tag value in SCH file: %s\n", s);
/*
 * SetUpOutputFile: prepares the SDDS output dataset.
 *
 *   SDDSout           dataset to configure
 *   input             input file name, used only in the description text
 *                     (stdin is substituted when it is NULL)
 *   output            output file name
 *   columnData        column definitions; the index member of each entry is
 *                     set to the value returned by SDDS_DefineColumn()
 *   columns           number of entries in columnData
 *   asciiOutput       ASCII output selector
 *   columnMajorOrder  copied into the column_major field of the layout
 *
 * NOTE(review): the declaration of s is not visible here, so its size
 * cannot be checked against the two sprintf() calls, whose output length
 * depends on the input file name and the column name - confirm it is large
 * enough.
 */
484void SetUpOutputFile(
SDDS_DATASET *SDDSout,
char *input,
char *output,
COLUMN_DATA *columnData,
long columns,
long asciiOutput,
short columnMajorOrder) {
488 sprintf(s,
"csv2sdds conversion of %s", input ? input :
"stdin");
492 SDDSout->layout.data_mode.column_major = columnMajorOrder;
/* Define one output column per entry and remember its index; a negative index signals failure. */
493 for (i = 0; i < columns; i++) {
494 if ((columnData[i].index =
SDDS_DefineColumn(SDDSout, columnData[i].name, NULL, columnData[i].units, NULL, NULL, columnData[i].type, 0)) < 0) {
495 sprintf(s,
"Problem defining column %s.", columnData[i].name);
/*
 * getToken: copies the next field of s into buffer.
 *
 * The visible statements distinguish three cases: s starts with the
 * separator, s starts with the start delimiter (delimited field), and a
 * plain field that runs up to the next separator or the end of the string.
 *
 * NOTE(review): most of this function, including its return statements, is
 * not visible here.
 */
504char *getToken(
char *s,
516 if (*s == separator) {
524 if (*s == startDelim) {
/* An end delimiter preceded by a backslash does not terminate the field. */
528 if (*ptr == endDelim && *(ptr - 1) !=
'\\') {
/* Copy ptr - s - 1 characters and terminate explicitly at that same offset. */
534 strncpy(buffer, s, ptr - s - 1);
535 buffer[ptr - s - 1] = 0;
537 if (*ptr && *ptr == separator)
/* Undelimited field: advance to the next separator or the end of the string. */
544 while (*ptr && *ptr != separator)
546 if (*ptr == separator) {
547 strncpy(buffer, s, ptr - s);
/*
 * writeOneRowToOutputFile: converts one input line into one output row.
 *
 *   SDDSout     dataset that receives the values
 *   ptr         text of the input line; advanced by getToken() per column
 *   separator, startDelim, endDelim   passed through to getToken()
 *   spanLines   tested together with nullData for each column
 *   columnData  type and output index of each column
 *   columns     number of columns to read from the line
 *   rows        index of the output row being filled
 *   fillInZero  fill-in selector for tokens without data
 *
 * For every column the token is scanned according to the column type and
 * stored with SDDS_SetRowValues(). When nullData is set or the scan fails, a
 * fallback value is chosen; one visible fallback reads the element at
 * rows - 1 of the column array in SDDSout->data.
 *
 * NOTE(review): the case labels, the assignment of nullData and the use of
 * fillInZero are not visible here. Reading element rows - 1 requires
 * rows > 0 - confirm that the statements not shown guard the first row.
 */
556void writeOneRowToOutputFile(
SDDS_DATASET *SDDSout,
char *ptr,
char separator,
char startDelim,
char endDelim,
long spanLines,
COLUMN_DATA *columnData,
long columns, int64_t rows,
short fillInZero) {
562 unsigned short ushortValue;
567 uint64_t ulong64Value;
571 for (column = 0; column < columns; column++) {
572 ptr = getToken(ptr, separator, startDelim, endDelim, t);
574 fprintf(stderr,
"token: >%s<\n", t);
579 if (nullData && spanLines) {
582 switch (columnData[column].type) {
/* Signed short column. */
584 if (nullData || sscanf(t,
"%hd", &shortValue) != 1) {
591 shortValue = ((
short *)SDDSout->data[columnData[column].index])[rows - 1];
594 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, shortValue, -1))
/* Unsigned short column. */
598 if (nullData || sscanf(t,
"%hu", &ushortValue) != 1) {
605 ushortValue = ((
unsigned short *)SDDSout->data[columnData[column].index])[rows - 1];
608 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, ushortValue, -1))
/* Signed 32-bit column. */
612 if (nullData || sscanf(t,
"%" SCNd32, &longValue) != 1) {
619 longValue = ((int32_t *)SDDSout->data[columnData[column].index])[rows - 1];
622 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, longValue, -1))
/* Unsigned 32-bit column. */
626 if (nullData || sscanf(t,
"%" SCNu32, &ulongValue) != 1) {
633 ulongValue = ((uint32_t *)SDDSout->data[columnData[column].index])[rows - 1];
636 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, ulongValue, -1))
/* Signed 64-bit column. */
640 if (nullData || sscanf(t,
"%" SCNd64, &long64Value) != 1) {
647 long64Value = ((int64_t *)SDDSout->data[columnData[column].index])[rows - 1];
650 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, long64Value, -1))
/* Unsigned 64-bit column. */
654 if (nullData || sscanf(t,
"%" SCNu64, &ulong64Value) != 1) {
661 ulong64Value = ((uint64_t *)SDDSout->data[columnData[column].index])[rows - 1];
664 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, ulong64Value, -1))
/* Float column. */
668 if (nullData || sscanf(t,
"%f", &floatValue) != 1) {
675 floatValue = ((
float *)SDDSout->data[columnData[column].index])[rows - 1];
678 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, floatValue, -1))
/* Double column. */
682 if (nullData || sscanf(t,
"%lf", &doubleValue) != 1) {
689 doubleValue = ((
double *)SDDSout->data[columnData[column].index])[rows - 1];
692 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, doubleValue, -1))
/* Single character column. */
696 if (nullData || sscanf(t,
"%c", &charValue) != 1) {
703 charValue = ((
char *)SDDSout->data[columnData[column].index])[rows - 1];
706 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, charValue, -1))
/* The token text itself is stored, without any scan. */
711 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, t, -1))
715 SDDS_Bomb(
"Unknown or unsupported data type encountered.");
/*
 * lowerstring: converts the first size characters of ptr to lower case in
 * place.
 * Each character is cast to unsigned char before the tolower() call, which
 * keeps the argument within the range tolower() accepts even when char is
 * signed.
 * NOTE(review): the declarations of i and size are not visible here; size
 * is presumably the string length - confirm.
 */
720void lowerstring(
char *ptr) {
723 for (i = 0; i < size; i++)
724 ptr[i] = tolower((
unsigned char)ptr[i]);
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_LengthenTable(SDDS_DATASET *SDDS_dataset, int64_t n_additional_rows)
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_InterpretEscapes(char *s)
Interprets and converts escape sequences in a string.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
int32_t SDDS_IdentifyType(char *typeName)
Identifies the SDDS data type based on its string name.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_ULONG
Identifier for the unsigned 32-bit integer data type.
#define SDDS_FLOAT
Identifier for the float data type.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_ULONG64
Identifier for the unsigned 64-bit integer data type.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_SHORT
Identifier for the signed short integer data type.
#define SDDS_CHARACTER
Identifier for the character data type.
#define SDDS_USHORT
Identifier for the unsigned short integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
#define SDDS_LONG64
Identifier for the signed 64-bit integer data type.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
char * delete_chars(char *s, char *t)
Removes all occurrences of characters found in string t from string s.
long fexists(const char *filename)
Checks if a file exists.
void interpret_escaped_quotes(char *s)
Processes a string to interpret and replace escaped quotation marks.
void interpret_escapes(char *s)
Interpret C escape sequences in a string.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
char * replace_chars(char *s, char *from, char *to)
Maps one set of characters to another in a given string.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
void free_scanargs(SCANNED_ARG **scanned, int argc)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
char * trim_spaces(char *s)
Trims leading and trailing spaces from a string.