/* Option identifiers.
 * main() switches on the value that match_string() returns for an option
 * argument looked up in the option[] table, and uses these constants as the
 * case labels.
 * NOTE(review): the values 4, 5 and 7 have no #define among the lines shown
 * here; presumably they belong to options defined elsewhere - confirm. */
109#define SET_ASCIIOUTPUT 0
110#define SET_DELIMITERS 1
111#define SET_SEPARATOR 2
112#define SET_COLUMNDATA 3
115#define SET_SPANLINES 6
117#define SET_SKIPLINES 8
118#define SET_USELABELS 9
119#define SET_MAJOR_ORDER 10
120#define SET_FILL_IN 11
123char *option[N_OPTIONS] = {
/* Usage and help text.
 * main() passes it to bomb() together with an error message when an invalid
 * option or too many filenames are seen. The compile date, compile time and
 * SVN_VERSION are concatenated into the last line of the text.
 * NOTE(review): the synopsis spells the options -delimiters and -columnData,
 * while the descriptions further down refer to delimiter and to -column
 * options; the sentence describing the separator default also stops after
 * the words The default. The wording should be reconciled. */
138char *USAGE =
"csv2sdds [<inputFile>] [<outputFile>] [-pipe[=in][,out]]\n\
139[-asciiOutput] [-spanLines] [-maxRows=<number>]\n\
140[-schfile=<filename>] [-skiplines=<number>]\n\
141[-delimiters=start=<start>,end=<char>] [-separator=<char>]\n\
142[-columnData=name=<name>,type=<type>,units=<units>...]\n\
143[-uselabels[=units]] [-majorOrder=row|column]\n\
144[-fillIn=<zero|last>]\n\n\
145pipe SDDS toolkit pipe option.\n\
146asciiOutput Requests SDDS ASCII output. Default is binary.\n\
147spanLines Ignore line breaks in parsing the input data.\n\
148schFile Gives the name of the SCH file that describes the\n\
149 columns in the <inputFile>.\n\
150delimiter Gives the delimiter characters that bracket fields.\n\
151 The default is \" for both the start and end delimiter.\n\
152separator Gives the separator character between fields. The default\n\
154skiplines skip the first <number> lines of input file. \n\
155columnData Gives the name of a column in <inputFile> along with the\n\
156 SDDS data type for the column (one of short, long, float, double,\n\
157 character, or string), and optional units input.\n\
158 -column options must be given in the\n\
159 order corresponding to the order of the data in <inputFile>.\n\
160uselabels The column names and optionally the units are defined in the\n\
161 file prior to the data.\n\
162maxRows Maximum number of rows to expect in input.\n\
163majorOrder writes output file in row or column major order\n\
164fillIn Use 0 or the last value for empty cells. The default is 0.\n\n\
165Converts Comma Separated Values data to SDDS.\n\
166Program by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")\n";
/* Reads the SCH description file named by file.
 * The definition grows *columnData with SDDS_Realloc() for every Field entry
 * and stores the separator and delimiter characters through separator,
 * startDelim and endDelim. main() uses the result as the column count and
 * treats a result of 0 as a failure. */
174long ParseSchFile(
char *file,
COLUMN_DATA **columnData,
char *separator,
char *startDelim,
char *endDelim);
176 long asciiOutput,
short columnMajorOrder);
/* Copies the next field of s into buffer, using separator, startDelim and
 * endDelim to find the field boundaries. Callers assign the returned pointer
 * back to their scan position before asking for the following field. */
177char *getToken(
char *s,
char separator,
char startDelim,
char endDelim,
char *buffer);
/* Splits the text at ptr into one token per column and stores each value in
 * row number rows of SDDSout through SDDS_SetRowValues(). columnData supplies
 * the type and SDDS column index of every column. */
178void writeOneRowToOutputFile(
SDDS_DATASET *SDDSout,
char *ptr,
char separator,
char startDelim,
char endDelim,
179 long spanLines,
COLUMN_DATA *columnData,
long columns, int64_t rows,
short fillInZero);
/* Converts the characters of ptr to lower case in place with tolower(). */
180void lowerstring(
char *ptr);
/* Program entry point.
 * From the statements visible here: the command line is scanned with
 * scanargs(); every OPTION argument is dispatched through match_string() on
 * the option[] table; non-option arguments become the input and output
 * filenames; the input is then read line by line with fgets() and data lines
 * are handed to writeOneRowToOutputFile().
 * NOTE(review): a number of statements of this function (case labels, loop
 * bodies, closing braces, the return) are not among the lines shown; the
 * comments below describe only what is visible. */
182int main(
int argc,
char **argv) {
184 char *input, *output, *schFile;
186 SCANNED_ARG *scanned;
188 int64_t rows, maxRows;
189 long asciiOutput, columns, spanLines, skipLines = 0, lines;
190 short columnlabels = 0, unitlabels = 0, uselabels = 0;
192 char separator, startDelim, endDelim;
193 char s[10240], *ptr, *typeName, *unitsName;
194 unsigned long dummyFlags, pipeFlags, majorOrderFlag, fillInFlag;
195 short columnMajorOrder = 0, fillInZero = 1;
/* scanargs() fills scanned and its return value replaces argc. */
198 argc =
scanargs(&scanned, argc, argv);
201 input = output = schFile = NULL;
202 asciiOutput = spanLines = columns = 0;
210 for (iArg = 1; iArg < argc; iArg++) {
211 if (scanned[iArg].arg_type == OPTION) {
212 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
/* -majorOrder: the keyword column selects column-major output (1), the
 * keyword row selects row-major output (0). */
213 case SET_MAJOR_ORDER:
215 scanned[iArg].n_items--;
216 if (scanned[iArg].n_items > 0 &&
217 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
218 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
219 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
220 SDDS_Bomb(
"invalid -majorOrder syntax/values");
221 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
222 columnMajorOrder = 1;
223 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
224 columnMajorOrder = 0;
226 case SET_ASCIIOUTPUT:
/* -fillIn: the keywords zero and last are mapped to the flag bits 0x0001 and
 * 0x0002 of fillInFlag. */
231 scanned[iArg].n_items--;
232 if (scanned[iArg].n_items > 0 &&
233 (!
scanItemList(&fillInFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
234 "zero", -1, NULL, 0, 0x0001UL,
235 "last", -1, NULL, 0, 0x0002UL, NULL)))
236 SDDS_Bomb(
"invalid -fillIn syntax/values");
237 if (fillInFlag & 0x0001UL)
239 else if (fillInFlag & 0x0002UL)
243 if (!(scanned[iArg].n_items -= 1) ||
244 !
scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
249 scanned[iArg].n_items++;
/* -separator: exactly one non-empty value is required and its first
 * character becomes the separator. */
252 if (scanned[iArg].n_items != 2 || strlen(scanned[iArg].list[1]) < 1)
255 separator = scanned[iArg].list[1][0];
/* -columnData: grow columnData by one entry; name and units start as NULL
 * before the item list is scanned. */
258 columnData =
SDDS_Realloc(columnData,
sizeof(*columnData) * (columns + 1));
259 columnData[columns].name = NULL;
260 columnData[columns].units = NULL;
263 if (!(scanned[iArg].n_items -= 1) ||
264 !
scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
265 "name",
SDDS_STRING, &(columnData[columns].name), 1, 0,
268 !columnData[columns].name ||
269 !strlen(columnData[columns].name) ||
273 scanned[iArg].n_items++;
274 columnData[columns].units = unitsName;
278 if (scanned[iArg].n_items != 2)
280 schFile = scanned[iArg].list[1];
282 fprintf(stderr,
"file not found: %s (csv2sdds)\n", schFile);
287 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
/* -maxRows and -skiplines each take exactly one numeric value. */
294 if (scanned[iArg].n_items != 2 ||
295 strlen(scanned[iArg].list[1]) < 1 ||
296 sscanf(scanned[iArg].list[1],
"%" SCNd64, &maxRows) != 1 ||
301 if (scanned[iArg].n_items != 2 ||
302 strlen(scanned[iArg].list[1]) < 1 ||
303 sscanf(scanned[iArg].list[1],
"%ld", &skipLines) != 1 ||
308 if (scanned[iArg].n_items > 2)
312 if (scanned[iArg].n_items == 2)
316 bomb(
"invalid option seen", USAGE);
321 input = scanned[iArg].list[0];
323 output = scanned[iArg].list[0];
325 bomb(
"too many filenames", USAGE);
/* Exactly one source of column definitions is accepted: -columnData,
 * -schFile or -uselabels.
 * NOTE(review): the message for the columns && schFile case names
 * -uselabels instead of -schFile; it looks copied from the check that
 * follows it. */
329 if (!columns && !schFile && !columnlabels)
330 SDDS_Bomb(
"either give one or more -columnData options or -schFile option or -uselabels option");
331 if (columns && schFile)
332 SDDS_Bomb(
"either give one or more -columnData options or -uselabels option");
333 if (columns && columnlabels)
334 SDDS_Bomb(
"either give one or more -columnData options or -uselabels option");
335 if (schFile && columnlabels)
336 SDDS_Bomb(
"either give -schFile options or -uselabels option");
342 if (!(fpi = fopen(input,
"r")))
/* When columns is still 0 here, ParseSchFile() supplies the column count and
 * a result of 0 aborts the program. */
349 if (!columns && !(columns = ParseSchFile(schFile, &columnData, &separator, &startDelim, &endDelim)))
350 SDDS_Bomb(
"problem reading or parsing sch file");
352 SetUpOutputFile(&SDDSout, input, output, columnData, columns, asciiOutput, columnMajorOrder);
/* Main read loop: one fgets() call per iteration into the 10240-byte
 * buffer s. */
360 while (fgets(s, 10240, fpi)) {
/* The condition holds while s is non-empty and its last character has a code
 * below 27; the loop body is not shown here. */
363 while ((i = strlen(s)) && s[i - 1] < 27)
368 if (strlen(s) == 0 && (skipLines && (lines > skipLines)))
372 fprintf(stderr,
"line: >%s<\n", ptr);
/* Label line: each token becomes the name of a newly appended column. */
374 if (columnlabels && (!skipLines || (lines > skipLines))) {
378 ptr = getToken(ptr, separator, startDelim, endDelim, t);
381 columnData =
SDDS_Realloc(columnData,
sizeof(*columnData) * (columns + 1));
/* NOTE(review): the request is sizeof(char *) * strlen(t) bytes, which is
 * zero bytes for an empty token even though sprintf() still writes the
 * terminating NUL; the copy needs strlen(t) + 1 bytes. The malloc() result
 * is also used without a visible check. */
382 columnData[columns].name = malloc(
sizeof(
char *) * strlen(t));
384 sprintf(columnData[columns].name,
"%s", t);
385 columnData[columns].units = NULL;
391 }
else if (unitlabels && (!skipLines || (lines > skipLines))) {
/* Units line: one token per already defined column becomes the units string
 * of that column. The allocation has the same sizing issue as the name
 * buffer above. */
394 for (i = 0; i < columns; i++) {
395 ptr = getToken(ptr, separator, startDelim, endDelim, t);
398 columnData[i].units = malloc(
sizeof(
char *) * strlen(t));
399 sprintf(columnData[i].units,
"%s", t);
/* Each token of the line is probed with a floating-point sscanf(); what is
 * done with the outcome is not shown here. SetUpOutputFile() is called
 * after the probe loop. */
410 for (i = 0; i < columns; i++) {
411 tmpPtr = getToken(tmpPtr, separator, startDelim, endDelim, t);
414 if (sscanf(t,
"%lf", &tD) == 1)
417 SetUpOutputFile(&SDDSout, input, output, columnData, columns, asciiOutput, columnMajorOrder);
/* A data row is written only when no skipping was requested or lines is
 * greater than skipLines. */
422 if (!skipLines || (lines > skipLines)) {
423 writeOneRowToOutputFile(&SDDSout, ptr, separator, startDelim, endDelim, spanLines, columnData, columns, rows, fillInZero);
/* rows is compared with maxRows - 1; the action taken is not shown here
 * (presumably the table is lengthened - confirm). */
426 if (rows >= maxRows - 1) {
/* Reads the SCH description file named by file.
 * Visible behaviour: every line is searched for an equals sign and the tag
 * in front of it is compared with Filetype, Separator, Delimiter, CharSet
 * and the prefix Field. Separator and Delimiter values are stored through
 * separator, startDelim and endDelim. Each Field line must carry the next
 * consecutive index and grows *columnData by one entry.
 * NOTE(review): s is compared against bare tag names, so the line is
 * presumably cut at the equals sign on a line not shown. The return
 * statement and the initial values of lastFieldIndex and columns are not
 * visible either - confirm both are initialised before the loop. */
443long ParseSchFile(
char *file,
COLUMN_DATA **columnData,
char *separator,
char *startDelim,
char *endDelim) {
445 char s[10240], *ptr, *ptr0;
446 long l, fieldIndex, lastFieldIndex, columns;
448 if (!(fp = fopen(file,
"r")))
453 while (fgets(s, 10240, fp)) {
/* Same trailing-character test as in main(): last character code below 27. */
454 while ((l = strlen(s)) && s[l - 1] < 27)
458 if (!(ptr = strchr(s,
'=')))
/* Only the value Delimited is accepted for Filetype. */
461 if (strcmp(s,
"Filetype") == 0) {
462 if (strcmp(ptr,
"Delimited"))
463 SDDS_Bomb(
"Require Filetype = Delimited in SCH file.");
464 }
else if (strcmp(s,
"Separator") == 0) {
/* The first character of the value becomes the separator; NUL is rejected. */
465 if (!(*separator = *ptr))
466 SDDS_Bomb(
"Null separator in SCH file.");
467 }
else if (strcmp(s,
"Delimiter") == 0) {
/* A single character serves as both start and end delimiter. */
468 if (!(*endDelim = *startDelim = *ptr))
469 SDDS_Bomb(
"Null delimiter in SCH file.");
470 }
else if (strcmp(s,
"CharSet") == 0) {
471 if (strcmp(ptr,
"ascii"))
472 SDDS_Bomb(
"Require CharSet = ascii in SCH file.");
473 }
else if (strncmp(s,
"Field", strlen(
"Field")) == 0) {
/* NOTE(review): the negated sscanf() result is true only for a return value
 * of 0. When the tag is just Field with no digits, sscanf() reaches the end
 * of input before the conversion and returns EOF, which passes this check
 * and leaves fieldIndex unset. Comparing the result with 1 would catch both
 * cases. */
474 if (!sscanf(s,
"Field%ld", &fieldIndex))
475 SDDS_Bomb(
"Error scanning field index in SCH file.");
/* Field indices must increase by exactly one from one Field line to the
 * next. */
476 if (fieldIndex - lastFieldIndex != 1)
477 SDDS_Bomb(
"Gap or nonmonotonicity in field index values.");
478 lastFieldIndex = fieldIndex;
479 *columnData =
SDDS_Realloc(*columnData,
sizeof(**columnData) * (columns + 1));
/* The value part is split at commas; ptr0 marks the start of the current
 * piece (its assignment is not shown here). */
482 if (!(ptr = strchr(ptr0,
',')))
486 (*columnData)[columns].units = NULL;
488 if (!(ptr = strchr(ptr0,
',')))
/* Type names recognised in the visible lines: string, char, float, double. */
493 if (strcmp(ptr0,
"string") == 0)
495 else if (strcmp(ptr0,
"char") == 0)
497 else if (strcmp(ptr0,
"float") == 0)
499 else if (strcmp(ptr0,
"double") == 0)
/* NOTE(review): the message below misspells Unknown. */
502 fprintf(stderr,
"Unknow type '%s' given to '%s'\n", ptr0, (*columnData)[columns].name);
/* Presumably the fall-through branch for tags that match none of the names
 * above - the enclosing else is not shown. */
507 fprintf(stderr,
"Warning: unknown tag value in SCH file: %s\n", s);
/* Prepares the SDDS output dataset.
 * Visible behaviour: builds a description string from the input filename
 * (or the word stdin when input is NULL), stores columnMajorOrder in the
 * layout of SDDSout, and defines one SDDS column per columnData entry with
 * its name, units and type. The value returned by SDDS_DefineColumn() is
 * kept in columnData[i].index; a negative value is handled as an error.
 * NOTE(review): both sprintf() calls write into s, whose declaration is not
 * visible here. The input name and the column names have no visible length
 * limit, so a bounded snprintf() would be safer - confirm the buffer size.
 * The use of output and asciiOutput is on lines not shown. */
513void SetUpOutputFile(
SDDS_DATASET *SDDSout,
char *input,
char *output,
COLUMN_DATA *columnData,
long columns,
long asciiOutput,
short columnMajorOrder) {
517 sprintf(s,
"csv2sdds conversion of %s", input ? input :
"stdin");
521 SDDSout->layout.data_mode.column_major = columnMajorOrder;
522 for (i = 0; i < columns; i++) {
523 if ((columnData[i].index =
SDDS_DefineColumn(SDDSout, columnData[i].name, NULL, columnData[i].units, NULL, NULL, columnData[i].type, 0)) < 0) {
524 sprintf(s,
"Problem defining column %s.", columnData[i].name);
/* Copies the next field of s into buffer.
 * Visible cases: s starting with the separator; a field that starts with
 * startDelim and is scanned up to an endDelim that is not preceded by a
 * backslash; and an undelimited field that runs up to the next separator or
 * the end of the string.
 * NOTE(review): the rest of the parameter list, the return statements and
 * large parts of the body are not among the lines shown. */
533char *getToken(
char *s,
546 if (*s == separator) {
554 if (*s == startDelim) {
/* NOTE(review): the test reads the character before ptr; confirm ptr can
 * never equal the start of the buffer at this point. */
558 if (*ptr == endDelim && *(ptr - 1) !=
'\\') {
/* strncpy() does not terminate the copy here, so the NUL is stored
 * explicitly on the next line. */
564 strncpy(buffer, s, ptr - s - 1);
565 buffer[ptr - s - 1] = 0;
567 if (*ptr && *ptr == separator)
/* Undelimited field: advance to the next separator or the end of string. */
574 while (*ptr && *ptr != separator)
576 if (*ptr == separator) {
577 strncpy(buffer, s, ptr - s);
/* Converts one line of input text into row number rows of SDDSout.
 * For every column a token is fetched with getToken() and, depending on
 * columnData[column].type, parsed with sscanf() and stored through
 * SDDS_SetRowValues() by column index. When the token is flagged as null or
 * does not parse, one visible fallback reads the value of the previous row
 * (index rows - 1) from SDDSout->data; the choice between that and a zero
 * value is presumably controlled by fillInZero on lines not shown.
 * NOTE(review): case labels, the assignment of nullData and the error
 * handling after SDDS_SetRowValues() are not among the lines shown. */
586void writeOneRowToOutputFile(
SDDS_DATASET *SDDSout,
char *ptr,
char separator,
char startDelim,
char endDelim,
long spanLines,
COLUMN_DATA *columnData,
long columns, int64_t rows,
short fillInZero) {
592 unsigned short ushortValue;
597 uint64_t ulong64Value;
601 for (column = 0; column < columns; column++) {
602 ptr = getToken(ptr, separator, startDelim, endDelim, t);
604 fprintf(stderr,
"token: >%s<\n", t);
609 if (nullData && spanLines) {
612 switch (columnData[column].type) {
/* Signed short column.
 * NOTE(review): the negated sscanf() result only detects a return value of
 * 0, not EOF; here the nullData test comes first and presumably covers the
 * empty token. The same pattern repeats for every numeric type below. */
614 if (nullData || !sscanf(t,
"%hd", &shortValue)) {
/* NOTE(review): index rows - 1 is only valid when rows is greater than 0;
 * no guard is visible in the lines shown. This applies to every fallback
 * read below. */
621 shortValue = ((
short *)SDDSout->data[columnData[column].index])[rows - 1];
624 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, shortValue, -1))
/* Unsigned short column. */
628 if (nullData || !sscanf(t,
"%hu", &ushortValue)) {
635 ushortValue = ((
unsigned short *)SDDSout->data[columnData[column].index])[rows - 1];
638 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, ushortValue, -1))
/* Signed 32-bit column. */
642 if (nullData || !sscanf(t,
"%" SCNd32, &longValue)) {
649 longValue = ((int32_t *)SDDSout->data[columnData[column].index])[rows - 1];
652 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, longValue, -1))
/* Unsigned 32-bit column. */
656 if (nullData || !sscanf(t,
"%" SCNu32, &ulongValue)) {
663 ulongValue = ((uint32_t *)SDDSout->data[columnData[column].index])[rows - 1];
666 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, ulongValue, -1))
/* Signed 64-bit column. */
670 if (nullData || !sscanf(t,
"%" SCNd64, &long64Value)) {
677 long64Value = ((int64_t *)SDDSout->data[columnData[column].index])[rows - 1];
680 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, long64Value, -1))
/* Unsigned 64-bit column. */
684 if (nullData || !sscanf(t,
"%" SCNu64, &ulong64Value)) {
691 ulong64Value = ((uint64_t *)SDDSout->data[columnData[column].index])[rows - 1];
694 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, ulong64Value, -1))
/* Float column. */
698 if (nullData || !sscanf(t,
"%f", &floatValue)) {
705 floatValue = ((
float *)SDDSout->data[columnData[column].index])[rows - 1];
708 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, floatValue, -1))
/* Double column. */
712 if (nullData || !sscanf(t,
"%lf", &doubleValue)) {
719 doubleValue = ((
double *)SDDSout->data[columnData[column].index])[rows - 1];
722 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, doubleValue, -1))
/* Character column: the first character of the token is taken. */
726 if (nullData || !sscanf(t,
"%c", &charValue)) {
733 charValue = ((
char *)SDDSout->data[columnData[column].index])[rows - 1];
736 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, charValue, -1))
/* The token buffer t is passed as the value without an sscanf() step;
 * presumably this is the string column case - its case label is not shown. */
741 if (!
SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, t, -1))
/* Any other type value aborts the program. */
745 SDDS_Bomb(
"Unknown/unsupported data type encountered.");
/* Converts the first size characters of ptr to lower case in place.
 * NOTE(review): tolower() requires an argument that is representable as
 * unsigned char or equal to EOF. ptr[i] is a plain char and may be negative
 * for bytes above 127, so a cast to unsigned char would be safer. The
 * declarations of i and size are not among the lines shown; size is
 * presumably the string length - confirm. */
751void lowerstring(
char *ptr) {
754 for (i = 0; i < size; i++)
755 ptr[i] = tolower(ptr[i]);
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_LengthenTable(SDDS_DATASET *SDDS_dataset, int64_t n_additional_rows)
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_InterpretEscapes(char *s)
Interprets and converts escape sequences in a string.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
int32_t SDDS_IdentifyType(char *typeName)
Identifies the SDDS data type based on its string name.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_ULONG
Identifier for the unsigned 32-bit integer data type.
#define SDDS_FLOAT
Identifier for the float data type.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_ULONG64
Identifier for the unsigned 64-bit integer data type.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_SHORT
Identifier for the signed short integer data type.
#define SDDS_CHARACTER
Identifier for the character data type.
#define SDDS_USHORT
Identifier for the unsigned short integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
#define SDDS_LONG64
Identifier for the signed 64-bit integer data type.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
char * delete_chars(char *s, char *t)
Removes all occurrences of characters found in string t from string s.
long fexists(const char *filename)
Checks if a file exists.
void interpret_escaped_quotes(char *s)
Processes a string to interpret and replace escaped quotation marks.
void interpret_escapes(char *s)
Interpret C escape sequences in a string.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
char * replace_chars(char *s, char *from, char *to)
Maps one set of characters to another in a given string.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
void free_scanargs(SCANNED_ARG **scanned, int argc)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
char * trim_spaces(char *s)
Trims leading and trailing spaces from a string.