SDDSlib
Loading...
Searching...
No Matches
csv2sdds.c
1/*************************************************************************\
2 * Copyright (c) 2002 The University of Chicago, as Operator of Argonne
3 * National Laboratory.
4 * Copyright (c) 2002 The Regents of the University of California, as
5 * Operator of Los Alamos National Laboratory.
6 * This file is distributed subject to a Software License Agreement found
7 * in the file LICENSE that is included with this distribution.
8\*************************************************************************/
9
10/*
11 *
12 $Log: not supported by cvs2svn $
13 Revision 1.26 2010/07/29 21:12:00 soliday
14 If a numerical column is empty it will now use the value from the previous
15 row. If the first row has empty values it will use the previous behaviour
16 of substituting the largest possible value for that data type.
17
18 Revision 1.25 2010/07/29 20:17:05 soliday
19 Added the -uselabel option for files that have column names and units in the
20 file before the data.
21
22 Revision 1.24 2009/11/18 00:56:04 lemery
23 Allow program to proceed when the lines that are skipped have no
24 printable characters. Previously the input file would be closed on
25 first instance of no character in a line.
26
27 Revision 1.23 2006/02/02 17:45:09 soliday
28 Fixed bug in the call to SDDS_LengthenTable. It was passing the desired number
29 of rows instead of the desired increase in rows.
30
31 Revision 1.22 2003/09/02 19:16:01 soliday
32 Cleaned up code for Linux.
33
34 Revision 1.21 2002/08/14 17:12:36 soliday
35 Added Open License
36
37 Revision 1.20 2002/03/22 22:59:53 soliday
38 Modified free_scanargs argument.
39
40 Revision 1.19 2002/01/28 16:51:05 soliday
41 Added free_scanargs
42
43 Revision 1.18 2001/09/05 20:20:00 shang
44 Added the -skiplines option to the usage syntax, which was forgotten in the previous version.
45
46 Revision 1.17 2001/09/05 20:06:53 shang
47 add -skiplines option and fix the read schFile problem, which did not recognize
48 "double, string" data type and mistook "float" as "double"
49
50 Revision 1.16 2001/01/23 19:14:54 soliday
51 Standardized usage message.
52
53 Revision 1.15 2000/10/05 17:20:33 soliday
54 Increased the 1024 character limit per line to 10240 so that it can handle
55 really big files.
56
57 Revision 1.14 2000/09/19 20:10:49 borland
58 Fixed bug introduced when units capability was introduced by D. Blachowicz.
59
60 Revision 1.13 2000/05/22 16:18:17 soliday
61 Fixed a bug so that it calls SDDS_LengthenTable one row earlier.
62
63 Revision 1.12 2000/04/06 13:48:49 borland
64 Per D. Blachowicz: added units qualifier to -column option.
65
66 Revision 1.11 1999/09/14 18:29:29 soliday
67 interpret_escaped_quotes is no longer defined locally
68
69 Revision 1.10 1999/05/25 19:02:20 soliday
70 Removed compiler warning on linux.
71
72 Revision 1.9 1999/01/06 19:54:34 borland
73 Fixed the version number in the usage message.
74
75 Revision 1.8 1997/08/25 19:20:29 borland
76 Added option to allow setting the maximum number of rows per page.
77 Should find a way to eliminate the need for this.
78
79 Revision 1.7 1997/07/16 22:06:36 borland
80 Added -spanLines option, which permits parsing data where the rows occupy
81 multiple lines.
82
83 Revision 1.6 1997/03/19 19:08:31 borland
84 Added exit(0) for normal termination.
85
86 Revision 1.5 1997/03/06 22:20:16 borland
87 Now interprets escapes in separator arguments.
88
89 Revision 1.4 1996/11/11 00:55:54 borland
90 Fixed bug that caused truncation of tokens from lines with separators but
91 no delimiters.
92
93 Revision 1.3 1996/11/07 20:25:33 borland
94 Now ignores all lines in SCH file that don't have the form <tag>=<valueList>.
95
96 Revision 1.2 1996/11/07 19:56:23 borland
97 Added pipe option.
98
99 Revision 1.1 1996/11/01 19:59:28 borland
100 First version.
101
102 *
103 */
104#include "mdb.h"
105#include "SDDS.h"
106#include "scan.h"
107#include <ctype.h>
108
/*
 * Command-line option identifiers.  Each value is the position of the
 * matching keyword in option[] below, which is what main() switches on
 * after looking the keyword up with match_string().
 */
enum {
  SET_ASCIIOUTPUT = 0,
  SET_DELIMITERS = 1,
  SET_SEPARATOR = 2,
  SET_COLUMNDATA = 3,
  SET_SCHFILE = 4,
  SET_PIPE = 5,
  SET_SPANLINES = 6,
  SET_MAXROWS = 7,
  SET_SKIPLINES = 8,
  SET_USELABELS = 9,
  SET_MAJOR_ORDER = 10,
  SET_FILL_IN = 11,
  N_OPTIONS = 12
};

/* Option keywords, ordered so that option[SET_x] is the keyword for SET_x. */
char *option[N_OPTIONS] = {
  "asciioutput",
  "delimiters",
  "separator",
  "columndata",
  "schfile",
  "pipe",
  "spanlines",
  "maxrows",
  "skiplines",
  "uselabels",
  "majorOrder",
  "fillIn",
};
137
138char *USAGE = "csv2sdds [<inputFile>] [<outputFile>] [-pipe[=in][,out]]\n\
139[-asciiOutput] [-spanLines] [-maxRows=<number>]\n\
140[-schfile=<filename>] [-skiplines=<number>]\n\
141[-delimiters=start=<start>,end=<char>] [-separator=<char>]\n\
142[-columnData=name=<name>,type=<type>,units=<units>...]\n\
143[-uselabels[=units]] [-majorOrder=row|column]\n\
144[-fillIn=<zero|last>]\n\n\
145pipe SDDS toolkit pipe option.\n\
146asciiOutput Requests SDDS ASCII output. Default is binary.\n\
147spanLines Ignore line breaks in parsing the input data.\n\
148schFile Gives the name of the SCH file that describes the\n\
149 columns in the <inputFile>.\n\
150delimiter Gives the delimiter characters that bracket fields.\n\
151 The default is \" for both the start and end delimiter.\n\
152separator Gives the separator character between fields. The default\n\
153 is ,\n\
154skiplines skip the first <number> lines of input file. \n\
155columnData Gives the name of a column in <inputFile> along with the\n\
156 SDDS data type for the column (one of short, long, float, double,\n\
157 character, or string), and optional units input.\n\
158 -column options must be given in the\n\
159 order corresponding to the order of the data in <inputFile>.\n\
160uselabels The column names and optionally the units are defined in the\n\
161 file prior to the data.\n\
162maxRows Maximum number of rows to expect in input.\n\
163majorOrder writes output file in row or column major order\n\
164fillIn Use 0 or the last value for empty cells. The default is 0.\n\n\
165Converts Comma Separated Values data to SDDS.\n\
166Program by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
167
/*
 * Description of one column of the output file.
 *   name  - column name passed to SDDS_DefineColumn()
 *   units - units string passed to SDDS_DefineColumn(), or NULL for none
 *   type  - SDDS data type code (SDDS_STRING, SDDS_DOUBLE, ...)
 *   index - value returned by SDDS_DefineColumn() for this column
 */
typedef struct
{
  char *name, *units;
  long type, index;
} COLUMN_DATA;
173
/* Reads the SCH description file, appends one entry to *columnData per Field
   line, may overwrite *separator, *startDelim and *endDelim, and returns the
   number of columns found. */
long ParseSchFile(char *file, COLUMN_DATA **columnData, char *separator, char *startDelim, char *endDelim);
/* Initializes the output dataset, defines every column in columnData and
   writes the layout. */
void SetUpOutputFile(SDDS_DATASET *SDDSout, char *input, char *output, COLUMN_DATA *columnData, long columns,
                     long asciiOutput, short columnMajorOrder);
/* Copies the next token of s into buffer and returns a pointer to the text
   that follows it. */
char *getToken(char *s, char separator, char startDelim, char endDelim, char *buffer);
/* Splits one input row into tokens and stores them at row index `rows` of
   the output dataset. */
void writeOneRowToOutputFile(SDDS_DATASET *SDDSout, char *ptr, char separator, char startDelim, char endDelim,
                             long spanLines, COLUMN_DATA *columnData, long columns, int64_t rows, short fillInZero);
/* Converts the string to lower case in place. */
void lowerstring(char *ptr);
181
182int main(int argc, char **argv) {
183 FILE *fpi;
184 char *input, *output, *schFile;
185 SDDS_DATASET SDDSout;
186 SCANNED_ARG *scanned;
187 long i, iArg;
188 int64_t rows, maxRows;
189 long asciiOutput, columns, spanLines, skipLines = 0, lines;
190 short columnlabels = 0, unitlabels = 0, uselabels = 0;
191 COLUMN_DATA *columnData;
192 char separator, startDelim, endDelim;
193 char s[10240], *ptr, *typeName, *unitsName;
194 unsigned long dummyFlags, pipeFlags, majorOrderFlag, fillInFlag;
195 short columnMajorOrder = 0, fillInZero = 1;
196
198 argc = scanargs(&scanned, argc, argv);
199 if (argc < 3)
200 bomb(NULL, USAGE);
201 input = output = schFile = NULL;
202 asciiOutput = spanLines = columns = 0;
203 pipeFlags = 0;
204 columnData = NULL;
205 separator = ',';
206 startDelim = '\"';
207 endDelim = '\"';
208 maxRows = 10000;
209
210 for (iArg = 1; iArg < argc; iArg++) {
211 if (scanned[iArg].arg_type == OPTION) {
212 switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
213 case SET_MAJOR_ORDER:
214 majorOrderFlag = 0;
215 scanned[iArg].n_items--;
216 if (scanned[iArg].n_items > 0 &&
217 (!scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
218 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
219 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
220 SDDS_Bomb("invalid -majorOrder syntax/values");
221 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
222 columnMajorOrder = 1;
223 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
224 columnMajorOrder = 0;
225 break;
226 case SET_ASCIIOUTPUT:
227 asciiOutput = 1;
228 break;
229 case SET_FILL_IN:
230 fillInFlag = 0;
231 scanned[iArg].n_items--;
232 if (scanned[iArg].n_items > 0 &&
233 (!scanItemList(&fillInFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
234 "zero", -1, NULL, 0, 0x0001UL,
235 "last", -1, NULL, 0, 0x0002UL, NULL)))
236 SDDS_Bomb("invalid -fillIn syntax/values");
237 if (fillInFlag & 0x0001UL)
238 fillInZero = 1;
239 else if (fillInFlag & 0x0002UL)
240 fillInZero = 0;
241 break;
242 case SET_DELIMITERS:
243 if (!(scanned[iArg].n_items -= 1) ||
244 !scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
245 "start", SDDS_CHARACTER, &startDelim, 1, 0,
246 "end", SDDS_CHARACTER, &endDelim, 1, 0, NULL)) {
247 SDDS_Bomb("invalid -delimiters syntax");
248 }
249 scanned[iArg].n_items++;
250 break;
251 case SET_SEPARATOR:
252 if (scanned[iArg].n_items != 2 || strlen(scanned[iArg].list[1]) < 1)
253 SDDS_Bomb("invalid -separator syntax");
254 interpret_escapes(scanned[iArg].list[1]);
255 separator = scanned[iArg].list[1][0];
256 break;
257 case SET_COLUMNDATA:
258 columnData = SDDS_Realloc(columnData, sizeof(*columnData) * (columns + 1));
259 columnData[columns].name = NULL;
260 columnData[columns].units = NULL;
261 unitsName = NULL;
262 typeName = "string";
263 if (!(scanned[iArg].n_items -= 1) ||
264 !scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
265 "name", SDDS_STRING, &(columnData[columns].name), 1, 0,
266 "units", SDDS_STRING, &unitsName, 1, 0,
267 "type", SDDS_STRING, &typeName, 1, 0, NULL) ||
268 !columnData[columns].name ||
269 !strlen(columnData[columns].name) ||
270 !typeName ||
271 !(columnData[columns].type = SDDS_IdentifyType(typeName)))
272 SDDS_Bomb("invalid -columnData syntax");
273 scanned[iArg].n_items++;
274 columnData[columns].units = unitsName;
275 columns++;
276 break;
277 case SET_SCHFILE:
278 if (scanned[iArg].n_items != 2)
279 SDDS_Bomb("invalid -schFile syntax");
280 schFile = scanned[iArg].list[1];
281 if (!fexists(schFile)) {
282 fprintf(stderr, "file not found: %s (csv2sdds)\n", schFile);
283 exit(1);
284 }
285 break;
286 case SET_PIPE:
287 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
288 SDDS_Bomb("invalid -pipe syntax");
289 break;
290 case SET_SPANLINES:
291 spanLines = 1;
292 break;
293 case SET_MAXROWS:
294 if (scanned[iArg].n_items != 2 ||
295 strlen(scanned[iArg].list[1]) < 1 ||
296 sscanf(scanned[iArg].list[1], "%" SCNd64, &maxRows) != 1 ||
297 maxRows < 1)
298 SDDS_Bomb("invalid -maxRows syntax");
299 break;
300 case SET_SKIPLINES:
301 if (scanned[iArg].n_items != 2 ||
302 strlen(scanned[iArg].list[1]) < 1 ||
303 sscanf(scanned[iArg].list[1], "%ld", &skipLines) != 1 ||
304 skipLines < 1)
305 SDDS_Bomb("invalid -skipline syntax");
306 break;
307 case SET_USELABELS:
308 if (scanned[iArg].n_items > 2)
309 SDDS_Bomb("invalid -uselabels syntax");
310 uselabels = 1;
311 columnlabels = 1;
312 if (scanned[iArg].n_items == 2)
313 unitlabels = 1;
314 break;
315 default:
316 bomb("invalid option seen", USAGE);
317 break;
318 }
319 } else {
320 if (!input)
321 input = scanned[iArg].list[0];
322 else if (!output)
323 output = scanned[iArg].list[0];
324 else
325 bomb("too many filenames", USAGE);
326 }
327 }
328
329 if (!columns && !schFile && !columnlabels)
330 SDDS_Bomb("either give one or more -columnData options or -schFile option or -uselabels option");
331 if (columns && schFile)
332 SDDS_Bomb("either give one or more -columnData options or -uselabels option");
333 if (columns && columnlabels)
334 SDDS_Bomb("either give one or more -columnData options or -uselabels option");
335 if (schFile && columnlabels)
336 SDDS_Bomb("either give -schFile options or -uselabels option");
337
338 processFilenames("csv2sdds", &input, &output, pipeFlags, 0, NULL);
339 if (input) {
340 if (!fexists(input))
341 SDDS_Bomb("input file not found");
342 if (!(fpi = fopen(input, "r")))
343 SDDS_Bomb("problem opening input file");
344 } else {
345 fpi = stdin;
346 }
347
348 if (!columnlabels) {
349 if (!columns && !(columns = ParseSchFile(schFile, &columnData, &separator, &startDelim, &endDelim)))
350 SDDS_Bomb("problem reading or parsing sch file");
351
352 SetUpOutputFile(&SDDSout, input, output, columnData, columns, asciiOutput, columnMajorOrder);
353
354 if (!SDDS_StartPage(&SDDSout, maxRows))
355 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
356 }
357 rows = 0; /* the row index we are storing in */
358 lines = 0;
359
360 while (fgets(s, 10240, fpi)) {
361 lines++;
362 /* convert unprintable character to a null */
363 while ((i = strlen(s)) && s[i - 1] < 27)
364 s[i - 1] = 0;
365 /* I added the second condition because sometimes we get files that
366 has headers of fixed number of lines that are garbage.
367 The loop will break only when real data has first been read */
368 if (strlen(s) == 0 && (skipLines && (lines > skipLines)))
369 break;
370 ptr = s;
371#if defined(DEBUG)
372 fprintf(stderr, "line: >%s<\n", ptr);
373#endif
374 if (columnlabels && (!skipLines || (lines > skipLines))) {
375 char t[10240];
376 t[0] = 0;
377 while (1) {
378 ptr = getToken(ptr, separator, startDelim, endDelim, t);
379 if (strlen(t) == 0)
380 break;
381 columnData = SDDS_Realloc(columnData, sizeof(*columnData) * (columns + 1));
382 columnData[columns].name = malloc(sizeof(char *) * strlen(t));
383 replace_chars(t, (char *)" ", (char *)"_");
384 sprintf(columnData[columns].name, "%s", t);
385 columnData[columns].units = NULL;
386 columnData[columns].type = SDDS_STRING;
387 columns++;
388 }
389 columnlabels = 0;
390 continue;
391 } else if (unitlabels && (!skipLines || (lines > skipLines))) {
392 char t[10240];
393 t[0] = 0;
394 for (i = 0; i < columns; i++) {
395 ptr = getToken(ptr, separator, startDelim, endDelim, t);
396 if (strlen(t) == 0)
397 break;
398 columnData[i].units = malloc(sizeof(char *) * strlen(t));
399 sprintf(columnData[i].units, "%s", t);
400 }
401 unitlabels = 0;
402 continue;
403 }
404 if (uselabels) {
405 char *tmpPtr;
406 char t[10240];
407 double tD;
408 t[0] = 0;
409 tmpPtr = ptr;
410 for (i = 0; i < columns; i++) {
411 tmpPtr = getToken(tmpPtr, separator, startDelim, endDelim, t);
412 if (strlen(t) == 0)
413 break;
414 if (sscanf(t, "%lf", &tD) == 1)
415 columnData[i].type = SDDS_DOUBLE;
416 }
417 SetUpOutputFile(&SDDSout, input, output, columnData, columns, asciiOutput, columnMajorOrder);
418 if (!SDDS_StartPage(&SDDSout, maxRows))
419 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
420 uselabels = 0;
421 }
422 if (!skipLines || (lines > skipLines)) {
423 writeOneRowToOutputFile(&SDDSout, ptr, separator, startDelim, endDelim, spanLines, columnData, columns, rows, fillInZero);
424 rows++;
425 }
426 if (rows >= maxRows - 1) {
427 if (!SDDS_LengthenTable(&SDDSout, 1000)) {
428 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
429 }
430 maxRows += 1000;
431 }
432 }
433
434 fclose(fpi);
435 if (!SDDS_WritePage(&SDDSout) || !SDDS_Terminate(&SDDSout))
436 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
437
438 free_scanargs(&scanned, argc);
439
440 return (0);
441}
442
443long ParseSchFile(char *file, COLUMN_DATA **columnData, char *separator, char *startDelim, char *endDelim) {
444 FILE *fp;
445 char s[10240], *ptr, *ptr0;
446 long l, fieldIndex, lastFieldIndex, columns;
447
448 if (!(fp = fopen(file, "r")))
449 SDDS_Bomb("Unable to open SCH file");
450
451 lastFieldIndex = 0;
452 columns = 0;
453 while (fgets(s, 10240, fp)) {
454 while ((l = strlen(s)) && s[l - 1] < 27)
455 s[l - 1] = 0;
456 if (strlen(s) == 0)
457 continue;
458 if (!(ptr = strchr(s, '=')))
459 continue;
460 *ptr++ = 0;
461 if (strcmp(s, "Filetype") == 0) {
462 if (strcmp(ptr, "Delimited"))
463 SDDS_Bomb("Require Filetype = Delimited in SCH file.");
464 } else if (strcmp(s, "Separator") == 0) {
465 if (!(*separator = *ptr))
466 SDDS_Bomb("Null separator in SCH file.");
467 } else if (strcmp(s, "Delimiter") == 0) {
468 if (!(*endDelim = *startDelim = *ptr))
469 SDDS_Bomb("Null delimiter in SCH file.");
470 } else if (strcmp(s, "CharSet") == 0) {
471 if (strcmp(ptr, "ascii"))
472 SDDS_Bomb("Require CharSet = ascii in SCH file.");
473 } else if (strncmp(s, "Field", strlen("Field")) == 0) {
474 if (!sscanf(s, "Field%ld", &fieldIndex))
475 SDDS_Bomb("Error scanning field index in SCH file.");
476 if (fieldIndex - lastFieldIndex != 1)
477 SDDS_Bomb("Gap or nonmonotonicity in field index values.");
478 lastFieldIndex = fieldIndex;
479 *columnData = SDDS_Realloc(*columnData, sizeof(**columnData) * (columns + 1));
480 delete_chars(ptr, " ");
481 ptr0 = ptr;
482 if (!(ptr = strchr(ptr0, ',')))
483 SDDS_Bomb("Field name not found.");
484 *ptr = 0;
485 SDDS_CopyString(&((*columnData)[columns].name), ptr0);
486 (*columnData)[columns].units = NULL;
487 ptr0 = ptr + 1;
488 if (!(ptr = strchr(ptr0, ',')))
489 SDDS_Bomb("Field type not found.");
490 *ptr = 0;
491
492 lowerstring(ptr0);
493 if (strcmp(ptr0, "string") == 0)
494 (*columnData)[columns].type = SDDS_STRING;
495 else if (strcmp(ptr0, "char") == 0)
496 (*columnData)[columns].type = SDDS_STRING;
497 else if (strcmp(ptr0, "float") == 0)
498 (*columnData)[columns].type = SDDS_FLOAT;
499 else if (strcmp(ptr0, "double") == 0)
500 (*columnData)[columns].type = SDDS_DOUBLE;
501 else {
502 fprintf(stderr, "Unknow type '%s' given to '%s'\n", ptr0, (*columnData)[columns].name);
503 exit(1);
504 }
505 columns++;
506 } else {
507 fprintf(stderr, "Warning: unknown tag value in SCH file: %s\n", s);
508 }
509 }
510 return columns;
511}
512
513void SetUpOutputFile(SDDS_DATASET *SDDSout, char *input, char *output, COLUMN_DATA *columnData, long columns, long asciiOutput, short columnMajorOrder) {
514 char s[10240];
515 long i;
516
517 sprintf(s, "csv2sdds conversion of %s", input ? input : "stdin");
518
519 if (!SDDS_InitializeOutput(SDDSout, asciiOutput ? SDDS_ASCII : SDDS_BINARY, 1, NULL, s, output))
520 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
521 SDDSout->layout.data_mode.column_major = columnMajorOrder;
522 for (i = 0; i < columns; i++) {
523 if ((columnData[i].index = SDDS_DefineColumn(SDDSout, columnData[i].name, NULL, columnData[i].units, NULL, NULL, columnData[i].type, 0)) < 0) {
524 sprintf(s, "Problem defining column %s.", columnData[i].name);
525 SDDS_SetError(s);
526 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
527 }
528 }
529 if (!SDDS_WriteLayout(SDDSout))
530 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
531}
532
/*
 * Extracts the next token from s into buffer.
 *
 * - At the end of the string the token is empty and s is returned unchanged.
 * - If s starts with the separator the token is empty and the separator is
 *   consumed.
 * - If s starts with startDelim the token is everything up to the next
 *   endDelim that is not preceded by a backslash (delimiters are not copied).
 *   When the closing delimiter is missing the token runs to the end of the
 *   string.  A separator directly after the closing delimiter is consumed.
 * - Otherwise the token runs up to the next separator (consumed) or the end
 *   of the string.
 *
 * buffer must be large enough for the whole of s.
 * Returns a pointer to the position from which the next call should resume.
 */
char *getToken(char *s,         /* the string to be scanned */
               char separator,  /* typically , */
               char startDelim, /* typically " */
               char endDelim,   /* typically " */
               char *buffer     /* place to put the result */
) {
  char *ptr;
  size_t length;

  if (*s == 0) {
    buffer[0] = 0;
    return s;
  }

  if (*s == separator) {
    /* zero-length token */
    buffer[0] = 0;
    /* advance to next position */
    return s + 1;
  }

  /* Check for quotes.  If found, return quote-bounded data. */
  if (*s == startDelim) {
    int closed = 0;
    s++;
    ptr = s;
    while (*ptr) {
      /* ptr[-1] is always readable: at worst it is the start delimiter */
      if (*ptr == endDelim && *(ptr - 1) != '\\') {
        closed = 1;
        break;
      }
      ptr++;
    }
    /* ptr is at the closing delimiter, or at the terminator when the quote
       was never closed; either way [s, ptr) is the token text */
    length = (size_t)(ptr - s);
    memcpy(buffer, s, length);
    buffer[length] = 0;
    /* NOTE(review): the rendered listing skips one source line here; the
       cross-reference index mentions interpret_escaped_quotes, so upstream
       may post-process buffer at this point -- confirm. */
    if (closed)
      ptr++;
    if (*ptr && *ptr == separator)
      return ptr + 1;
    return ptr;
  }

  /* advance until the next separator is found */
  ptr = s;
  while (*ptr && *ptr != separator)
    ptr++;
  length = (size_t)(ptr - s);
  memcpy(buffer, s, length);
  buffer[length] = 0;
  /* step over the separator, but never past the terminator */
  return *ptr ? ptr + 1 : ptr;
}
585
586void writeOneRowToOutputFile(SDDS_DATASET *SDDSout, char *ptr, char separator, char startDelim, char endDelim, long spanLines, COLUMN_DATA *columnData, long columns, int64_t rows, short fillInZero) {
587 int column = 0;
588 char t[10240];
589 double doubleValue;
590 float floatValue;
591 short shortValue;
592 unsigned short ushortValue;
593 long nullData = 0;
594 int32_t longValue;
595 uint32_t ulongValue;
596 int64_t long64Value;
597 uint64_t ulong64Value;
598 char charValue;
599 t[0] = 0;
600
601 for (column = 0; column < columns; column++) {
602 ptr = getToken(ptr, separator, startDelim, endDelim, t);
603#if defined(DEBUG)
604 fprintf(stderr, "token: >%s<\n", t);
605#endif
606 nullData = 0;
607 if (strlen(trim_spaces(t)) == 0)
608 nullData = 1;
609 if (nullData && spanLines) {
610 break;
611 }
612 switch (columnData[column].type) {
613 case SDDS_SHORT:
614 if (nullData || !sscanf(t, "%hd", &shortValue)) {
615 if (fillInZero) {
616 shortValue = 0;
617 } else {
618 if (rows == 0)
619 shortValue = 0;
620 else
621 shortValue = ((short *)SDDSout->data[columnData[column].index])[rows - 1];
622 }
623 }
624 if (!SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, shortValue, -1))
625 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
626 break;
627 case SDDS_USHORT:
628 if (nullData || !sscanf(t, "%hu", &ushortValue)) {
629 if (fillInZero) {
630 ushortValue = 0;
631 } else {
632 if (rows == 0)
633 ushortValue = 0;
634 else
635 ushortValue = ((unsigned short *)SDDSout->data[columnData[column].index])[rows - 1];
636 }
637 }
638 if (!SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, ushortValue, -1))
639 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
640 break;
641 case SDDS_LONG:
642 if (nullData || !sscanf(t, "%" SCNd32, &longValue)) {
643 if (fillInZero) {
644 longValue = 0;
645 } else {
646 if (rows == 0)
647 longValue = 0;
648 else
649 longValue = ((int32_t *)SDDSout->data[columnData[column].index])[rows - 1];
650 }
651 }
652 if (!SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, longValue, -1))
653 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
654 break;
655 case SDDS_ULONG:
656 if (nullData || !sscanf(t, "%" SCNu32, &ulongValue)) {
657 if (fillInZero) {
658 ulongValue = 0;
659 } else {
660 if (rows == 0)
661 ulongValue = 0;
662 else
663 ulongValue = ((uint32_t *)SDDSout->data[columnData[column].index])[rows - 1];
664 }
665 }
666 if (!SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, ulongValue, -1))
667 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
668 break;
669 case SDDS_LONG64:
670 if (nullData || !sscanf(t, "%" SCNd64, &long64Value)) {
671 if (fillInZero) {
672 long64Value = 0;
673 } else {
674 if (rows == 0)
675 long64Value = 0;
676 else
677 long64Value = ((int64_t *)SDDSout->data[columnData[column].index])[rows - 1];
678 }
679 }
680 if (!SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, long64Value, -1))
681 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
682 break;
683 case SDDS_ULONG64:
684 if (nullData || !sscanf(t, "%" SCNu64, &ulong64Value)) {
685 if (fillInZero) {
686 ulong64Value = 0;
687 } else {
688 if (rows == 0)
689 ulong64Value = 0;
690 else
691 ulong64Value = ((uint64_t *)SDDSout->data[columnData[column].index])[rows - 1];
692 }
693 }
694 if (!SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, ulong64Value, -1))
695 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
696 break;
697 case SDDS_FLOAT:
698 if (nullData || !sscanf(t, "%f", &floatValue)) {
699 if (fillInZero) {
700 floatValue = 0;
701 } else {
702 if (rows == 0)
703 floatValue = 0;
704 else
705 floatValue = ((float *)SDDSout->data[columnData[column].index])[rows - 1];
706 }
707 }
708 if (!SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, floatValue, -1))
709 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
710 break;
711 case SDDS_DOUBLE:
712 if (nullData || !sscanf(t, "%lf", &doubleValue)) {
713 if (fillInZero) {
714 doubleValue = 0;
715 } else {
716 if (rows == 0)
717 doubleValue = 0;
718 else
719 doubleValue = ((double *)SDDSout->data[columnData[column].index])[rows - 1];
720 }
721 }
722 if (!SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, doubleValue, -1))
723 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
724 break;
725 case SDDS_CHARACTER:
726 if (nullData || !sscanf(t, "%c", &charValue)) {
727 if (fillInZero) {
728 charValue = 0;
729 } else {
730 if (rows == 0)
731 charValue = 0;
732 else
733 charValue = ((char *)SDDSout->data[columnData[column].index])[rows - 1];
734 }
735 }
736 if (!SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, charValue, -1))
737 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
738 break;
739 case SDDS_STRING:
741 if (!SDDS_SetRowValues(SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, rows, columnData[column].index, t, -1))
742 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
743 break;
744 default:
745 SDDS_Bomb("Unknown/unsupported data type encountered.");
746 exit(1);
747 }
748 }
749}
750
/*
 * Converts every character of the NUL-terminated string ptr to lower case,
 * in place.
 */
void lowerstring(char *ptr) {
  size_t size, i;

  size = strlen(ptr);
  for (i = 0; i < size; i++) {
    /* tolower() is only defined for unsigned char values and EOF, so a
       plain char that may be negative has to be converted first */
    ptr[i] = (char)tolower((unsigned char)ptr[i]);
  }
}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_LengthenTable(SDDS_DATASET *SDDS_dataset, int64_t n_additional_rows)
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_InterpretEscapes(char *s)
Interprets and converts escape sequences in a string.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_IdentifyType(char *typeName)
Identifies the SDDS data type based on its string name.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_ULONG
Identifier for the unsigned 32-bit integer data type.
Definition SDDStypes.h:67
#define SDDS_FLOAT
Identifier for the float data type.
Definition SDDStypes.h:43
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_ULONG64
Identifier for the unsigned 64-bit integer data type.
Definition SDDStypes.h:55
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_SHORT
Identifier for the signed short integer data type.
Definition SDDStypes.h:73
#define SDDS_CHARACTER
Identifier for the character data type.
Definition SDDStypes.h:91
#define SDDS_USHORT
Identifier for the unsigned short integer data type.
Definition SDDStypes.h:79
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
#define SDDS_LONG64
Identifier for the signed 64-bit integer data type.
Definition SDDStypes.h:49
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
char * delete_chars(char *s, char *t)
Removes all occurrences of characters found in string t from string s.
long fexists(const char *filename)
Checks if a file exists.
Definition fexists.c:27
void interpret_escaped_quotes(char *s)
Processes a string to interpret and replace escaped quotation marks.
void interpret_escapes(char *s)
Interpret C escape sequences in a string.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
char * replace_chars(char *s, char *from, char *to)
Maps one set of characters to another in a given string.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
char * trim_spaces(char *s)
Trims leading and trailing spaces from a string.
Definition trim_spaces.c:28