SDDSlib
Loading...
Searching...
No Matches
sddscombinelogfiles.c
Go to the documentation of this file.
1/**
2 * @file sddscombinelogfiles.c
3 * @brief Combine multiple log files into a single SDDS file.
4 *
5 * This program takes multiple log files in the one-PV-per-file format and merges them into a
6 * single larger SDDS file containing all the PVs. Only the timestamps common to all PVs are retained.
7 *
8 * ### Features:
9 * - Supports both input file lists and pipe output.
10 * - Ensures only common timestamps are retained in the final output.
11 * - Flexible options for overwriting existing files or using pipe-based I/O.
12 *
13 * ### Usage:
14 * ```
15 * sddscombinelogfiles [<SDDSinputfilelist>] [<SDDSoutputfile>]
16 * [-pipe=[output]] [-overwrite]
17 * ```
18 *
19 * ### Options:
20 * - **`-pipe=[output]`**: Specify pipe output for the SDDS file.
21 * - **`-overwrite`**: Overwrite the output file if it already exists.
22 *
23 * ### Example:
24 * ```
25 * sddscombinelogfiles input1.sdds input2.sdds output.sdds -overwrite
26 * ```
27 *
28 * @copyright
29 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
30 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
31 *
32 * @license
33 * This file is distributed under the terms of the Software License Agreement
34 * found in the file LICENSE included with this distribution.
35 *
36 * @author R. Soliday, L Emery
37 */
38
39#include "mdb.h"
40#include "scan.h"
41#include "SDDS.h"
42
/*
 * Command-line options understood by this program.
 *
 * The enumerators are compared against the value returned by match_string()
 * when it searches the option[] keyword table, so their order must stay in
 * step with the order of the strings in that table. N_OPTIONS is not an
 * option itself; it is the number of entries and sizes the table.
 */
enum option_type {
  SET_PIPE,      /* -pipe=[output] */
  SET_OVERWRITE, /* -overwrite */
  N_OPTIONS
};
49
50static char *option[N_OPTIONS] = {
51 "pipe",
52 "overwrite"
53};
54
/*
 * SVN_VERSION is spliced into the usage text below. It is presumably provided
 * by the build system or one of the project headers (NOTE(review): confirm);
 * this fallback only takes effect when nothing has defined it, so the string
 * concatenation still compiles.
 */
#ifndef SVN_VERSION
#  define SVN_VERSION "unknown"
#endif

/* Help text printed on stderr when the command line is incomplete or invalid. */
const char *USAGE =
  "Usage: sddscombinelogfiles [<SDDSinputfilelist>] [<SDDSoutputfile>]\n"
  "                           [-pipe=[output]] [-overwrite]\n\n"
  "This program combines data logger output files that are in the one-PV-per-file format.\n"
  "Only the timestamps present in all input files are retained in the output file.\n\n"
  "Options:\n"
  "  -pipe=[output]    Specify the pipe output.\n"
  "  -overwrite        Overwrite the output file if it already exists.\n\n"
  "Example:\n"
  "  sddscombinelogfiles input1.sdds input2.sdds output.sdds -overwrite\n\n"
  "Link date: " __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION "\n";
66
67int main(int argc, char **argv) {
68 SDDS_DATASET SDDS_input;
69 SDDS_DATASET SDDS_output;
70 SCANNED_ARG *s_arg;
71 KEYED_EQUIVALENT **keyGroup = NULL;
72 long keyGroups = 0;
73 char **inputfile = NULL;
74 int inputfiles = 0;
75 char *outputfile = NULL;
76 int i_arg, n, row, z;
77 int64_t i, j, m, r, s;
78 unsigned long pipeFlags = 0;
79 int overwrite = 0;
80 char **columnname;
81 int32_t columnnames;
82 int dataIndex;
83 double **timeValues = NULL;
84 double **dataValues = NULL;
85 short **flag = NULL;
86 int64_t *rows = NULL;
87 char **dataNames = NULL;
88 char **uniqueDataName = NULL;
89 int uniqueDataNames = 0;
90 int page = 0;
91 int pages;
92 int found;
93 double *outputTimeValues = NULL;
94 double **outputDataValues = NULL;
95 int64_t allocated_rows = 0;
96 int **array = NULL;
97 int *arrayCount;
98
100 argc = scanargs(&s_arg, argc, argv);
101
102 if (argc < 3) {
103 fprintf(stderr, "%s", USAGE);
104 return EXIT_FAILURE;
105 }
106
107 for (i_arg = 1; i_arg < argc; i_arg++) {
108 if (s_arg[i_arg].arg_type == OPTION) {
109 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
110 case SET_OVERWRITE:
111 overwrite = 1;
112 break;
113 case SET_PIPE:
114 if (!processPipeOption(s_arg[i_arg].list + 1,
115 s_arg[i_arg].n_items - 1,
116 &pipeFlags)) {
117 fprintf(stderr, "Error: Invalid -pipe option syntax.\n");
118 return EXIT_FAILURE;
119 }
120 if (pipeFlags & USE_STDIN) {
121 fprintf(stderr, "Error: -pipe=in is not supported.\n");
122 return EXIT_FAILURE;
123 }
124 break;
125 default:
126 fprintf(stderr, "Error: Unrecognized option.\n%s", USAGE);
127 return EXIT_FAILURE;
128 }
129 } else {
130 inputfile = trealloc(inputfile, sizeof(*inputfile) * (inputfiles + 1));
131 inputfile[inputfiles++] = s_arg[i_arg].list[0];
132 }
133 }
134
135 if (inputfiles > 1) {
136 if (!(pipeFlags & USE_STDOUT)) {
137 outputfile = inputfile[--inputfiles];
138 if (fexists(outputfile) && !overwrite) {
139 fprintf(stderr, "Error: Output file '%s' already exists. Use -overwrite to replace it.\n", outputfile);
140 return EXIT_FAILURE;
141 }
142 }
143 } else if (inputfiles == 1) {
144 if ((pipeFlags & USE_STDOUT) && outputfile) {
145 fprintf(stderr, "Error: Too many filenames provided with -pipe=output.\n");
146 return EXIT_FAILURE;
147 }
148 } else {
149 fprintf(stderr, "Error: No input filenames provided.\n%s", USAGE);
150 return EXIT_FAILURE;
151 }
152
153 for (i = 0; i < inputfiles; i++) {
154 if (!SDDS_InitializeInput(&SDDS_input, inputfile[i])) {
155 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
156 return EXIT_FAILURE;
157 }
158
159 columnname = SDDS_GetColumnNames(&SDDS_input, &columnnames);
160 if (columnname == NULL) {
161 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
162 return EXIT_FAILURE;
163 }
164
165 if (columnnames > 3 || columnnames < 2) {
166 fprintf(stderr, "Error: Unexpected number of columns in '%s'.\n", inputfile[i]);
167 return EXIT_FAILURE;
168 }
169
170 if (columnnames == 2) {
171 if (strcmp("Time", columnname[0]) == 0) {
172 dataIndex = 1;
173 } else if (strcmp("Time", columnname[1]) == 0) {
174 dataIndex = 0;
175 } else {
176 fprintf(stderr, "Error: 'Time' column is missing in '%s'.\n", inputfile[i]);
177 return EXIT_FAILURE;
178 }
179 }
180
181 if (columnnames == 3) {
182 if (strcmp("CAerrors", columnname[0]) == 0) {
183 if (strcmp("Time", columnname[1]) == 0) {
184 dataIndex = 2;
185 } else if (strcmp("Time", columnname[2]) == 0) {
186 dataIndex = 1;
187 } else {
188 fprintf(stderr, "Error: 'Time' column is missing in '%s'.\n", inputfile[i]);
189 return EXIT_FAILURE;
190 }
191 } else if (strcmp("CAerrors", columnname[1]) == 0) {
192 if (strcmp("Time", columnname[0]) == 0) {
193 dataIndex = 2;
194 } else if (strcmp("Time", columnname[2]) == 0) {
195 dataIndex = 0;
196 } else {
197 fprintf(stderr, "Error: 'Time' column is missing in '%s'.\n", inputfile[i]);
198 return EXIT_FAILURE;
199 }
200 } else if (strcmp("CAerrors", columnname[2]) == 0) {
201 if (strcmp("Time", columnname[0]) == 0) {
202 dataIndex = 1;
203 } else if (strcmp("Time", columnname[1]) == 0) {
204 dataIndex = 0;
205 } else {
206 fprintf(stderr, "Error: 'Time' column is missing in '%s'.\n", inputfile[i]);
207 return EXIT_FAILURE;
208 }
209 } else {
210 fprintf(stderr, "Error: 'CAerrors' column is missing in '%s'.\n", inputfile[i]);
211 return EXIT_FAILURE;
212 }
213 }
214
215 while (SDDS_ReadTable(&SDDS_input) > 0) {
216 timeValues = realloc(timeValues, sizeof(*timeValues) * (page + 1));
217 dataValues = realloc(dataValues, sizeof(*dataValues) * (page + 1));
218 dataNames = realloc(dataNames, sizeof(*dataNames) * (page + 1));
219 rows = realloc(rows, sizeof(*rows) * (page + 1));
220
221 SDDS_CopyString(&dataNames[page], columnname[dataIndex]);
222 rows[page] = SDDS_RowCount(&SDDS_input);
223
224 if (rows[page] > 0) {
225 timeValues[page] = SDDS_GetColumnInDoubles(&SDDS_input, "Time");
226 if (timeValues[page] == NULL) {
227 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
228 return EXIT_FAILURE;
229 }
230
231 dataValues[page] = SDDS_GetColumnInDoubles(&SDDS_input, columnname[dataIndex]);
232 if (dataValues[page] == NULL) {
233 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
234 return EXIT_FAILURE;
235 }
236 } else {
237 timeValues[page] = NULL;
238 dataValues[page] = NULL;
239 }
240 page++;
241 }
242
243 for (j = 0; j < columnnames; j++) {
244 free(columnname[j]);
245 }
246 free(columnname);
247
248 if (!SDDS_Terminate(&SDDS_input)) {
249 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
250 return EXIT_FAILURE;
251 }
252 }
253
254 pages = page;
255
256 /* Identify unique data names */
257 for (page = 0; page < pages; page++) {
258 found = 0;
259 for (i = 0; i < uniqueDataNames; i++) {
260 if (strcmp(dataNames[page], uniqueDataName[i]) == 0) {
261 found = 1;
262 break;
263 }
264 }
265 if (!found) {
266 uniqueDataName = realloc(uniqueDataName, sizeof(*uniqueDataName) * (uniqueDataNames + 1));
267 SDDS_CopyString(&uniqueDataName[uniqueDataNames], dataNames[page]);
268 uniqueDataNames++;
269 }
270 }
271
272 /* Initialize output SDDS file */
273 if (!SDDS_InitializeOutput(&SDDS_output, SDDS_BINARY, 0, NULL, NULL, outputfile)) {
274 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
275 return EXIT_FAILURE;
276 }
277
278 if (!SDDS_DefineSimpleColumn(&SDDS_output, "Time", "s", SDDS_DOUBLE)) {
279 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
280 return EXIT_FAILURE;
281 }
282
283 for (i = 0; i < uniqueDataNames; i++) {
284 if (!SDDS_DefineSimpleColumn(&SDDS_output, uniqueDataName[i], NULL, SDDS_DOUBLE)) {
285 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
286 return EXIT_FAILURE;
287 }
288 }
289
290 outputDataValues = malloc(sizeof(*outputDataValues) * uniqueDataNames);
291 if (uniqueDataNames == 1) {
292 /* Single PV: Concatenate all data */
293 for (page = 0; page < pages; page++) {
294 allocated_rows += rows[page];
295 }
296
297 outputTimeValues = malloc(sizeof(*outputTimeValues) * allocated_rows);
298 outputDataValues[0] = malloc(sizeof(*(outputDataValues[0])) * allocated_rows);
299
300 i = 0;
301 for (page = 0; page < pages; page++) {
302 for (j = 0; j < rows[page]; j++) {
303 outputTimeValues[i] = timeValues[page][j];
304 outputDataValues[0][i] = dataValues[page][j];
305 i++;
306 }
307 }
308 } else {
309 /* Multiple PVs: Retain only common timestamps */
310 flag = malloc(sizeof(*flag) * pages);
311 for (page = 0; page < pages; page++) {
312 flag[page] = calloc(rows[page], sizeof(*(flag[page])));
313 }
314
315 array = malloc(sizeof(*array) * uniqueDataNames);
316 arrayCount = calloc(uniqueDataNames, sizeof(*arrayCount));
317
318 for (i = 0; i < uniqueDataNames; i++) {
319 for (page = 0; page < pages; page++) {
320 if (strcmp(dataNames[page], uniqueDataName[i]) == 0) {
321 arrayCount[i]++;
322 if (arrayCount[i] == 1) {
323 array[i] = malloc(sizeof(*(array[i])));
324 } else {
325 array[i] = realloc(array[i], sizeof(*(array[i])) * arrayCount[i]);
326 }
327 array[i][arrayCount[i] - 1] = page;
328 }
329 }
330 }
331
332 for (i = 0; i < arrayCount[0]; i++) {
333 keyGroup = MakeSortedKeyGroups(&keyGroups, SDDS_DOUBLE, timeValues[array[0][i]], rows[array[0][i]]);
334 for (n = 1; n < uniqueDataNames; n++) {
335 for (m = 0; m < arrayCount[n]; m++) {
336 if ((i == m) && (rows[array[0][i]] == rows[array[n][m]]) && (rows[array[0][i]] > 10)) {
337 if ((timeValues[array[0][i]][0] == timeValues[array[n][m]][0]) &&
338 (timeValues[array[0][i]][1] == timeValues[array[n][m]][1]) &&
339 (timeValues[array[0][i]][rows[array[0][i]] - 2] == timeValues[array[n][m]][rows[array[n][m]] - 2]) &&
340 (timeValues[array[0][i]][rows[array[0][i]] - 1] == timeValues[array[n][m]][rows[array[n][m]] - 1])) {
341 /* Assume the entire page matches because it has the same number of rows and key timestamps match */
342 for (r = 0; r < rows[array[n][m]]; r++) {
343 if (flag[array[n][m]][r]) {
344 continue;
345 }
346 flag[array[0][i]][r] += 1;
347 flag[array[n][m]][r] = 1;
348 }
349 }
350 }
351
352 for (r = 0; r < rows[array[n][m]]; r++) {
353 if (flag[array[n][m]][r]) {
354 continue;
355 }
356 row = FindMatchingKeyGroup(keyGroup, keyGroups, SDDS_DOUBLE, &(timeValues[array[n][m]][r]), 1);
357 if (row >= 0) {
358 flag[array[0][i]][row] += 1;
359 flag[array[n][m]][r] = 1;
360 }
361 }
362 }
363 }
364
365 for (j = 0; j < keyGroups; j++) {
366 free(keyGroup[j]->equivalent);
367 free(keyGroup[j]);
368 }
369 free(keyGroup);
370 }
371
372 z = uniqueDataNames - 1;
373 for (n = 0; n < arrayCount[0]; n++) {
374 for (m = 0; m < rows[array[0][n]]; m++) {
375 if (flag[array[0][n]][m] >= z) {
376 allocated_rows++;
377 }
378 }
379 }
380
381 outputTimeValues = malloc(sizeof(*outputTimeValues) * allocated_rows);
382 for (i = 0; i < uniqueDataNames; i++) {
383 outputDataValues[i] = malloc(sizeof(*(outputDataValues[i])) * allocated_rows);
384 }
385
386 s = 0;
387 for (i = 0; i < arrayCount[0]; i++) {
388 for (j = 0; j < rows[array[0][i]]; j++) {
389 if (flag[array[0][i]][j] >= z) {
390 outputTimeValues[s] = timeValues[array[0][i]][j];
391 outputDataValues[0][s] = dataValues[array[0][i]][j];
392 s++;
393 }
394 }
395 }
396
397 if (s == 0) {
398 fprintf(stderr, "Error: No matching 'Time' rows found in input files.\n");
399 return EXIT_FAILURE;
400 }
401
402 keyGroup = MakeSortedKeyGroups(&keyGroups, SDDS_DOUBLE, outputTimeValues, s);
403
404 for (n = 1; n < uniqueDataNames; n++) {
405 for (m = 0; m < arrayCount[n]; m++) {
406 for (r = 0; r < rows[array[n][m]]; r++) {
407 if (flag[array[n][m]][r]) {
408 row = FindMatchingKeyGroup(keyGroup, keyGroups, SDDS_DOUBLE, &(timeValues[array[n][m]][r]), 1);
409 if (row >= 0) {
410 outputDataValues[n][row] = dataValues[array[n][m]][r];
411 }
412 }
413 }
414 }
415 }
416
417 for (i = 0; i < uniqueDataNames; i++) {
418 free(array[i]);
419 }
420
421 for (j = 0; j < keyGroups; j++) {
422 if (keyGroup[j]->equivalent)
423 free(keyGroup[j]->equivalent);
424 if (keyGroup[j])
425 free(keyGroup[j]);
426 }
427
428 for (page = 0; page < pages; page++) {
429 free(flag[page]);
430 }
431
432 free(array);
433 free(keyGroup);
434 free(arrayCount);
435 free(flag);
436 }
437
438 /* Free allocated memory for input data */
439 for (page = 0; page < pages; page++) {
440 if (timeValues[page])
441 free(timeValues[page]);
442 if (dataValues[page])
443 free(dataValues[page]);
444 free(dataNames[page]);
445 }
446 free(timeValues);
447 free(dataValues);
448 free(dataNames);
449
450 /* Write the output SDDS file */
451 if (!SDDS_WriteLayout(&SDDS_output)) {
452 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
453 return EXIT_FAILURE;
454 }
455
456 if (!SDDS_StartPage(&SDDS_output, allocated_rows)) {
457 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
458 return EXIT_FAILURE;
459 }
460
461 if (!SDDS_SetColumnFromDoubles(&SDDS_output, SDDS_SET_BY_NAME, outputTimeValues, allocated_rows, "Time")) {
462 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
463 return EXIT_FAILURE;
464 }
465
466 for (i = 0; i < uniqueDataNames; i++) {
467 if (!SDDS_SetColumnFromDoubles(&SDDS_output, SDDS_SET_BY_NAME, outputDataValues[i], allocated_rows, uniqueDataName[i])) {
468 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
469 return EXIT_FAILURE;
470 }
471 }
472
473 if (!SDDS_WriteTable(&SDDS_output)) {
474 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
475 return EXIT_FAILURE;
476 }
477
478 if (!SDDS_Terminate(&SDDS_output)) {
479 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
480 return EXIT_FAILURE;
481 }
482
483 /* Free allocated memory for output data */
484 for (i = 0; i < uniqueDataNames; i++) {
485 free(uniqueDataName[i]);
486 free(outputDataValues[i]);
487 }
488 free(outputTimeValues);
489 free(outputDataValues);
490 free(uniqueDataName);
491 free(rows);
492
493 if (inputfiles > 0) {
494 free(inputfile);
495 }
496
497 free_scanargs(&s_arg, argc);
498
499 return EXIT_SUCCESS;
500}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows marked as "of interest".
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void * trealloc(void *old_ptr, uint64_t size_of_block)
Reallocates a memory block to a new size.
Definition array.c:181
long fexists(const char *filename)
Checks if a file exists.
Definition fexists.c:27
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
KEYED_EQUIVALENT ** MakeSortedKeyGroups(long *keyGroups, long keyType, void *data, long points)
Create sorted key groups from data.
long FindMatchingKeyGroup(KEYED_EQUIVALENT **keyGroup, long keyGroups, long keyType, void *searchKeyData, long reuse)
Find a matching key group for a search key.