SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddscombinelogfiles.c
Go to the documentation of this file.
1/**
2 * @file sddscombinelogfiles.c
3 * @brief Combines multiple SDDS log files into a single file, retaining only common timestamps.
4 *
5 * @details
6 * This program processes SDDS log files in the one-PV-per-file format, merging them into a single SDDS file.
7 * It retains only the timestamps common across all input files and supports flexible input-output configurations
8 * such as pipe-based I/O and overwriting existing output files.
9 *
10 * @section Usage
11 * ```
12 * sddscombinelogfiles [<SDDSinputfilelist>] [<SDDSoutputfile>]
13 * [-pipe=[output]]
14 * [-overwrite]
15 * ```
16 *
17 * @section Options
18 * | Option | Description |
19 * |--------------------|-----------------------------------------------------------------|
20 * | `-pipe` | Use pipe output for the SDDS file instead of writing to a file. |
21 * | `-overwrite` | Overwrite the output file if it already exists. |
22 *
23 * @copyright
24 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
25 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
26 *
27 * @license
28 * This file is distributed under the terms of the Software License Agreement
29 * found in the file LICENSE included with this distribution.
30 *
31 * @author
32 * R. Soliday, L. Emery
33 */
34
35#include "mdb.h"
36#include "scan.h"
37#include "SDDS.h"
38
/*
 * Command-line options understood by this program.
 * The enumerators double as indices into the option[] keyword table, and
 * N_OPTIONS is the number of entries in that table.
 */
enum option_type {
  SET_PIPE = 0,      /* -pipe      */
  SET_OVERWRITE = 1, /* -overwrite */
  N_OPTIONS = 2      /* entry count, not an option itself */
};
45
46static char *option[N_OPTIONS] = {
47 "pipe",
48 "overwrite"
49};
50
51const char *USAGE =
52 "Usage: sddscombinelogfiles [<SDDSinputfilelist>] [<SDDSoutputfile>]\n"
53 " [-pipe=[output]] [-overwrite]\n\n"
54 "This program combines data logger output files that are in the one-PV-per-file format.\n"
55 "Only the timestamps present in all input files are retained in the output file.\n\n"
56 "Options:\n"
57 " -pipe=[output] Specify the pipe output.\n"
58 " -overwrite Overwrite the output file if it already exists.\n\n"
59 "Example:\n"
60 " sddscombinelogfiles input1.sdds input2.sdds output.sdds -overwrite\n\n"
61 "Link date: " __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION "\n";
62
63int main(int argc, char **argv) {
64 SDDS_DATASET SDDS_input;
65 SDDS_DATASET SDDS_output;
66 SCANNED_ARG *s_arg;
67 KEYED_EQUIVALENT **keyGroup = NULL;
68 long keyGroups = 0;
69 char **inputfile = NULL;
70 int inputfiles = 0;
71 char *outputfile = NULL;
72 int i_arg, n, row, z;
73 int64_t i, j, m, r, s;
74 unsigned long pipeFlags = 0;
75 int overwrite = 0;
76 char **columnname;
77 int32_t columnnames;
78 int dataIndex;
79 double **timeValues = NULL;
80 double **dataValues = NULL;
81 short **flag = NULL;
82 int64_t *rows = NULL;
83 char **dataNames = NULL;
84 char **uniqueDataName = NULL;
85 int uniqueDataNames = 0;
86 int page = 0;
87 int pages;
88 int found;
89 double *outputTimeValues = NULL;
90 double **outputDataValues = NULL;
91 int64_t allocated_rows = 0;
92 int **array = NULL;
93 int *arrayCount;
94
96 argc = scanargs(&s_arg, argc, argv);
97
98 if (argc < 3) {
99 fprintf(stderr, "%s", USAGE);
100 return EXIT_FAILURE;
101 }
102
103 for (i_arg = 1; i_arg < argc; i_arg++) {
104 if (s_arg[i_arg].arg_type == OPTION) {
105 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
106 case SET_OVERWRITE:
107 overwrite = 1;
108 break;
109 case SET_PIPE:
110 if (!processPipeOption(s_arg[i_arg].list + 1,
111 s_arg[i_arg].n_items - 1,
112 &pipeFlags)) {
113 fprintf(stderr, "Error: Invalid -pipe option syntax.\n");
114 return EXIT_FAILURE;
115 }
116 if (pipeFlags & USE_STDIN) {
117 fprintf(stderr, "Error: -pipe=in is not supported.\n");
118 return EXIT_FAILURE;
119 }
120 break;
121 default:
122 fprintf(stderr, "Error: Unrecognized option.\n%s", USAGE);
123 return EXIT_FAILURE;
124 }
125 } else {
126 inputfile = trealloc(inputfile, sizeof(*inputfile) * (inputfiles + 1));
127 inputfile[inputfiles++] = s_arg[i_arg].list[0];
128 }
129 }
130
131 if (inputfiles > 1) {
132 if (!(pipeFlags & USE_STDOUT)) {
133 outputfile = inputfile[--inputfiles];
134 if (fexists(outputfile) && !overwrite) {
135 fprintf(stderr, "Error: Output file '%s' already exists. Use -overwrite to replace it.\n", outputfile);
136 return EXIT_FAILURE;
137 }
138 }
139 } else if (inputfiles == 1) {
140 if ((pipeFlags & USE_STDOUT) && outputfile) {
141 fprintf(stderr, "Error: Too many filenames provided with -pipe=output.\n");
142 return EXIT_FAILURE;
143 }
144 } else {
145 fprintf(stderr, "Error: No input filenames provided.\n%s", USAGE);
146 return EXIT_FAILURE;
147 }
148
149 for (i = 0; i < inputfiles; i++) {
150 if (!SDDS_InitializeInput(&SDDS_input, inputfile[i])) {
151 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
152 return EXIT_FAILURE;
153 }
154
155 columnname = SDDS_GetColumnNames(&SDDS_input, &columnnames);
156 if (columnname == NULL) {
157 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
158 return EXIT_FAILURE;
159 }
160
161 if (columnnames > 3 || columnnames < 2) {
162 fprintf(stderr, "Error: Unexpected number of columns in '%s'.\n", inputfile[i]);
163 return EXIT_FAILURE;
164 }
165
166 if (columnnames == 2) {
167 if (strcmp("Time", columnname[0]) == 0) {
168 dataIndex = 1;
169 } else if (strcmp("Time", columnname[1]) == 0) {
170 dataIndex = 0;
171 } else {
172 fprintf(stderr, "Error: 'Time' column is missing in '%s'.\n", inputfile[i]);
173 return EXIT_FAILURE;
174 }
175 }
176
177 if (columnnames == 3) {
178 if (strcmp("CAerrors", columnname[0]) == 0) {
179 if (strcmp("Time", columnname[1]) == 0) {
180 dataIndex = 2;
181 } else if (strcmp("Time", columnname[2]) == 0) {
182 dataIndex = 1;
183 } else {
184 fprintf(stderr, "Error: 'Time' column is missing in '%s'.\n", inputfile[i]);
185 return EXIT_FAILURE;
186 }
187 } else if (strcmp("CAerrors", columnname[1]) == 0) {
188 if (strcmp("Time", columnname[0]) == 0) {
189 dataIndex = 2;
190 } else if (strcmp("Time", columnname[2]) == 0) {
191 dataIndex = 0;
192 } else {
193 fprintf(stderr, "Error: 'Time' column is missing in '%s'.\n", inputfile[i]);
194 return EXIT_FAILURE;
195 }
196 } else if (strcmp("CAerrors", columnname[2]) == 0) {
197 if (strcmp("Time", columnname[0]) == 0) {
198 dataIndex = 1;
199 } else if (strcmp("Time", columnname[1]) == 0) {
200 dataIndex = 0;
201 } else {
202 fprintf(stderr, "Error: 'Time' column is missing in '%s'.\n", inputfile[i]);
203 return EXIT_FAILURE;
204 }
205 } else {
206 fprintf(stderr, "Error: 'CAerrors' column is missing in '%s'.\n", inputfile[i]);
207 return EXIT_FAILURE;
208 }
209 }
210
211 while (SDDS_ReadTable(&SDDS_input) > 0) {
212 timeValues = realloc(timeValues, sizeof(*timeValues) * (page + 1));
213 dataValues = realloc(dataValues, sizeof(*dataValues) * (page + 1));
214 dataNames = realloc(dataNames, sizeof(*dataNames) * (page + 1));
215 rows = realloc(rows, sizeof(*rows) * (page + 1));
216
217 SDDS_CopyString(&dataNames[page], columnname[dataIndex]);
218 rows[page] = SDDS_RowCount(&SDDS_input);
219
220 if (rows[page] > 0) {
221 timeValues[page] = SDDS_GetColumnInDoubles(&SDDS_input, "Time");
222 if (timeValues[page] == NULL) {
223 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
224 return EXIT_FAILURE;
225 }
226
227 dataValues[page] = SDDS_GetColumnInDoubles(&SDDS_input, columnname[dataIndex]);
228 if (dataValues[page] == NULL) {
229 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
230 return EXIT_FAILURE;
231 }
232 } else {
233 timeValues[page] = NULL;
234 dataValues[page] = NULL;
235 }
236 page++;
237 }
238
239 for (j = 0; j < columnnames; j++) {
240 free(columnname[j]);
241 }
242 free(columnname);
243
244 if (!SDDS_Terminate(&SDDS_input)) {
245 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
246 return EXIT_FAILURE;
247 }
248 }
249
250 pages = page;
251
252 /* Identify unique data names */
253 for (page = 0; page < pages; page++) {
254 found = 0;
255 for (i = 0; i < uniqueDataNames; i++) {
256 if (strcmp(dataNames[page], uniqueDataName[i]) == 0) {
257 found = 1;
258 break;
259 }
260 }
261 if (!found) {
262 uniqueDataName = realloc(uniqueDataName, sizeof(*uniqueDataName) * (uniqueDataNames + 1));
263 SDDS_CopyString(&uniqueDataName[uniqueDataNames], dataNames[page]);
264 uniqueDataNames++;
265 }
266 }
267
268 /* Initialize output SDDS file */
269 if (!SDDS_InitializeOutput(&SDDS_output, SDDS_BINARY, 0, NULL, NULL, outputfile)) {
270 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
271 return EXIT_FAILURE;
272 }
273
274 if (!SDDS_DefineSimpleColumn(&SDDS_output, "Time", "s", SDDS_DOUBLE)) {
275 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
276 return EXIT_FAILURE;
277 }
278
279 for (i = 0; i < uniqueDataNames; i++) {
280 if (!SDDS_DefineSimpleColumn(&SDDS_output, uniqueDataName[i], NULL, SDDS_DOUBLE)) {
281 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
282 return EXIT_FAILURE;
283 }
284 }
285
286 outputDataValues = malloc(sizeof(*outputDataValues) * uniqueDataNames);
287 if (uniqueDataNames == 1) {
288 /* Single PV: Concatenate all data */
289 for (page = 0; page < pages; page++) {
290 allocated_rows += rows[page];
291 }
292
293 outputTimeValues = malloc(sizeof(*outputTimeValues) * allocated_rows);
294 outputDataValues[0] = malloc(sizeof(*(outputDataValues[0])) * allocated_rows);
295
296 i = 0;
297 for (page = 0; page < pages; page++) {
298 for (j = 0; j < rows[page]; j++) {
299 outputTimeValues[i] = timeValues[page][j];
300 outputDataValues[0][i] = dataValues[page][j];
301 i++;
302 }
303 }
304 } else {
305 /* Multiple PVs: Retain only common timestamps */
306 flag = malloc(sizeof(*flag) * pages);
307 for (page = 0; page < pages; page++) {
308 flag[page] = calloc(rows[page], sizeof(*(flag[page])));
309 }
310
311 array = malloc(sizeof(*array) * uniqueDataNames);
312 arrayCount = calloc(uniqueDataNames, sizeof(*arrayCount));
313
314 for (i = 0; i < uniqueDataNames; i++) {
315 for (page = 0; page < pages; page++) {
316 if (strcmp(dataNames[page], uniqueDataName[i]) == 0) {
317 arrayCount[i]++;
318 if (arrayCount[i] == 1) {
319 array[i] = malloc(sizeof(*(array[i])));
320 } else {
321 array[i] = realloc(array[i], sizeof(*(array[i])) * arrayCount[i]);
322 }
323 array[i][arrayCount[i] - 1] = page;
324 }
325 }
326 }
327
328 for (i = 0; i < arrayCount[0]; i++) {
329 keyGroup = MakeSortedKeyGroups(&keyGroups, SDDS_DOUBLE, timeValues[array[0][i]], rows[array[0][i]]);
330 for (n = 1; n < uniqueDataNames; n++) {
331 for (m = 0; m < arrayCount[n]; m++) {
332 if ((i == m) && (rows[array[0][i]] == rows[array[n][m]]) && (rows[array[0][i]] > 10)) {
333 if ((timeValues[array[0][i]][0] == timeValues[array[n][m]][0]) &&
334 (timeValues[array[0][i]][1] == timeValues[array[n][m]][1]) &&
335 (timeValues[array[0][i]][rows[array[0][i]] - 2] == timeValues[array[n][m]][rows[array[n][m]] - 2]) &&
336 (timeValues[array[0][i]][rows[array[0][i]] - 1] == timeValues[array[n][m]][rows[array[n][m]] - 1])) {
337 /* Assume the entire page matches because it has the same number of rows and key timestamps match */
338 for (r = 0; r < rows[array[n][m]]; r++) {
339 if (flag[array[n][m]][r]) {
340 continue;
341 }
342 flag[array[0][i]][r] += 1;
343 flag[array[n][m]][r] = 1;
344 }
345 }
346 }
347
348 for (r = 0; r < rows[array[n][m]]; r++) {
349 if (flag[array[n][m]][r]) {
350 continue;
351 }
352 row = FindMatchingKeyGroup(keyGroup, keyGroups, SDDS_DOUBLE, &(timeValues[array[n][m]][r]), 1);
353 if (row >= 0) {
354 flag[array[0][i]][row] += 1;
355 flag[array[n][m]][r] = 1;
356 }
357 }
358 }
359 }
360
361 for (j = 0; j < keyGroups; j++) {
362 free(keyGroup[j]->equivalent);
363 free(keyGroup[j]);
364 }
365 free(keyGroup);
366 }
367
368 z = uniqueDataNames - 1;
369 for (n = 0; n < arrayCount[0]; n++) {
370 for (m = 0; m < rows[array[0][n]]; m++) {
371 if (flag[array[0][n]][m] >= z) {
372 allocated_rows++;
373 }
374 }
375 }
376
377 outputTimeValues = malloc(sizeof(*outputTimeValues) * allocated_rows);
378 for (i = 0; i < uniqueDataNames; i++) {
379 outputDataValues[i] = malloc(sizeof(*(outputDataValues[i])) * allocated_rows);
380 }
381
382 s = 0;
383 for (i = 0; i < arrayCount[0]; i++) {
384 for (j = 0; j < rows[array[0][i]]; j++) {
385 if (flag[array[0][i]][j] >= z) {
386 outputTimeValues[s] = timeValues[array[0][i]][j];
387 outputDataValues[0][s] = dataValues[array[0][i]][j];
388 s++;
389 }
390 }
391 }
392
393 if (s == 0) {
394 fprintf(stderr, "Error: No matching 'Time' rows found in input files.\n");
395 return EXIT_FAILURE;
396 }
397
398 keyGroup = MakeSortedKeyGroups(&keyGroups, SDDS_DOUBLE, outputTimeValues, s);
399
400 for (n = 1; n < uniqueDataNames; n++) {
401 for (m = 0; m < arrayCount[n]; m++) {
402 for (r = 0; r < rows[array[n][m]]; r++) {
403 if (flag[array[n][m]][r]) {
404 row = FindMatchingKeyGroup(keyGroup, keyGroups, SDDS_DOUBLE, &(timeValues[array[n][m]][r]), 1);
405 if (row >= 0) {
406 outputDataValues[n][row] = dataValues[array[n][m]][r];
407 }
408 }
409 }
410 }
411 }
412
413 for (i = 0; i < uniqueDataNames; i++) {
414 free(array[i]);
415 }
416
417 for (j = 0; j < keyGroups; j++) {
418 if (keyGroup[j]->equivalent)
419 free(keyGroup[j]->equivalent);
420 if (keyGroup[j])
421 free(keyGroup[j]);
422 }
423
424 for (page = 0; page < pages; page++) {
425 free(flag[page]);
426 }
427
428 free(array);
429 free(keyGroup);
430 free(arrayCount);
431 free(flag);
432 }
433
434 /* Free allocated memory for input data */
435 for (page = 0; page < pages; page++) {
436 if (timeValues[page])
437 free(timeValues[page]);
438 if (dataValues[page])
439 free(dataValues[page]);
440 free(dataNames[page]);
441 }
442 free(timeValues);
443 free(dataValues);
444 free(dataNames);
445
446 /* Write the output SDDS file */
447 if (!SDDS_WriteLayout(&SDDS_output)) {
448 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
449 return EXIT_FAILURE;
450 }
451
452 if (!SDDS_StartPage(&SDDS_output, allocated_rows)) {
453 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
454 return EXIT_FAILURE;
455 }
456
457 if (!SDDS_SetColumnFromDoubles(&SDDS_output, SDDS_SET_BY_NAME, outputTimeValues, allocated_rows, "Time")) {
458 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
459 return EXIT_FAILURE;
460 }
461
462 for (i = 0; i < uniqueDataNames; i++) {
463 if (!SDDS_SetColumnFromDoubles(&SDDS_output, SDDS_SET_BY_NAME, outputDataValues[i], allocated_rows, uniqueDataName[i])) {
464 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
465 return EXIT_FAILURE;
466 }
467 }
468
469 if (!SDDS_WriteTable(&SDDS_output)) {
470 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
471 return EXIT_FAILURE;
472 }
473
474 if (!SDDS_Terminate(&SDDS_output)) {
475 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
476 return EXIT_FAILURE;
477 }
478
479 /* Free allocated memory for output data */
480 for (i = 0; i < uniqueDataNames; i++) {
481 free(uniqueDataName[i]);
482 free(outputDataValues[i]);
483 }
484 free(outputTimeValues);
485 free(outputDataValues);
486 free(uniqueDataName);
487 free(rows);
488
489 if (inputfiles > 0) {
490 free(inputfile);
491 }
492
493 free_scanargs(&s_arg, argc);
494
495 return EXIT_SUCCESS;
496}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows marked as "of interest".
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void * trealloc(void *old_ptr, uint64_t size_of_block)
Reallocates a memory block to a new size.
Definition array.c:181
long fexists(const char *filename)
Checks if a file exists.
Definition fexists.c:27
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
KEYED_EQUIVALENT ** MakeSortedKeyGroups(long *keyGroups, long keyType, void *data, long points)
Create sorted key groups from data.
long FindMatchingKeyGroup(KEYED_EQUIVALENT **keyGroup, long keyGroups, long keyType, void *searchKeyData, long reuse)
Find a matching key group for a search key.