SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsbreak.c
Go to the documentation of this file.
1/**
2 * @file sddsbreak.c
3 * @brief Splits pages of an SDDS file into subpages based on user-defined criteria.
4 *
5 * @details
6 * This file provides the implementation of the `sddsbreak` program, which processes
7 * SDDS (Self Describing Data Set) files by dividing their pages into subpages
8 * according to various user-specified conditions. Supported options include breaking
9 * based on gaps, increases, or decreases in column values, matching patterns, and
10 * imposing row limits. The program supports both row-major and column-major order.
11 *
12 * @section Usage
13 * ```
14 * sddsbreak [<inputfile>] [<outputfile>]
15 * [-pipe=[input][,output]]
16 * [-gapin=<column-name>[,{amount=<value>|factor=<value>}]]
17 * [-increaseof=<column-name>[,{amount=<value>}[,cumulative[,reset]]]]
18 * [-decreaseof=<column-name>[,{amount=<value>}[,cumulative[,reset]]]]
19 * [-changeof=<column-name>[,amount=<value>,base=<value>]]
20 * [-matchto=<column-name>,<pattern>[,after]]
21 * [-rowlimit=<integer>[,overlap=<integer>]]
22 * [-pagesPerPage=<integer>]
23 * [-majorOrder=row|column]
24 * ```
25 *
26 * @section Options
27 * | Option | Description |
28 * |----------------------------|-------------------------------------------------------------------------------------------------|
29 * | `-pipe` | Use pipes for input and/or output. |
30 * | `-gapin` | Break pages based on gaps in the specified column. |
31 * | `-increaseof` | Break pages when the specified column increases by a certain amount, with optional cumulative reset. |
32 * | `-decreaseof` | Break pages when the specified column decreases by a certain amount, with optional cumulative reset. |
33 * | `-changeof` | Break pages based on changes in the specified column relative to a base value. |
34 * | `-matchto` | Break pages when a pattern is matched in the specified column. |
35 * | `-rowlimit` | Limit the number of rows per subpage, optionally overlapping rows. |
36 * | `-pagesPerPage` | Break each page into the given number of roughly equal-length pages. |
37 * | `-majorOrder` | Specify the major order of data as row-major or column-major. |
38 *
39 * @subsection Incompatibilities
40 * - `-gapin`, `-increaseof`, `-decreaseof`, `-changeof`, and `-matchto` are mutually exclusive; only one may be used.
41 * - `-rowlimit` cannot be combined with other break conditions.
42 * - `-pagesPerPage` cannot be combined with other break conditions.
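43 * - For `-gapin`, `amount` and `factor` are both optional; when `amount` is omitted, the threshold is `factor` (default 2) times the average gap between adjacent rows.
44 * - For `-changeof`, `amount` and `base` are both optional; without `amount`, a new page starts whenever the column value differs from the previous row (the only form accepted for string columns).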
45 *
46 * @subsection Requirements
47 * - For `-increaseof` and `-decreaseof`, the `amount` must be positive.
48 *
49 * @copyright
50 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
51 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
52 *
53 * @license
54 * This file is distributed under the terms of the Software License Agreement
55 * found in the file LICENSE included with this distribution.
56 *
57 * @authors
58 * M. Borland,
59 * C. Saunders,
60 * R. Soliday,
61 * H. Shang
62 */
63
64#include "mdb.h"
65#include "SDDS.h"
66#include "scan.h"
67
/*
 * Command-line switches understood by sddsbreak.  Each enumerator doubles as
 * the index of the matching keyword in option[], so the two must stay in step;
 * N_OPTIONS is the number of switches.
 */
enum option_type {
  SET_GAPIN = 0,
  SET_INCREASEOF,
  SET_DECREASEOF,
  SET_CHANGEOF,
  SET_ROWLIMIT,
  SET_PIPE,
  SET_MATCHTO,
  SET_MAJOR_ORDER,
  SET_PAGES_PER_PAGE,
  N_OPTIONS
};

/* Keyword for each switch, indexed by enum option_type (passed to match_string). */
char *option[N_OPTIONS] = {
  [SET_GAPIN] = "gapin",
  [SET_INCREASEOF] = "increaseof",
  [SET_DECREASEOF] = "decreaseof",
  [SET_CHANGEOF] = "changeof",
  [SET_ROWLIMIT] = "rowlimit",
  [SET_PIPE] = "pipe",
  [SET_MATCHTO] = "matchto",
  [SET_MAJOR_ORDER] = "majorOrder",
  [SET_PAGES_PER_PAGE] = "pagesperpage",
};
93
94char *USAGE =
95 "Usage: sddsbreak [<inputfile>] [<outputfile>]\n"
96 " [-pipe=[input][,output]]\n"
97 " [-gapin=<column-name>[,{amount=<value>|factor=<value>}]]\n"
98 " [-increaseof=<column-name>[,{amount=<value>}[,cumulative[,reset]]]]\n"
99 " [-decreaseof=<column-name>[,{amount=<value>}[,cumulative[,reset]]]]\n"
100 " [-changeof=<column-name>[,amount=<value>,base=<value>]] \n"
101 " [-matchto=<column-name>,<pattern>[,after]] \n"
102 " [-rowlimit=<integer>[,overlap=<integer>]]\n"
103 " [-pagesPerPage=<integer>]\n"
104 " [-majorOrder=row|column]\n"
105 "Options:\n"
106 " -pipe=[input][,output]\n"
107 " Use pipes for input and/or output.\n"
108 " -gapin=<column-name>[,{amount=<value> | factor=<value>}]\n"
109 " Break pages based on gaps in the specified column.\n"
110 " -increaseof=<column-name>[,amount=<value>[,cumulative][,reset]]\n"
111 " Break pages when the specified column increases by a certain amount.\n"
112 " -decreaseof=<column-name>[,amount=<value>[,cumulative][,reset]]\n"
113 " Break pages when the specified column decreases by a certain amount.\n"
114 " -changeof=<column-name>[,amount=<value>,base=<value>]\n"
115 " Break pages based on changes in the specified column relative to a base value.\n"
116 " -matchto=<column-name>,<pattern>[,after]\n"
117 " Break pages when a pattern is matched in the specified column.\n"
118 " -rowlimit=<integer>[,overlap=<integer>]\n"
119 " Limit the number of rows per subpage with an optional overlap.\n"
120 " -pagesPerPage=<integer>\n"
121 " Break each page into the given number of roughly equal-length pages.\n"
122 " -majorOrder=row|column\n"
123 " Specify the major order of data as row-major or column-major.\n"
124 "\n"
125 "Program by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
126
/* Bits set in the flag word by scanItemList() for each -gapin keyword seen. */
#define GAPIN_AMOUNT (1U << 0)
#define GAPIN_FACTOR (1U << 1)
/* One-line syntax reminder for -gapin. */
char *gapinUsage = "-gapin=<column-name>[,{amount=<value> | factor=<value>}]";

/* Bits set in the flag word for each -changeof keyword seen. */
#define CHANGEOF_AMOUNT (1U << 0)
#define CHANGEOF_BASE (1U << 1)
/* One-line syntax reminder for -changeof. */
char *changeofUsage = "-changeof=<column-name>[,amount=<value>,base=<value>]";

/* Bits set in the flag word for each -increaseof keyword seen. */
#define INCREASEOF_AMOUNT (1UL << 0)
#define INCREASEOF_CUMULATIVE (1UL << 1)
#define INCREASEOF_RESET (1UL << 2)
/* One-line syntax reminder for -increaseof. */
char *increaseOfUsage = "-increaseOf=<column-name>[,amount=<value>[,cumulative][,reset]]";

/* Bits set in the flag word for each -decreaseof keyword seen. */
#define DECREASEOF_AMOUNT (1UL << 0)
#define DECREASEOF_CUMULATIVE (1UL << 1)
#define DECREASEOF_RESET (1UL << 2)
/* One-line syntax reminder for -decreaseof. */
char *decreaseOfUsage = "-decreaseOf=<column-name>[,amount=<value>[,cumulative[,reset]]]";

/* Bit set in the flag word when -rowlimit is given an overlap. */
#define ROWLIMIT_OVERLAP (1U << 0)
146
147int main(int argc, char **argv) {
148 SDDS_DATASET SDDSnew, SDDSold;
149 long iArg;
150 SCANNED_ARG *scArg;
151 char *input = NULL, *output = NULL, *columnName = NULL;
152 long mode = -1, matchCode, tmpfile_used;
153 int64_t i, j, rows, rowLimit, pagesPerPage=0;
154 long breakNext;
155 double gapAmount = 0, gapFactor = 0, changeAmount, changeBase, *columnData = NULL;
156 char *matchPattern = NULL;
157 long matchPatternAfter = 0;
158 double increaseOfAmount = -1, decreaseOfAmount = -1;
159 long retval;
160 int32_t dataType;
161 long overlap = 0;
162 unsigned long flags = 0, pipeFlags = 0, changeFlags = 0, decreaseOfFlags = 0, increaseOfFlags = 0;
163 unsigned long majorOrderFlag;
164 char **stringData = NULL;
165 short columnMajorOrder = -1;
166
168 argc = scanargs(&scArg, argc, argv);
169 if (argc < 2) {
170 fprintf(stderr, "%s", USAGE);
171 return EXIT_FAILURE;
172 }
173
174 for (iArg = 1; iArg < argc; iArg++) {
175 if (scArg[iArg].arg_type == OPTION) {
176 switch (matchCode = match_string(scArg[iArg].list[0], option, N_OPTIONS, 0)) {
177 case SET_MAJOR_ORDER:
178 majorOrderFlag = 0;
179 scArg[iArg].n_items -= 1;
180 if (scArg[iArg].n_items > 0 &&
181 !scanItemList(&majorOrderFlag, scArg[iArg].list + 1, &scArg[iArg].n_items, 0,
182 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
183 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)) {
184 SDDS_Bomb("invalid -majorOrder syntax/values");
185 }
186 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
187 columnMajorOrder = 1;
188 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
189 columnMajorOrder = 0;
190 break;
191
192 case SET_GAPIN:
193 if ((scArg[iArg].n_items -= 2) < 0 ||
194 !scanItemList(&flags, scArg[iArg].list + 2, &scArg[iArg].n_items, 0,
195 "amount", SDDS_DOUBLE, &gapAmount, 1, GAPIN_AMOUNT,
196 "factor", SDDS_DOUBLE, &gapFactor, 1, GAPIN_FACTOR, NULL) ||
197 (flags & GAPIN_AMOUNT && gapAmount <= 0) ||
198 (flags & GAPIN_FACTOR && gapFactor <= 0)) {
199 fprintf(stderr, "Error: invalid -gapin syntax/values\n");
200 return EXIT_FAILURE;
201 }
202 columnName = scArg[iArg].list[1];
203 mode = matchCode;
204 break;
205
206 case SET_INCREASEOF:
207 increaseOfFlags = 0;
208 if (scArg[iArg].n_items < 2) {
209 fprintf(stderr, "Error: invalid -increaseOf syntax\n");
210 return EXIT_FAILURE;
211 }
212 scArg[iArg].n_items -= 2;
213 if (!scanItemList(&increaseOfFlags, scArg[iArg].list + 2, &scArg[iArg].n_items, 0,
214 "amount", SDDS_DOUBLE, &increaseOfAmount, 1, INCREASEOF_AMOUNT,
215 "cumulative", -1, NULL, 0, INCREASEOF_CUMULATIVE,
216 "reset", -1, NULL, 0, INCREASEOF_RESET, NULL) ||
217 ((flags & INCREASEOF_AMOUNT) && increaseOfAmount <= 0)) {
218 fprintf(stderr, "Error: invalid -increaseOf syntax\n");
219 return EXIT_FAILURE;
220 }
221 columnName = scArg[iArg].list[1];
222 mode = matchCode;
223 break;
224
225 case SET_DECREASEOF:
226 decreaseOfFlags = 0;
227 if (scArg[iArg].n_items < 2) {
228 fprintf(stderr, "Error: invalid -decreaseOf syntax\n");
229 return EXIT_FAILURE;
230 }
231 scArg[iArg].n_items -= 2;
232 if (!scanItemList(&decreaseOfFlags, scArg[iArg].list + 2, &scArg[iArg].n_items, 0,
233 "amount", SDDS_DOUBLE, &decreaseOfAmount, 1, DECREASEOF_AMOUNT,
234 "cumulative", -1, NULL, 0, DECREASEOF_CUMULATIVE,
235 "reset", -1, NULL, 0, DECREASEOF_RESET, NULL) ||
236 ((flags & DECREASEOF_AMOUNT) && decreaseOfAmount <= 0)) {
237 fprintf(stderr, "Error: invalid -decreaseOf syntax\n");
238 return EXIT_FAILURE;
239 }
240 columnName = scArg[iArg].list[1];
241 mode = matchCode;
242 break;
243
244 case SET_CHANGEOF:
245 if ((scArg[iArg].n_items -= 2) < 0 ||
246 !scanItemList(&changeFlags, scArg[iArg].list + 2, &scArg[iArg].n_items, 0,
247 "amount", SDDS_DOUBLE, &changeAmount, 1, CHANGEOF_AMOUNT,
248 "base", SDDS_DOUBLE, &changeBase, 1, CHANGEOF_BASE, NULL) ||
249 (changeFlags & CHANGEOF_AMOUNT && changeAmount <= 0)) {
250 fprintf(stderr, "Error: invalid -changeof syntax/values\n");
251 return EXIT_FAILURE;
252 }
253 columnName = scArg[iArg].list[1];
254 mode = matchCode;
255 break;
256
257 case SET_ROWLIMIT:
258 if (scArg[iArg].n_items < 2) {
259 fprintf(stderr, "Error: invalid -rowlimit syntax\n");
260 return EXIT_FAILURE;
261 }
262 if (sscanf(scArg[iArg].list[1], "%" SCNd64, &rowLimit) != 1 || rowLimit <= 0) {
263 fprintf(stderr, "Error: invalid -rowlimit syntax\n");
264 return EXIT_FAILURE;
265 }
266 if (scArg[iArg].n_items > 2) {
267 scArg[iArg].n_items -= 2;
268 if (!scanItemList(&flags, scArg[iArg].list + 2, &scArg[iArg].n_items, 0,
269 "overlap", SDDS_LONG, &overlap, 1, ROWLIMIT_OVERLAP, NULL) ||
270 overlap < 0) {
271 fprintf(stderr, "Error: invalid overlap given in -rowlimit syntax\n");
272 return EXIT_FAILURE;
273 }
274 }
275 mode = matchCode;
276 break;
277
278 case SET_PIPE:
279 if (!processPipeOption(scArg[iArg].list + 1, scArg[iArg].n_items - 1, &pipeFlags)) {
280 fprintf(stderr, "Error: invalid -pipe syntax\n");
281 return EXIT_FAILURE;
282 }
283 break;
284
285 case SET_MATCHTO:
286 if ((scArg[iArg].n_items != 3 && scArg[iArg].n_items != 4) ||
287 strlen(columnName = scArg[iArg].list[1]) == 0 ||
288 strlen(matchPattern = scArg[iArg].list[2]) == 0) {
289 fprintf(stderr, "Error: invalid -matchTo syntax\n");
290 return EXIT_FAILURE;
291 }
292 if (scArg[iArg].n_items == 4) {
293 if (strncmp(scArg[iArg].list[3], "after", strlen(scArg[iArg].list[3])) == 0)
294 matchPatternAfter = 1;
295 else {
296 fprintf(stderr, "Error: invalid -matchTo syntax\n");
297 return EXIT_FAILURE;
298 }
299 }
300 mode = matchCode;
301 break;
302
303 case SET_PAGES_PER_PAGE:
304 if (scArg[iArg].n_items != 2) {
305 fprintf(stderr, "Error: invalid -pagesPerPage syntax\n");
306 return EXIT_FAILURE;
307 }
308 if (sscanf(scArg[iArg].list[1], "%" SCNd64, &pagesPerPage) != 1 || pagesPerPage <= 0) {
309 fprintf(stderr, "Error: invalid -pagesPerPage syntax\n");
310 return EXIT_FAILURE;
311 }
312 mode = matchCode;
313 break;
314
315 default:
316 fprintf(stderr, "Error: unknown switch: %s\n", scArg[iArg].list[0]);
317 fprintf(stderr, "%s", USAGE);
318 return EXIT_FAILURE;
319 }
320 } else {
321 if (input == NULL)
322 input = scArg[iArg].list[0];
323 else if (output == NULL)
324 output = scArg[iArg].list[0];
325 else {
326 fprintf(stderr, "Error: too many filenames given\n");
327 return EXIT_FAILURE;
328 }
329 }
330 }
331
332 processFilenames("sddsbreak", &input, &output, pipeFlags, 0, &tmpfile_used);
333
334 if (mode == -1) {
335 fprintf(stderr, "Error: no break mode specified\n");
336 return EXIT_FAILURE;
337 }
338
339 if (!SDDS_InitializeInput(&SDDSold, input) ||
340 !SDDS_InitializeCopy(&SDDSnew, &SDDSold, output, "w")) {
341 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
342 return EXIT_FAILURE;
343 }
344
345 SDDSnew.layout.data_mode.no_row_counts = 0;
346 if (columnMajorOrder != -1)
347 SDDSnew.layout.data_mode.column_major = columnMajorOrder;
348 else
349 SDDSnew.layout.data_mode.column_major = SDDSold.layout.data_mode.column_major;
350
351 if (!SDDS_WriteLayout(&SDDSnew)) {
352 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
353 return EXIT_FAILURE;
354 }
355
356 if (mode != SET_ROWLIMIT && mode!=SET_PAGES_PER_PAGE) {
357 if (SDDS_GetColumnInformation(&SDDSold, "type", &dataType, SDDS_BY_NAME, columnName) != SDDS_LONG) {
358 SDDS_SetError("Problem getting type information on given column");
359 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
360 return EXIT_FAILURE;
361 }
362 if (mode == SET_MATCHTO) {
363 if (!(dataType == SDDS_STRING)) {
364 fprintf(stderr, "Error: given column does not contain string data\n");
365 return EXIT_FAILURE;
366 }
367 } else if (!SDDS_NUMERIC_TYPE(dataType)) {
368 if (!(mode == SET_CHANGEOF && !(changeFlags & CHANGEOF_AMOUNT) && !(changeFlags & CHANGEOF_BASE))) {
369 fprintf(stderr, "Error: given column does not contain numeric data\n");
370 return EXIT_FAILURE;
371 }
372 }
373 }
374
375 while ((retval = SDDS_ReadPage(&SDDSold)) > 0) {
376 if ((rows = SDDS_CountRowsOfInterest(&SDDSold)) < 0) {
377 SDDS_SetError("Problem getting number of rows of tabular data");
378 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
379 return EXIT_FAILURE;
380 }
381 if (!SDDS_StartPage(&SDDSnew, rows) ||
382 !SDDS_CopyParameters(&SDDSnew, &SDDSold) ||
383 !SDDS_CopyArrays(&SDDSnew, &SDDSold)) {
384 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
385 return EXIT_FAILURE;
386 }
387 if (rows == 0) {
388 if (!SDDS_WritePage(&SDDSnew)) {
389 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
390 return EXIT_FAILURE;
391 }
392 continue;
393 }
394
395 switch (mode) {
396 case SET_GAPIN:
397 columnData = SDDS_GetColumnInDoubles(&SDDSold, columnName);
398 if (!columnData) {
399 SDDS_SetError("Unable to read specified column");
400 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
401 return EXIT_FAILURE;
402 }
403 if (!gapAmount && rows > 1) {
404 double *gap = tmalloc(sizeof(*gap) * rows);
405 for (i = 1; i < rows; i++)
406 gap[i - 1] = fabs(columnData[i] - columnData[i - 1]);
407 if (!compute_average(&gapAmount, gap, rows - 1)) {
408 fprintf(stderr, "Error: unable to determine default gap amount--couldn't find median gap\n");
409 free(gap);
410 return EXIT_FAILURE;
411 }
412 gapAmount *= (gapFactor ? gapFactor : 2);
413 free(gap);
414 }
415 {
416 int64_t newStart = 0;
417 for (i = 1; i <= rows; i++) {
418 if (i != rows && fabs(columnData[i] - columnData[i - 1]) < gapAmount)
419 continue;
420 if (!SDDS_CopyRows(&SDDSnew, &SDDSold, newStart, i - 1) ||
421 !SDDS_WritePage(&SDDSnew)) {
422 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
423 free(columnData);
424 return EXIT_FAILURE;
425 }
426 newStart = i;
427 }
428 }
429 free(columnData);
430 break;
431
432 case SET_INCREASEOF:
433 columnData = SDDS_GetColumnInDoubles(&SDDSold, columnName);
434 if (!columnData) {
435 SDDS_SetError("Unable to read specified column");
436 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
437 return EXIT_FAILURE;
438 }
439 {
440 int64_t newStart = 0;
441 if (increaseOfAmount <= 0) {
442 for (i = 1; i <= rows; i++) {
443 if (i != rows && columnData[i] <= columnData[i - 1])
444 continue;
445 if (!SDDS_CopyRows(&SDDSnew, &SDDSold, newStart, i - 1) ||
446 !SDDS_WritePage(&SDDSnew)) {
447 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
448 free(columnData);
449 return EXIT_FAILURE;
450 }
451 newStart = i;
452 }
453 } else {
454 if (increaseOfFlags & INCREASEOF_CUMULATIVE) {
455 long iref = 0;
456 for (i = 1; i <= rows; i++) {
457 if ((increaseOfFlags & INCREASEOF_RESET) && columnData[i] < columnData[iref])
458 iref = i;
459 if (i != rows && (columnData[i] - columnData[iref]) < increaseOfAmount)
460 continue;
461 if (!SDDS_CopyRows(&SDDSnew, &SDDSold, newStart, i - 1) ||
462 !SDDS_WritePage(&SDDSnew)) {
463 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
464 free(columnData);
465 return EXIT_FAILURE;
466 }
467 newStart = i;
468 if (increaseOfFlags & INCREASEOF_CUMULATIVE)
469 iref = i;
470 }
471 } else {
472 for (i = 1; i <= rows; i++) {
473 if (i != rows && (columnData[i] - columnData[i - 1]) < increaseOfAmount)
474 continue;
475 if (!SDDS_CopyRows(&SDDSnew, &SDDSold, newStart, i - 1) ||
476 !SDDS_WritePage(&SDDSnew)) {
477 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
478 free(columnData);
479 return EXIT_FAILURE;
480 }
481 newStart = i;
482 }
483 }
484 }
485 }
486 free(columnData);
487 break;
488
489 case SET_DECREASEOF:
490 columnData = SDDS_GetColumnInDoubles(&SDDSold, columnName);
491 if (!columnData) {
492 SDDS_SetError("Unable to read specified column");
493 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
494 return EXIT_FAILURE;
495 }
496 {
497 int64_t newStart = 0;
498 if (decreaseOfAmount <= 0) {
499 for (i = 1; i <= rows; i++) {
500 if (i != rows && columnData[i] >= columnData[i - 1])
501 continue;
502 if (!SDDS_CopyRows(&SDDSnew, &SDDSold, newStart, i - 1) ||
503 !SDDS_WritePage(&SDDSnew)) {
504 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
505 free(columnData);
506 return EXIT_FAILURE;
507 }
508 newStart = i;
509 }
510 } else {
511 if (decreaseOfFlags & DECREASEOF_CUMULATIVE) {
512 long iref = 0;
513 for (i = 1; i <= rows; i++) {
514 if ((decreaseOfFlags & DECREASEOF_RESET) && columnData[i] > columnData[iref])
515 iref = i;
516 if (i != rows && (columnData[iref] - columnData[i]) < decreaseOfAmount)
517 continue;
518 if (!SDDS_CopyRows(&SDDSnew, &SDDSold, newStart, i - 1) ||
519 !SDDS_WritePage(&SDDSnew)) {
520 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
521 free(columnData);
522 return EXIT_FAILURE;
523 }
524 newStart = i;
525 if (decreaseOfFlags & DECREASEOF_CUMULATIVE)
526 iref = i;
527 }
528 } else {
529 for (i = 1; i <= rows; i++) {
530 if (i != rows && (columnData[i - 1] - columnData[i]) < decreaseOfAmount)
531 continue;
532 if (!SDDS_CopyRows(&SDDSnew, &SDDSold, newStart, i - 1) ||
533 !SDDS_WritePage(&SDDSnew)) {
534 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
535 free(columnData);
536 return EXIT_FAILURE;
537 }
538 newStart = i;
539 }
540 }
541 }
542 }
543 free(columnData);
544 break;
545
546 case SET_CHANGEOF:
547 if (dataType != SDDS_STRING) {
548 columnData = SDDS_GetColumnInDoubles(&SDDSold, columnName);
549 if (!columnData) {
550 SDDS_SetError("Unable to read specified column");
551 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
552 return EXIT_FAILURE;
553 }
554 } else {
555 stringData = SDDS_GetColumn(&SDDSold, columnName);
556 if (!stringData) {
557 SDDS_SetError("Unable to read specified column");
558 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
559 return EXIT_FAILURE;
560 }
561 }
562 {
563 int64_t newStart = 0;
564 if (dataType == SDDS_STRING || !changeAmount) {
565 for (i = 1; i <= rows; i++) {
566 if (i != rows &&
567 ((dataType == SDDS_STRING && strcmp(stringData[i], stringData[i - 1]) == 0) ||
568 (dataType != SDDS_STRING && columnData[i] == columnData[i - 1])))
569 continue;
570 if (!SDDS_CopyRows(&SDDSnew, &SDDSold, newStart, i - 1) ||
571 !SDDS_WritePage(&SDDSnew)) {
572 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
573 if (dataType != SDDS_STRING)
574 free(columnData);
575 else
576 SDDS_FreeStringArray(stringData, rows);
577 return EXIT_FAILURE;
578 }
579 newStart = i;
580 }
581 } else {
582 long region = 0, lastRegion = 0;
583 if (!(changeFlags & CHANGEOF_BASE) && rows >= 1)
584 changeBase = columnData[0];
585 if (rows > 1)
586 lastRegion = (columnData[0] - changeBase) / changeAmount;
587
588 newStart = 0;
589 for (i = 1; i <= rows; i++) {
590 if (i != rows)
591 region = (columnData[i] - changeBase) / changeAmount;
592 if (i != rows && region == lastRegion)
593 continue;
594 if (!SDDS_CopyRows(&SDDSnew, &SDDSold, newStart, i - 1) ||
595 !SDDS_WritePage(&SDDSnew)) {
596 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
597 if (dataType != SDDS_STRING)
598 free(columnData);
599 else
600 SDDS_FreeStringArray(stringData, rows);
601 return EXIT_FAILURE;
602 }
603 newStart = i;
604 lastRegion = region;
605 }
606 }
607 }
608 if (dataType != SDDS_STRING)
609 free(columnData);
610 else
611 SDDS_FreeStringArray(stringData, rows);
612 break;
613
614 case SET_MATCHTO:
615 stringData = SDDS_GetColumn(&SDDSold, columnName);
616 if (!stringData) {
617 SDDS_SetError("Unable to read specified column");
618 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
619 return EXIT_FAILURE;
620 }
621 {
622 int64_t newStart = 0;
623 breakNext = 0;
624 for (i = 1; i <= rows; i++) {
625 if (i != rows && !breakNext) {
626 if (wild_match(stringData[i], matchPattern)) {
627 if (matchPatternAfter) {
628 breakNext = 1;
629 continue;
630 }
631 } else {
632 continue;
633 }
634 }
635 if (!SDDS_CopyRows(&SDDSnew, &SDDSold, newStart, i - 1) ||
636 !SDDS_WritePage(&SDDSnew)) {
637 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
638 SDDS_FreeStringArray(stringData, rows);
639 return EXIT_FAILURE;
640 }
641 breakNext = 0;
642 newStart = i;
643 }
644 }
645 SDDS_FreeStringArray(stringData, rows);
646 break;
647
648 case SET_ROWLIMIT:
649 for (i = 0; i < rows; i += rowLimit - overlap) {
650 if ((j = i + rowLimit - 1) >= rows)
651 j = rows - 1;
652 if (!SDDS_CopyRows(&SDDSnew, &SDDSold, i, j)) {
653 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
654 return EXIT_FAILURE;
655 }
656 if (!SDDS_WritePage(&SDDSnew)) {
657 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
658 return EXIT_FAILURE;
659 }
660 if (j == rows - 1)
661 break;
662 }
663 break;
664
665 case SET_PAGES_PER_PAGE:
666 rowLimit = rows/pagesPerPage;
667 for (j=0; j<pagesPerPage; j++) {
668 int64_t iStart, iEnd;
669 iStart = j*rowLimit;
670 iEnd = j==(pagesPerPage-1)? rows-1 : (j+1)*rowLimit-1;
671 if (!SDDS_CopyRows(&SDDSnew, &SDDSold, iStart, iEnd)) {
672 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
673 return EXIT_FAILURE;
674 }
675 if (!SDDS_WritePage(&SDDSnew)) {
676 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
677 return EXIT_FAILURE;
678 }
679 }
680 break;
681
682 default:
683 fprintf(stderr, "Error: unknown break mode code seen---this can't happen\n");
684 return EXIT_FAILURE;
685 }
686 }
687
688 if (retval == 0) {
689 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
690 return EXIT_FAILURE;
691 }
692
693 if (!SDDS_Terminate(&SDDSold)) {
694 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
695 return EXIT_FAILURE;
696 }
697
698 if (!SDDS_Terminate(&SDDSnew)) {
699 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
700 return EXIT_FAILURE;
701 }
702
703 if (tmpfile_used && !replaceFileAndBackUp(input, output)) {
704 return EXIT_FAILURE;
705 }
706
707 return EXIT_SUCCESS;
708}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyArrays(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:334
int32_t SDDS_CopyRows(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, int64_t firstRow, int64_t lastRow)
Definition SDDS_copy.c:882
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
void * SDDS_GetColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves a copy of the data for a specified column, including only rows marked as "of interest".
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_GetColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Retrieves information about a specified column in the SDDS dataset.
Definition SDDS_info.c:41
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_FreeStringArray(char **string, int64_t strings)
Frees an array of strings by deallocating each individual string.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
#define SDDS_NUMERIC_TYPE(type)
Checks if the given type identifier corresponds to any numeric type.
Definition SDDStypes.h:138
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long compute_average(double *value, double *data, int64_t n)
Computes the average of an array of doubles.
Definition median.c:144
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
int wild_match(char *string, char *template)
Determine whether one string is a wildcard match for another.
Definition wild_match.c:49