50 "Usage: sddscollect [options] [<input>] [<output>]\n\n"
52 " -pipe=[input][,output]\n"
53 " Use the standard SDDS toolkit pipe option for input and output.\n\n"
54 " -collect={suffix=<string>|prefix=<string>|match=<string>}\n"
55 " Collects columns based on the specified suffix, prefix, or matching pattern.\n"
56 " Additional parameters:\n"
57 " column=<newName> (Optional) Name of the new column. Defaults to suffix or prefix.\n"
58 " editCommand=<string> (Optional) Command to edit the column names.\n"
59 " exclude=<wildcard> (Optional) Exclude columns matching the wildcard pattern.\n\n"
61 " Suppresses warning messages.\n\n"
62 " -majorOrder=row|column\n"
63 " Specifies the major order of the output file. Can be either row-major or column-major.\n\n"
64 "Program by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")";
75static char *option[N_OPTIONS] = {
84 char *part, *newColumn, *match, *editCommand, *exclude;
87 long oldColumns, targetIndex, size;
95 long size, targetIndex;
/*
 * Bit flags identifying which qualifiers were given on a -collect option.
 * In main(), scanItemList() pairs each keyword ("suffix", "prefix", "column",
 * "match", "editcommand", "exclude") with the corresponding flag below, and
 * the resulting flag word is stored in collection[].flags.
 *
 * main() tests for SUFFIX, PREFIX and MATCH being combined pairwise on one
 * -collect option (presumably rejected as an error; the handling lines are
 * not visible here), and MATCH additionally requires both EDIT and COLUMN.
 */
98#define COLLECTION_SUFFIX 0x0001U
99#define COLLECTION_PREFIX 0x0002U
100#define COLLECTION_COLUMN 0x0004U
101#define COLLECTION_MATCH 0x0008U
102#define COLLECTION_EDIT 0x0010U
103#define COLLECTION_EXCLUDE 0x0020U
/*
 * Forward declaration of CollectAndWriteData(); main() calls it once per
 * input row.
 *
 * From the visible parts of the definition, for one input row it:
 *   - sets the "Rootname" and "Units" columns of SDDSout from rootname[] and
 *     units[] (rootnames entries each);
 *   - for every collection and every rootname index ir, stores the value at
 *     collection[ic].data[ir] + inputRow * collection[ic].size into output
 *     row ir, column collection[ic].targetIndex;
 *   - for every new parameter, passes the value at
 *     newParameter[ip].data + inputRow * newParameter[ip].size under
 *     newParameter[ip].name;
 *   - passes origPage under the name "OriginalPage".
 *
 * NOTE(review): the SDDS calls that start and write the output page are not
 * visible in this listing; presumably one output page is written per call --
 * confirm against the full definition.
 */
106void CollectAndWriteData(
SDDS_DATASET *SDDSout,
COLLECTION *collection,
long collections,
NEW_PARAMETER *newParameter,
int newParameters,
char **rootname,
char **units,
long rootnames, int64_t inputRow,
long origPage);
110int main(
int argc,
char **argv) {
113 SCANNED_ARG *scanned;
114 unsigned long pipeFlags, flags, majorOrderFlag;
117 char **rootname, **units;
118 long collections, rootnames, code;
119 int newParameters = 0;
121 char *input, *output;
123 short columnMajorOrder = -1;
127 argc =
scanargs(&scanned, argc, argv);
133 input = output = NULL;
139 for (iArg = 1; iArg < argc; iArg++) {
140 if (scanned[iArg].arg_type == OPTION) {
142 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
143 case CLO_MAJOR_ORDER:
145 scanned[iArg].n_items--;
146 if (scanned[iArg].n_items > 0 &&
147 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
148 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
149 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL))) {
150 SDDS_Bomb(
"Invalid -majorOrder syntax/values");
152 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
153 columnMajorOrder = 1;
154 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
155 columnMajorOrder = 0;
158 if (!(collection =
SDDS_Realloc(collection,
sizeof(*collection) * (collections + 1)))) {
162 collection[ic].newColumn = collection[ic].part = collection[ic].match = collection[ic].editCommand = NULL;
163 collection[ic].exclude = NULL;
164 if (--scanned[iArg].n_items == 0 ||
165 !
scanItemList(&flags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
166 "suffix",
SDDS_STRING, &collection[ic].part, 1, COLLECTION_SUFFIX,
167 "prefix",
SDDS_STRING, &collection[ic].part, 1, COLLECTION_PREFIX,
168 "column",
SDDS_STRING, &collection[ic].newColumn, 1, COLLECTION_COLUMN,
169 "match",
SDDS_STRING, &collection[ic].match, 1, COLLECTION_MATCH,
170 "editcommand",
SDDS_STRING, &collection[ic].editCommand, 1, COLLECTION_EDIT,
171 "exclude",
SDDS_STRING, &collection[ic].exclude, 1, COLLECTION_EXCLUDE, NULL) ||
172 ((flags & COLLECTION_SUFFIX && flags & COLLECTION_PREFIX) ||
173 (flags & COLLECTION_SUFFIX && flags & COLLECTION_MATCH) ||
174 (flags & COLLECTION_PREFIX && flags & COLLECTION_MATCH))) {
177 if (flags & COLLECTION_MATCH &&
178 (!(flags & COLLECTION_EDIT) || !(flags & COLLECTION_COLUMN))) {
179 SDDS_Bomb(
"Invalid -collect syntax: must give editCommand and column with match");
181 collection[ic].flags = flags;
185 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags)) {
193 fprintf(stderr,
"Invalid option seen: %s\n", scanned[iArg].list[0]);
199 input = scanned[iArg].list[0];
201 output = scanned[iArg].list[0];
208 SDDS_Bomb(
"At least one -collect option must be given");
211 processFilenames(
"sddscollect", &input, &output, pipeFlags, !warnings, NULL);
217 rootnames = InitializeOutput(&SDDSout, output, &SDDSin, collection, collections,
218 &newParameter, &newParameters, &rootname, &units, warnings);
219 if (columnMajorOrder != -1)
220 SDDSout.layout.data_mode.column_major = columnMajorOrder;
222 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
231 GetAndOrganizeData(&SDDSin, collection, collections, newParameter, newParameters);
232 for (row = 0; row < rows; row++) {
233 CollectAndWriteData(&SDDSout, collection, collections, newParameter, newParameters,
234 rootname, units, rootnames, row, code);
250 NEW_PARAMETER *newParameter,
int newParameters,
char **rootname,
251 char **units,
long rootnames, int64_t inputRow,
long origPage) {
254 if (!
SDDS_SetColumn(SDDSout, SDDS_PASS_BY_VALUE | SDDS_SET_BY_NAME, rootname, rootnames,
"Rootname")) {
257 if (!
SDDS_SetColumn(SDDSout, SDDS_PASS_BY_VALUE | SDDS_SET_BY_NAME, units, rootnames,
"Units")) {
260 for (ic = 0; ic < collections; ic++) {
261 for (ir = 0; ir < rootnames; ir++) {
263 ir, collection[ic].targetIndex,
264 ((
char *)(collection[ic].data[ir])) + inputRow * collection[ic].size, -1)) {
271 for (ip = 0; ip < newParameters; ip++) {
273 newParameter[ip].name,
274 ((
char *)(newParameter[ip].data) + inputRow * newParameter[ip].size), NULL)) {
279 "OriginalPage", origPage, NULL) ||
288 for (ic = 0; ic < collections; ic++) {
289 for (ii = 0; ii < collection[ic].oldColumns; ii++) {
295 for (ip = 0; ip < newParameters; ip++) {
296 if (!(newParameter[ip].data =
SDDS_GetColumn(SDDSin, newParameter[ip].name))) {
304 int *newParameters,
char ***rootname,
char ***units,
long warnings) {
305 char **inputColumn, *partString;
306 long partLength, *inputLength;
307 int32_t inputColumns;
309 long ic, ii, ip, inputsUsed, rootnames;
310 char *matchString, *excludeString;
324 if (!(inputUsed = (
short *)calloc(inputColumns,
sizeof(*inputUsed))) ||
325 !(inputLength = (
long *)calloc(inputColumns,
sizeof(*inputLength)))) {
328 for (ii = 0; ii < inputColumns; ii++)
329 inputLength[ii] = strlen(inputColumn[ii]);
333 excludeString = NULL;
334 for (ic = 0; ic < collections; ic++) {
335 if (!collection[ic].newColumn)
337 if ((partString = collection[ic].part))
338 partLength = strlen(partString);
339 if (collection[ic].match) {
340 matchString = collection[ic].match;
343 if (collection[ic].exclude) {
344 excludeString = collection[ic].exclude;
346 collection[ic].oldColumn = NULL;
347 collection[ic].oldColumns = 0;
348 for (ii = 0; ii < inputColumns; ii++) {
351 if (partLength >= inputLength[ii])
354 if (
wild_match(inputColumn[ii], matchString)) {
355 if ((excludeString == NULL) || (!
wild_match(inputColumn[ii], excludeString))) {
356 if (!(collection[ic].oldColumn =
SDDS_Realloc(collection[ic].oldColumn,
357 sizeof(*collection[ic].oldColumn) * (collection[ic].oldColumns + 1)))) {
360 collection[ic].oldColumn[collection[ic].oldColumns] = inputColumn[ii];
363 collection[ic].oldColumns++;
366 }
else if (collection[ic].flags & COLLECTION_PREFIX) {
367 if (strncmp(partString, inputColumn[ii], partLength) == 0) {
368 if (!(collection[ic].oldColumn =
SDDS_Realloc(collection[ic].oldColumn,
369 sizeof(*collection[ic].oldColumn) * (collection[ic].oldColumns + 1)))) {
372 collection[ic].oldColumn[collection[ic].oldColumns] = inputColumn[ii];
375 collection[ic].oldColumns++;
378 if (strcmp(partString, inputColumn[ii] + inputLength[ii] - partLength) == 0) {
379 if (!(collection[ic].oldColumn =
SDDS_Realloc(collection[ic].oldColumn,
380 sizeof(*collection[ic].oldColumn) * (collection[ic].oldColumns + 1)))) {
383 collection[ic].oldColumn[collection[ic].oldColumns] = inputColumn[ii];
386 collection[ic].oldColumns++;
390 if (!collection[ic].oldColumns && warnings) {
391 fprintf(stderr,
"Warning (sddscollect): No columns in input for %s %s\n",
392 collection[ic].flags & COLLECTION_PREFIX ?
"prefix" :
"suffix", collection[ic].part);
394 if (!(collection[ic].data = (
void **)calloc(collection[ic].oldColumns,
sizeof(*collection[ic].data)))) {
399 if ((*newParameters = inputColumns - inputsUsed)) {
400 *newParameter = (
NEW_PARAMETER *)malloc(
sizeof(**newParameter) * (*newParameters));
401 for (ii = ip = 0; ii < inputColumns; ii++) {
414 *rootname = ConfirmMatchingColumns(collection, collections, SDDSin, SDDSout, &rootnames, units, warnings);
431 SDDS_DATASET *SDDSout,
long *rootnames,
char ***units,
long warnings) {
432 long ic, ip, ii, partLength;
433 char **rootname, editBuffer[1024];
442 for (ic = 0; ic < collections; ic++) {
443 if (!collection[ic].oldColumns)
445 if (collection[ic].part)
446 partLength = strlen(collection[ic].part);
447 if (collection[ic].part && collection[ic].flags & COLLECTION_SUFFIX) {
449 saveChar = collection[ic].part[0];
450 for (ip = 0; ip < collection[ic].oldColumns; ip++)
451 collection[ic].oldColumn[ip][strlen(collection[ic].oldColumn[ip]) - partLength] = 0;
452 qsort(collection[ic].oldColumn, collection[ic].oldColumns,
sizeof(*collection[ic].oldColumn),
string_cmpasc);
454 for (ip = 0; ip < collection[ic].oldColumns; ip++)
455 collection[ic].oldColumn[ip][strlen(collection[ic].oldColumn[ip])] = saveChar;
457 qsort(collection[ic].oldColumn, collection[ic].oldColumns,
sizeof(*collection[ic].oldColumn),
string_cmpasc);
461 *rootnames = collection[ic].oldColumns;
462 if (!(rootname = (
char **)malloc(
sizeof(*rootname) * (*rootnames)))) {
465 if (!(*units = (
char **)malloc(
sizeof(**units) * (*rootnames)))) {
468 for (ip = 0; ip < (*rootnames); ip++) {
472 if (collection[ic].flags & COLLECTION_EDIT) {
473 strcpy(editBuffer, collection[ic].oldColumn[ip]);
474 if (!edit_string(editBuffer, collection[ic].editCommand)) {
475 SDDS_Bomb(
"Problem editing column name.");
478 }
else if (collection[ic].flags & COLLECTION_PREFIX) {
479 SDDS_CopyString(rootname + ip, collection[ic].oldColumn[ip] + partLength);
482 rootname[ip][strlen(rootname[ip]) - partLength] = 0;
489 for (ic = 0; ic < collections; ic++) {
490 if (!collection[ic].oldColumns)
492 if (collection[ic].oldColumns != (*rootnames)) {
493 fprintf(stderr,
"Error (sddscollect): Groups have different numbers of members\n");
494 for (ip = 0; ip < collections; ip++) {
495 fprintf(stderr,
"%ld in %s\n", collection[ip].oldColumns,
496 collection[ip].part ? collection[ip].part : collection[ip].match);
500 if (collection[ic].flags & COLLECTION_MATCH)
502 for (ip = 0; ip < collection[ic].oldColumns; ip++)
503 if (strstr(collection[ic].oldColumn[ip], rootname[ip]) == NULL) {
504 fprintf(stderr,
"Error (sddscollect): Mismatch with rootname %s for column %s in group %s\n",
505 rootname[ip], collection[ic].oldColumn[ip],
506 collection[ic].part ? collection[ic].part : collection[ic].match);
508 for (ic = 0; ic < collections; ic++) {
509 fprintf(stderr,
"Group %s (%ld):\n",
510 collection[ic].part ? collection[ic].part : collection[ic].match, ic);
511 for (ip = 0; ip < collection[ic].oldColumns; ip++)
512 fprintf(stderr,
" old column[%ld] = %s\n", ip, collection[ic].oldColumn[ip]);
518 for (ic = 0; ic < collections; ic++) {
522 if (!collection[ic].oldColumns)
525 (collection[ic].targetIndex =
SDDS_GetColumnIndex(SDDSout, collection[ic].newColumn)) < 0 ||
532 for (ii = 1; ii < collection[ic].oldColumns; ii++) {
533 if (
SDDS_CheckColumn(SDDSin, collection[ic].oldColumn[ii], NULL, type, stderr) == SDDS_CHECK_WRONGTYPE) {
534 fprintf(stderr,
"Error (sddscollect): Inconsistent data types for suffix/prefix/match %s\n",
535 collection[ic].part ? collection[ic].part : collection[ic].match);
538 if (
SDDS_CheckColumn(SDDSin, collection[ic].oldColumn[ii], units, type, NULL) == SDDS_CHECK_WRONGUNITS)
543 fprintf(stderr,
"Warning (sddscollect): Inconsistent units for suffix/prefix %s\n", collection[ic].part);
SDDS (Self Describing Data Set) data type definitions and function prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_CopyArrays(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
int32_t SDDS_GetColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Retrieves information about a specified column in the SDDS dataset.
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllArrayDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all array definitions from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_DefineParameterLikeColumn(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a parameter in the target dataset based on a column definition from the source dataset.
int32_t SDDS_GetParameterType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a parameter in the SDDS dataset by its index.
int32_t SDDS_GetParameterIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named parameter in the SDDS dataset.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
int32_t SDDS_GetTypeSize(int32_t type)
Retrieves the size in bytes of a specified SDDS data type.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
int string_cmpasc(const void *a, const void *b)
Compare two strings in ascending order.
int wild_match(char *string, char *template)
Determine whether one string is a wildcard match for another.