56 "sddscollect [<input>] [<output>]\n"
57 " [-pipe=[input][,output]]\n"
58 " -collect={suffix=<string>|prefix=<string>|match=<string>}[,column=<newName>][,editCommand=<string>][,exclude=<wildcard>]\n"
60 " [-majorOrder=row|column]\n"
62 " -pipe=[input][,output]\n"
63 " Use the standard SDDS toolkit pipe option for input and output.\n"
64 " -collect={suffix=<string>|prefix=<string>|match=<string>}\n"
65 " Collects columns based on the specified suffix, prefix, or matching pattern.\n"
66 " Additional parameters:\n"
67 " column=<newName> (Optional) Name of the new column. Defaults to suffix or prefix.\n"
68 " editCommand=<string> (Optional) Command to edit the column names.\n"
69 " exclude=<wildcard> (Optional) Exclude columns matching the wildcard pattern.\n"
71 " Suppresses warning messages.\n"
72 " -majorOrder=row|column\n"
73 " Specifies the major order of the output file. Can be either row-major or column-major.\n"
74 "Program by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")";
85static char *option[N_OPTIONS] = {
94 char *part, *newColumn, *match, *editCommand, *exclude;
97 long oldColumns, targetIndex, size;
105 long size, targetIndex;
/*
 * Bit flags recording which keywords were supplied in one -collect option.
 * main() hands these to scanItemList() as the flag value for each keyword and
 * then stores the resulting mask in collection[ic].flags.
 *
 * Checks visible in main():
 *   - any two of SUFFIX / PREFIX / MATCH set together are tested for as part
 *     of the -collect validity check;
 *   - MATCH without both EDIT and COLUMN is rejected with
 *     "must give editCommand and column with match".
 */
108#define COLLECTION_SUFFIX 0x0001U /* "suffix" keyword given; string stored in .part */
109#define COLLECTION_PREFIX 0x0002U /* "prefix" keyword given; string stored in .part */
110#define COLLECTION_COLUMN 0x0004U /* "column" keyword given; string stored in .newColumn */
111#define COLLECTION_MATCH 0x0008U /* "match" keyword given; string stored in .match */
112#define COLLECTION_EDIT 0x0010U /* "editcommand" keyword given; string stored in .editCommand */
113#define COLLECTION_EXCLUDE 0x0020U /* "exclude" keyword given; string stored in .exclude */
/*
 * Forward declaration of CollectAndWriteData().
 *
 * main() calls this once per input row, inside its loop over rows.
 * Parameter roles, as far as the visible parts of the definition show:
 *
 *   SDDSout        Output dataset that receives the values.
 *   collection     Array of `collections` column groups. For group ic and
 *                  rootname ir the value is read from collection[ic].data[ir]
 *                  at byte offset inputRow * collection[ic].size and written
 *                  using collection[ic].targetIndex.
 *   collections    Number of entries in `collection`.
 *   newParameter   Array of `newParameters` entries. Each value is read from
 *                  newParameter[ip].data at byte offset
 *                  inputRow * newParameter[ip].size and passed together with
 *                  newParameter[ip].name.
 *   newParameters  Number of entries in `newParameter`.
 *   rootname       `rootnames` strings set as the "Rootname" column.
 *   units          `rootnames` strings set as the "Units" column.
 *   rootnames      Number of entries in `rootname` and `units`; also the inner
 *                  loop bound over each group's data.
 *   inputRow       Index of the input row whose values are being written.
 *   origPage       Value passed under the name "OriginalPage". main() supplies
 *                  its `code` variable here -- presumably the input page
 *                  number; TODO confirm against the page-reading call.
 */
116void CollectAndWriteData(
SDDS_DATASET *SDDSout,
COLLECTION *collection,
long collections,
NEW_PARAMETER *newParameter,
int newParameters,
char **rootname,
char **units,
long rootnames, int64_t inputRow,
long origPage);
120int main(
int argc,
char **argv) {
123 SCANNED_ARG *scanned;
124 unsigned long pipeFlags, flags, majorOrderFlag;
127 char **rootname, **units;
128 long collections, rootnames, code;
129 int newParameters = 0;
131 char *input, *output;
133 short columnMajorOrder = -1;
137 argc =
scanargs(&scanned, argc, argv);
143 input = output = NULL;
149 for (iArg = 1; iArg < argc; iArg++) {
150 if (scanned[iArg].arg_type == OPTION) {
152 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
153 case CLO_MAJOR_ORDER:
155 scanned[iArg].n_items--;
156 if (scanned[iArg].n_items > 0 &&
157 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
158 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
159 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL))) {
160 SDDS_Bomb(
"Invalid -majorOrder syntax/values");
162 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
163 columnMajorOrder = 1;
164 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
165 columnMajorOrder = 0;
168 if (!(collection =
SDDS_Realloc(collection,
sizeof(*collection) * (collections + 1)))) {
172 collection[ic].newColumn = collection[ic].part = collection[ic].match = collection[ic].editCommand = NULL;
173 collection[ic].exclude = NULL;
174 if (--scanned[iArg].n_items == 0 ||
175 !
scanItemList(&flags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
176 "suffix",
SDDS_STRING, &collection[ic].part, 1, COLLECTION_SUFFIX,
177 "prefix",
SDDS_STRING, &collection[ic].part, 1, COLLECTION_PREFIX,
178 "column",
SDDS_STRING, &collection[ic].newColumn, 1, COLLECTION_COLUMN,
179 "match",
SDDS_STRING, &collection[ic].match, 1, COLLECTION_MATCH,
180 "editcommand",
SDDS_STRING, &collection[ic].editCommand, 1, COLLECTION_EDIT,
181 "exclude",
SDDS_STRING, &collection[ic].exclude, 1, COLLECTION_EXCLUDE, NULL) ||
182 ((flags & COLLECTION_SUFFIX && flags & COLLECTION_PREFIX) ||
183 (flags & COLLECTION_SUFFIX && flags & COLLECTION_MATCH) ||
184 (flags & COLLECTION_PREFIX && flags & COLLECTION_MATCH))) {
187 if (flags & COLLECTION_MATCH &&
188 (!(flags & COLLECTION_EDIT) || !(flags & COLLECTION_COLUMN))) {
189 SDDS_Bomb(
"Invalid -collect syntax: must give editCommand and column with match");
191 collection[ic].flags = flags;
195 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags)) {
203 fprintf(stderr,
"Invalid option seen: %s\n", scanned[iArg].list[0]);
209 input = scanned[iArg].list[0];
211 output = scanned[iArg].list[0];
218 SDDS_Bomb(
"At least one -collect option must be given");
221 processFilenames(
"sddscollect", &input, &output, pipeFlags, !warnings, NULL);
227 rootnames = InitializeOutput(&SDDSout, output, &SDDSin, collection, collections,
228 &newParameter, &newParameters, &rootname, &units, warnings);
229 if (columnMajorOrder != -1)
230 SDDSout.layout.data_mode.column_major = columnMajorOrder;
232 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
241 GetAndOrganizeData(&SDDSin, collection, collections, newParameter, newParameters);
242 for (row = 0; row < rows; row++) {
243 CollectAndWriteData(&SDDSout, collection, collections, newParameter, newParameters,
244 rootname, units, rootnames, row, code);
260 NEW_PARAMETER *newParameter,
int newParameters,
char **rootname,
261 char **units,
long rootnames, int64_t inputRow,
long origPage) {
264 if (!
SDDS_SetColumn(SDDSout, SDDS_PASS_BY_VALUE | SDDS_SET_BY_NAME, rootname, rootnames,
"Rootname")) {
267 if (!
SDDS_SetColumn(SDDSout, SDDS_PASS_BY_VALUE | SDDS_SET_BY_NAME, units, rootnames,
"Units")) {
270 for (ic = 0; ic < collections; ic++) {
271 for (ir = 0; ir < rootnames; ir++) {
273 ir, collection[ic].targetIndex,
274 ((
char *)(collection[ic].data[ir])) + inputRow * collection[ic].size, -1)) {
281 for (ip = 0; ip < newParameters; ip++) {
283 newParameter[ip].name,
284 ((
char *)(newParameter[ip].data) + inputRow * newParameter[ip].size), NULL)) {
289 "OriginalPage", origPage, NULL) ||
298 for (ic = 0; ic < collections; ic++) {
299 for (ii = 0; ii < collection[ic].oldColumns; ii++) {
305 for (ip = 0; ip < newParameters; ip++) {
306 if (!(newParameter[ip].data =
SDDS_GetColumn(SDDSin, newParameter[ip].name))) {
314 int *newParameters,
char ***rootname,
char ***units,
long warnings) {
315 char **inputColumn, *partString;
316 long partLength, *inputLength;
317 int32_t inputColumns;
319 long ic, ii, ip, inputsUsed, rootnames;
320 char *matchString, *excludeString;
334 if (!(inputUsed = (
short *)calloc(inputColumns,
sizeof(*inputUsed))) ||
335 !(inputLength = (
long *)calloc(inputColumns,
sizeof(*inputLength)))) {
338 for (ii = 0; ii < inputColumns; ii++)
339 inputLength[ii] = strlen(inputColumn[ii]);
343 excludeString = NULL;
344 for (ic = 0; ic < collections; ic++) {
345 if (!collection[ic].newColumn)
347 if ((partString = collection[ic].part))
348 partLength = strlen(partString);
349 if (collection[ic].match) {
350 matchString = collection[ic].match;
353 if (collection[ic].exclude) {
354 excludeString = collection[ic].exclude;
356 collection[ic].oldColumn = NULL;
357 collection[ic].oldColumns = 0;
358 for (ii = 0; ii < inputColumns; ii++) {
361 if (partLength >= inputLength[ii])
364 if (
wild_match(inputColumn[ii], matchString)) {
365 if ((excludeString == NULL) || (!
wild_match(inputColumn[ii], excludeString))) {
366 if (!(collection[ic].oldColumn =
SDDS_Realloc(collection[ic].oldColumn,
367 sizeof(*collection[ic].oldColumn) * (collection[ic].oldColumns + 1)))) {
370 collection[ic].oldColumn[collection[ic].oldColumns] = inputColumn[ii];
373 collection[ic].oldColumns++;
376 }
else if (collection[ic].flags & COLLECTION_PREFIX) {
377 if (strncmp(partString, inputColumn[ii], partLength) == 0) {
378 if (!(collection[ic].oldColumn =
SDDS_Realloc(collection[ic].oldColumn,
379 sizeof(*collection[ic].oldColumn) * (collection[ic].oldColumns + 1)))) {
382 collection[ic].oldColumn[collection[ic].oldColumns] = inputColumn[ii];
385 collection[ic].oldColumns++;
388 if (strcmp(partString, inputColumn[ii] + inputLength[ii] - partLength) == 0) {
389 if (!(collection[ic].oldColumn =
SDDS_Realloc(collection[ic].oldColumn,
390 sizeof(*collection[ic].oldColumn) * (collection[ic].oldColumns + 1)))) {
393 collection[ic].oldColumn[collection[ic].oldColumns] = inputColumn[ii];
396 collection[ic].oldColumns++;
400 if (!collection[ic].oldColumns && warnings) {
401 fprintf(stderr,
"Warning (sddscollect): No columns in input for %s %s\n",
402 collection[ic].flags & COLLECTION_PREFIX ?
"prefix" :
"suffix", collection[ic].part);
404 if (!(collection[ic].data = (
void **)calloc(collection[ic].oldColumns,
sizeof(*collection[ic].data)))) {
409 if ((*newParameters = inputColumns - inputsUsed)) {
410 *newParameter = (
NEW_PARAMETER *)malloc(
sizeof(**newParameter) * (*newParameters));
411 for (ii = ip = 0; ii < inputColumns; ii++) {
424 *rootname = ConfirmMatchingColumns(collection, collections, SDDSin, SDDSout, &rootnames, units, warnings);
441 SDDS_DATASET *SDDSout,
long *rootnames,
char ***units,
long warnings) {
442 long ic, ip, ii, partLength;
443 char **rootname, editBuffer[1024];
452 for (ic = 0; ic < collections; ic++) {
453 if (!collection[ic].oldColumns)
455 if (collection[ic].part)
456 partLength = strlen(collection[ic].part);
457 if (collection[ic].part && collection[ic].flags & COLLECTION_SUFFIX) {
459 saveChar = collection[ic].part[0];
460 for (ip = 0; ip < collection[ic].oldColumns; ip++)
461 collection[ic].oldColumn[ip][strlen(collection[ic].oldColumn[ip]) - partLength] = 0;
462 qsort(collection[ic].oldColumn, collection[ic].oldColumns,
sizeof(*collection[ic].oldColumn),
string_cmpasc);
464 for (ip = 0; ip < collection[ic].oldColumns; ip++)
465 collection[ic].oldColumn[ip][strlen(collection[ic].oldColumn[ip])] = saveChar;
467 qsort(collection[ic].oldColumn, collection[ic].oldColumns,
sizeof(*collection[ic].oldColumn),
string_cmpasc);
471 *rootnames = collection[ic].oldColumns;
472 if (!(rootname = (
char **)malloc(
sizeof(*rootname) * (*rootnames)))) {
475 if (!(*units = (
char **)malloc(
sizeof(**units) * (*rootnames)))) {
478 for (ip = 0; ip < (*rootnames); ip++) {
482 if (collection[ic].flags & COLLECTION_EDIT) {
483 strcpy(editBuffer, collection[ic].oldColumn[ip]);
484 if (!
edit_string(editBuffer, collection[ic].editCommand)) {
485 SDDS_Bomb(
"Problem editing column name.");
488 }
else if (collection[ic].flags & COLLECTION_PREFIX) {
489 SDDS_CopyString(rootname + ip, collection[ic].oldColumn[ip] + partLength);
492 rootname[ip][strlen(rootname[ip]) - partLength] = 0;
499 for (ic = 0; ic < collections; ic++) {
500 if (!collection[ic].oldColumns)
502 if (collection[ic].oldColumns != (*rootnames)) {
503 fprintf(stderr,
"Error (sddscollect): Groups have different numbers of members\n");
504 for (ip = 0; ip < collections; ip++) {
505 fprintf(stderr,
"%ld in %s\n", collection[ip].oldColumns,
506 collection[ip].part ? collection[ip].part : collection[ip].match);
510 if (collection[ic].flags & COLLECTION_MATCH)
512 for (ip = 0; ip < collection[ic].oldColumns; ip++)
513 if (strstr(collection[ic].oldColumn[ip], rootname[ip]) == NULL) {
514 fprintf(stderr,
"Error (sddscollect): Mismatch with rootname %s for column %s in group %s\n",
515 rootname[ip], collection[ic].oldColumn[ip],
516 collection[ic].part ? collection[ic].part : collection[ic].match);
518 for (ic = 0; ic < collections; ic++) {
519 fprintf(stderr,
"Group %s (%ld):\n",
520 collection[ic].part ? collection[ic].part : collection[ic].match, ic);
521 for (ip = 0; ip < collection[ic].oldColumns; ip++)
522 fprintf(stderr,
" old column[%ld] = %s\n", ip, collection[ic].oldColumn[ip]);
528 for (ic = 0; ic < collections; ic++) {
532 if (!collection[ic].oldColumns)
535 (collection[ic].targetIndex =
SDDS_GetColumnIndex(SDDSout, collection[ic].newColumn)) < 0 ||
542 for (ii = 1; ii < collection[ic].oldColumns; ii++) {
543 if (
SDDS_CheckColumn(SDDSin, collection[ic].oldColumn[ii], NULL, type, stderr) == SDDS_CHECK_WRONGTYPE) {
544 fprintf(stderr,
"Error (sddscollect): Inconsistent data types for suffix/prefix/match %s\n",
545 collection[ic].part ? collection[ic].part : collection[ic].match);
548 if (
SDDS_CheckColumn(SDDSin, collection[ic].oldColumn[ii], units, type, NULL) == SDDS_CHECK_WRONGUNITS)
553 fprintf(stderr,
"Warning (sddscollect): Inconsistent units for suffix/prefix %s\n", collection[ic].part);
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_CopyArrays(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
int32_t SDDS_GetColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Retrieves information about a specified column in the SDDS dataset.
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllArrayDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all array definitions from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_DefineParameterLikeColumn(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a parameter in the target dataset based on a column definition from the source dataset.
int32_t SDDS_GetParameterType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a parameter in the SDDS dataset by its index.
int32_t SDDS_GetParameterIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named parameter in the SDDS dataset.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
int32_t SDDS_GetTypeSize(int32_t type)
Retrieves the size in bytes of a specified SDDS data type.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
long edit_string(char *text, char *edit)
Edits the provided text based on the specified edit commands.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Utility functions for SDDS dataset manipulation and string array operations.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
int string_cmpasc(const void *a, const void *b)
Compare two strings in ascending order.
int wild_match(char *string, char *template)
Determine whether one string is a wildcard match for another.