/*
 * A pair of column names taken from one <col1>=<col2> item of the -match or
 * -equate option. Element [0] is the name that is looked up in the first
 * input and element [1] is the name that is looked up in the second input.
 */
62typedef char *STRING_PAIR[2];
/*
 * rows_equate: compare row `row1` of SDDS1 with row `row2` of SDDS2 over a
 * list of column pairs.
 *
 *   SDDS1, row1      first data set and the row index within it
 *   SDDS2, row2      second data set and the row index within it
 *   equate_columns   number of entries in equate_column
 *   equate_column    column-name pairs; [0] names the column in SDDS1 and
 *                    [1] names the column in SDDS2
 *
 * For each pair the definition compares the stored values byte-for-byte
 * (memcmp over the size of the column's type), and it reports a problem when
 * the two columns of a pair do not have matching types.
 *
 * The caller stores the result in a flag named `equal` and treats nonzero as
 * "the rows are equal".
 * NOTE(review): presumably 0 is returned at the first differing pair; confirm
 * against the return statements of the definition.
 */
64long rows_equate(
SDDS_DATASET *SDDS1, int64_t row1,
SDDS_DATASET *SDDS2, int64_t row2,
long equate_columns, STRING_PAIR *equate_column);
66char *option[N_OPTIONS] = {
78 "Usage: sddsmselect [OPTIONS] [<input1>] <input2> [<output>]\n"
81 " -pipe[=input][,output] Use pipe for input and/or output.\n"
82 " -match=<col1>=<col2>,... Specify columns to match between input1 and input2.\n"
83 " -equate=<col1>=<col2>,... Specify columns to equate between input1 and input2.\n"
84 " -invert Select rows with no matching rows in input2.\n"
85 " -reuse[=rows|page] Allow reuse of rows from input2.\n"
86 " -majorOrder=row|column Set output file order to row or column major.\n"
87 " -nowarnings Suppress warning messages.\n"
90 " sddsmselect selects data from <input1> to write to <output>\n"
91 " based on the presence or absence of matching data in <input2>.\n"
92 " If <output> is not specified, <input1> is replaced.\n"
94 " -match Specifies names of columns to match between <input1> and\n"
95 " <input2> for selection and placement of data.\n"
96 " -equate Specifies names of columns to equate between <input1> and\n"
97 " <input2> for selection and placement of data.\n"
98 " -reuse Allows rows of <input2> to be reused, i.e., matched with\n"
99 " multiple rows of <input1>.\n"
100 " Use -reuse=page to restrict to the first page of <input2>.\n"
101 " -invert Select rows from <input1> that have no matching rows in <input2>.\n"
102 " -majorOrder Set the output file to use row or column major order.\n"
103 " -nowarnings Do not print warning messages.\n"
105 "Program by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")\n";
107int main(
int argc,
char **argv) {
109 long i, i_arg, reuse, reusePage;
110 int64_t j, k, rows1, rows2, n, outputRow;
113 STRING_PAIR *match_column, *equate_column;
114 long match_columns, equate_columns;
115 char *input1, *input2, *output, *match_value;
116 long tmpfile_used, retval1, retval2;
117 long *row_used, warnings, invert;
118 unsigned long pipeFlags, majorOrderFlag;
119 short columnMajorOrder = -1;
122 argc =
scanargs(&s_arg, argc, argv);
126 input1 = input2 = output = NULL;
127 match_column = equate_column = NULL;
128 match_columns = equate_columns = reuse = reusePage = invert = 0;
133 for (i_arg = 1; i_arg < argc; i_arg++) {
134 if (s_arg[i_arg].arg_type == OPTION) {
136 switch (
match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
137 case SET_MAJOR_ORDER:
139 s_arg[i_arg].n_items--;
140 if (s_arg[i_arg].n_items > 0 &&
141 (!
scanItemList(&majorOrderFlag, s_arg[i_arg].list + 1, &s_arg[i_arg].n_items,
142 0,
"row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
143 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
144 SDDS_Bomb(
"Invalid -majorOrder syntax or values.");
145 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
146 columnMajorOrder = 1;
147 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
148 columnMajorOrder = 0;
150 case SET_MATCH_COLUMNS:
151 if (s_arg[i_arg].n_items < 2)
153 match_column =
trealloc(match_column,
sizeof(*match_column) * (match_columns + s_arg[i_arg].n_items - 1));
154 for (i = 1; i < s_arg[i_arg].n_items; i++) {
155 if ((ptr = strchr(s_arg[i_arg].list[i],
'=')))
158 ptr = s_arg[i_arg].list[i];
159 match_column[i - 1 + match_columns][0] = s_arg[i_arg].list[i];
160 match_column[i - 1 + match_columns][1] = ptr;
162 match_columns += s_arg[i_arg].n_items - 1;
164 case SET_EQUATE_COLUMNS:
165 if (s_arg[i_arg].n_items < 2)
167 equate_column =
trealloc(equate_column,
sizeof(*equate_column) * (equate_columns + s_arg[i_arg].n_items - 1));
168 for (i = 1; i < s_arg[i_arg].n_items; i++) {
169 if ((ptr = strchr(s_arg[i_arg].list[i],
'=')))
172 ptr = s_arg[i_arg].list[i];
173 equate_column[i - 1 + equate_columns][0] = s_arg[i_arg].list[i];
174 equate_column[i - 1 + equate_columns][1] = ptr;
176 equate_columns += s_arg[i_arg].n_items - 1;
179 if (s_arg[i_arg].n_items == 1)
182 char *reuseOptions[2] = {
"rows",
"page"};
183 for (i = 1; i < s_arg[i_arg].n_items; i++) {
184 switch (
match_string(s_arg[i_arg].list[i], reuseOptions, 2, 0)) {
205 if (!
processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipeFlags))
209 fprintf(stderr,
"Error: Unknown option: %s\n", s_arg[i_arg].list[0]);
215 input1 = s_arg[i_arg].list[0];
216 else if (input2 == NULL)
217 input2 = s_arg[i_arg].list[0];
218 else if (output == NULL)
219 output = s_arg[i_arg].list[0];
221 SDDS_Bomb(
"Too many filenames provided.");
225 if (pipeFlags & USE_STDIN && input1) {
227 SDDS_Bomb(
"Too many filenames with -pipe option.");
232 processFilenames(
"sddsmselect", &input1, &output, pipeFlags, !warnings, &tmpfile_used);
234 SDDS_Bomb(
"Second input file not specified.");
236 if (!match_columns && !equate_columns)
237 SDDS_Bomb(
"Either -match or -equate must be specified.");
248 for (i = 0; i < match_columns; i++) {
251 sprintf(s,
"Error: Column '%s' not found or not of string type in file '%s'.",
252 match_column[i][0], input1 ? input1 :
"stdin");
258 sprintf(s,
"Error: Column '%s' not found or not of string type in file '%s'.",
259 match_column[i][1], input2);
264 for (i = 0; i < equate_columns; i++) {
267 sprintf(s,
"Error: Column '%s' not found or not of numeric type in file '%s'.",
268 equate_column[i][0], input1 ? input1 :
"stdin");
274 sprintf(s,
"Error: Column '%s' not found or not of numeric type in file '%s'.",
275 equate_column[i][1], input2);
281 if (output && (pipeFlags & USE_STDOUT))
282 SDDS_Bomb(
"Too many filenames with -pipe option.");
283 if (!output && !(pipeFlags & USE_STDOUT)) {
285 fprintf(stderr,
"Warning: Existing file '%s' will be replaced.\n", input1 ? input1 :
"stdin");
293 if (columnMajorOrder != -1)
294 SDDS_output.layout.data_mode.column_major = columnMajorOrder;
296 SDDS_output.layout.data_mode.column_major = SDDS_1.layout.data_mode.column_major;
305 fprintf(stderr,
"Warning: <input2> ends before <input1>.\n");
324 row_used =
SDDS_Realloc(row_used,
sizeof(*row_used) * rows2);
333 SDDS_SetError(
"Problem copying parameter or array data from first input file.");
337 for (j = 0; j < rows1; j++) {
340 for (i = 0; i < match_columns; i++) {
341 if (!
SDDS_GetValue(&SDDS_1, match_column[i][0], j, &match_value)) {
342 sprintf(s,
"Problem getting column '%s' from file '%s'.",
343 match_column[i][0], input1 ? input1 :
"stdin");
348 sprintf(s,
"Problem setting rows of interest for column '%s'.",
355 if ((!n && !invert) || (n && invert))
358 for (k = 0; k < rows2; k++) {
367 equal = rows_equate(&SDDS_1, j, &SDDS_2, k, equate_columns, equate_column);
368 if ((equal && !invert) || (!equal && invert)) {
369 row_used[k] = reuse ? 0 : 1;
374 if ((k == rows2 && !invert) || (k != rows2 && invert))
378 sprintf(s,
"Problem copying to row %" PRId64
" of output from row %" PRId64
" of data set 1.",
404long rows_equate(
SDDS_DATASET *SDDS1, int64_t row1,
SDDS_DATASET *SDDS2, int64_t row2,
long equate_columns, STRING_PAIR *equate_column) {
406 long index1, index2, size, type, i;
407 char s[SDDS_MAXLINE];
409 for (i = 0; i < equate_columns; i++) {
416 sprintf(s,
"Problem equating rows--types don't match for columns '%s' and '%s'.",
417 equate_column[i][0], equate_column[i][1]);
422 data1 = (
char *)SDDS1->data[index1] + size * row1;
423 data2 = (
char *)SDDS2->data[index2] + size * row2;
424 if (memcmp(data1, data2, size) != 0)
SDDS (Self-Describing Data Set) data type definitions and function prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyRowDirect(SDDS_DATASET *SDDS_target, int64_t target_row, SDDS_DATASET *SDDS_source, int64_t source_row)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_CopyArrays(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
int32_t SDDS_ZeroMemory(void *mem, int64_t n_bytes)
Sets a block of memory to zero.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
int32_t SDDS_GetTypeSize(int32_t type)
Retrieves the size in bytes of a specified SDDS data type.
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_NUMERIC_TYPE(type)
Checks if the given type identifier corresponds to any numeric type.
void * trealloc(void *old_ptr, uint64_t size_of_block)
Reallocates a memory block to a new size.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
char * cp_str(char **s, char *t)
Copies a string, allocating memory for storage.
char * delete_chars(char *s, char *t)
Removes all occurrences of characters found in string t from string s.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
char * tmpname(char *s)
Supplies a unique temporary filename.