73char *option[N_OPTIONS] = {
81 "Usage: sddsnormalize [<inputfile>] [<outputfile>] \n"
82 " [-pipe=[input][,output]] \n"
83 " -columns=[mode=<mode>,][suffix=<string>,][exclude=<wildcardString>,][functionOf=<columnName>,]<columnName>[,...] \n"
84 " [-threads=<number>] \n"
85 " [-majorOrder=row|column] \n\n"
87 " <mode> Specifies the normalization mode. Available modes are:\n"
88 " minimum, maximum, largest, signedlargest,\n"
89 " spread, rms, standarddeviation, sum, area, or average.\n"
90 " - minimum : Use the minimum value as the normalization factor.\n"
91 " - maximum : Use the maximum value as the normalization factor.\n"
92 " - largest : Use the larger of |min| or |max| (default).\n"
93 " - signedlargest: Use the largest value with its sign retained.\n"
94 " - spread : Use (max - min) as the normalization factor.\n"
95 " - rms : Use the root-mean-square of the values.\n"
96 " - standarddeviation: Use the n-1 weighted standard deviation.\n"
97 " - sum : Use the sum of all values.\n"
98 " - area : Use the area under the curve (requires functionOf).\n"
99 " - average : Use the average of all values.\n"
100 " <string> Specifies a suffix to append to the column name for the normalized output.\n"
101 " If omitted, the original column is replaced.\n"
102 " <wildcardString> Excludes columns matching the wildcard pattern from normalization.\n"
103 " <columnName> Specifies the column(s) to normalize. Multiple columns can be separated by commas.\n"
104 " <number> Specifies the number of threads to use for normalization.\n"
105 " row|column Specifies the major order for data processing.\n\n"
106 "Program by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")\n";
/* Normalization mode codes.
 * The mode stored for a -columns request is the value returned by
 * match_string() when the user's mode string is looked up in normMode[],
 * so each code is presumably the index of the matching name in that table
 * (TODO confirm against the normMode[] initializer).
 * NORM_LARGEST is the mode assigned when no mode qualifier is given.
 * NOTE(review): codes 4 through 8 are not defined in the lines visible here;
 * confirm they are defined between NORM_SLARGEST and NORM_AVERAGE. */
109#define NORM_MINIMUM 0
110#define NORM_MAXIMUM 1
111#define NORM_LARGEST 2
112#define NORM_SLARGEST 3
118#define NORM_AVERAGE 9
/* Number of mode codes; used as the size of normMode[] and as the option
 * count passed to match_string(). */
119#define NORM_OPTIONS 10
120static char *normMode[NORM_OPTIONS] = {
/* Bit flags recorded for a -columns request. Each bit is paired with one
 * qualifier keyword in the -columns item-list parsing call, and is later
 * tested on the request/spec flags word to see whether that qualifier
 * was supplied. */
/* Paired with the "suffix" qualifier; when set, a non-blank suffix is
 * required and the target name is built as source name + suffix. */
134#define FL_SUFFIX_GIVEN 0x0001U
/* Paired with the "mode" qualifier; when set, the mode string is matched
 * against normMode[], otherwise NORM_LARGEST is used. */
135#define FL_MODE_GIVEN 0x0002U
/* Paired with the "functionof" qualifier; when set, the functionOf column
 * name is copied into each resolved normalization spec. */
136#define FL_FUNCOF_GIVEN 0x0004U
140 char *suffix, **source, *exclude, *functionOf;
150 char *source, *target, *functionOf;
156int main(
int argc,
char **argv) {
160 long normRequests, normSpecs, i, readCode;
162 char *input, *output, *modeString;
163 unsigned long pipeFlags, majorOrderFlag;
164 SCANNED_ARG *scanned;
166 double *data, *funcOfData, factor, min, max;
167 short columnMajorOrder = -1;
171 argc =
scanargs(&scanned, argc, argv);
175 output = input = NULL;
179 normRequests = normSpecs = 0;
181 for (iArg = 1; iArg < argc; iArg++) {
182 if (scanned[iArg].arg_type == OPTION) {
184 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
185 case CLO_MAJOR_ORDER:
187 scanned[iArg].n_items--;
188 if (scanned[iArg].n_items > 0 && (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
"row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
"column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
189 SDDS_Bomb(
"invalid -majorOrder syntax/values");
190 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
191 columnMajorOrder = 1;
192 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
193 columnMajorOrder = 0;
196 if (!(normRequest =
SDDS_Realloc(normRequest,
sizeof(*normRequest) * (normRequests + 1))))
198 normRequest[normRequests].exclude = normRequest[normRequests].suffix = NULL;
200 scanned[iArg].list, &scanned[iArg].n_items,
201 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS |
202 SCANITEMLIST_IGNORE_VALUELESS,
203 "mode",
SDDS_STRING, &modeString, 1, FL_MODE_GIVEN,
204 "suffix",
SDDS_STRING, &normRequest[normRequests].suffix, 1, FL_SUFFIX_GIVEN,
205 "functionof",
SDDS_STRING, &normRequest[normRequests].functionOf, 1, FL_FUNCOF_GIVEN,
206 "exclude",
SDDS_STRING, &normRequest[normRequests].exclude, 1, 0, NULL))
208 if (normRequest[normRequests].flags & FL_MODE_GIVEN) {
209 if ((normRequest[normRequests].mode =
match_string(modeString, normMode, NORM_OPTIONS, 0)) < 0)
210 SDDS_Bomb(
"invalid -columns syntax: unknown mode");
212 normRequest[normRequests].mode = NORM_LARGEST;
213 if (scanned[iArg].n_items < 1)
214 SDDS_Bomb(
"invalid -columns syntax: no columns listed");
215 normRequest[normRequests].source = scanned[iArg].list + 1;
216 normRequest[normRequests].sources = scanned[iArg].n_items - 1;
220 if (scanned[iArg].n_items != 2 ||
221 !sscanf(scanned[iArg].list[1],
"%d", &threads) || threads < 1)
225 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
229 fprintf(stderr,
"error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
235 input = scanned[iArg].list[0];
237 output = scanned[iArg].list[0];
246 SDDS_Bomb(
"supply the names of columns to normalize with the -columns option");
251 if (!resolveColumnNames(&SDDSin, normRequest, normRequests, &normSpec, &normSpecs))
255 SDDS_Bomb(
"no columns selected for normalization");
259 if (columnMajorOrder != -1)
260 SDDSout.layout.data_mode.column_major = columnMajorOrder;
262 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
263 for (i = 0; i < normSpecs; i++) {
264 if (normSpec[i].flags & FL_SUFFIX_GIVEN) {
278 if ((rows = SDDS_RowCount(&SDDSin))) {
279 for (i = 0; i < normSpecs; i++) {
283 if (normSpec[i].functionOf &&
288 switch (normSpec[i].mode) {
304 factor = MAX(min, max);
307 if (fabs(min) > fabs(max))
316 for (j = factor = 0; j < rows; j++)
321 SDDS_Bomb(
"functionOf qualifier must be given for area normalization");
325 for (j = factor = 0; j < rows; j++)
330 SDDS_Bomb(
"Invalid normalization mode---programming error");
336 for (j = 0; j < rows; j++)
361 char **column, buffer[1024];
365 for (i = 0; i < normRequests; i++) {
367 if (normRequest[i].flags & FL_SUFFIX_GIVEN) {
368 if (!normRequest[i].suffix || !strlen(normRequest[i].suffix)) {
369 SDDS_SetError(
"resolveColumnNames: missing or blank suffix");
373 for (j = 0; j < normRequest[i].sources; j++) {
375 SDDS_SetError(
"resolveColumnNames: SDDS_SetColumnsOfInterest error");
379 if (normRequest[i].exclude &&
381 SDDS_SetError(
"resolveColumnNames: SDDS_SetColumnsOfInterest error");
385 sprintf(buffer,
"No match for column list: ");
386 for (j = 0; j < normRequest[i].sources; j++) {
387 strcat(buffer, normRequest[i].source[j]);
388 if (j != normRequest[i].sources - 1)
389 strcat(buffer,
", ");
394 if (!(normSpec =
SDDS_Realloc(normSpec,
sizeof(*normSpec) * (normSpecs + columns)))) {
395 SDDS_SetError(
"resolveColumnNames: Memory allocation failure");
398 for (j = 0; j < columns; j++) {
399 normSpec[j + normSpecs].source = column[j];
400 normSpec[j + normSpecs].mode = normRequest[i].mode;
401 normSpec[j + normSpecs].flags = normRequest[i].flags;
402 normSpec[j + normSpecs].functionOf = NULL;
403 if (normRequest[i].flags & FL_FUNCOF_GIVEN) {
404 if (!
SDDS_CopyString(&normSpec[j + normSpecs].functionOf, normRequest[i].functionOf)) {
405 SDDS_SetError(
"resolveColumnNames: Memory allocation failure");
409 normSpec[j + normSpecs].target = NULL;
410 if (normRequest[i].flags & FL_SUFFIX_GIVEN) {
411 sprintf(buffer,
"%s%s", normSpec[j + normSpecs].source, normRequest[i].suffix);
413 SDDS_SetError(
"resolveColumnNames: Memory allocation failure");
417 normSpec[j + normSpecs].target = normSpec[j + normSpecs].source;
419 normSpecs += columns;
421 *normSpecRet = normSpec;
422 *normSpecsRet = normSpecs;
SDDS (Self-Describing Data Set) data type definitions and function prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_STRING
Identifier for the string data type.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
int find_min_max(double *min, double *max, double *list, int64_t n)
Finds the minimum and maximum values in a list of doubles.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
double standardDeviationThreaded(double *x, long n, long numThreads)
Calculates the standard deviation of an array of doubles using multiple threads.
double rmsValueThreaded(double *y, long n, long numThreads)
Calculates the RMS (Root Mean Square) value of an array of doubles using multiple threads.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
long trapazoidIntegration(double *x, double *y, long n, double *integral)
Computes the integral of a dataset using the trapezoidal rule.