63char *option[N_OPTIONS] = {
71 "Usage: sddsnormalize [<inputfile>] [<outputfile>] \n"
72 " [-pipe=[input][,output]] \n"
73 " -columns=[mode=<mode>,][suffix=<string>,][exclude=<wildcardString>,]<columnName>[,...] \n"
74 " [-threads=<number>] \n"
75 " [-majorOrder=row|column] \n\n"
77 " <mode> Specifies the normalization mode. Available modes are:\n"
78 " minimum, maximum, largest, signedlargest,\n"
79 " spread, rms, standarddeviation, sum, area, or average.\n"
80 " - minimum : Use the minimum value as the normalization factor.\n"
81 " - maximum : Use the maximum value as the normalization factor.\n"
82 " - largest : Use the larger of |min| or |max| (default).\n"
83 " - signedlargest: Use the largest value with its sign retained.\n"
84 " - spread : Use (max - min) as the normalization factor.\n"
85 " - rms : Use the root-mean-square of the values.\n"
86 " - standarddeviation: Use the n-1 weighted standard deviation.\n"
87 " - sum : Use the sum of all values.\n"
88 " - area : Use the area under the curve (requires functionOf).\n"
89 " - average : Use the average of all values.\n"
90 " <string> Specifies a suffix to append to the column name for the normalized output.\n"
91 " If omitted, the original column is replaced.\n"
92 " <wildcardString> Excludes columns matching the wildcard pattern from normalization.\n"
93 " <columnName> Specifies the column(s) to normalize. Multiple columns can be separated by commas.\n"
94 " <number> Specifies the number of threads to use for normalization.\n"
95 " row|column Specifies the major order for data processing.\n\n"
96 "Program by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")\n";
/* Normalization-mode codes stored in the `mode` field of a normalization
 * request. When a mode= qualifier is given, the code is whatever match_string()
 * returns for the mode name looked up in normMode[]; presumably each code is
 * therefore the index of the matching name in that table -- TODO confirm
 * against the normMode[] initializer. NORM_LARGEST is the code assigned when
 * the mode is not taken from a mode= qualifier. */
100#define NORM_MAXIMUM 1
101#define NORM_LARGEST 2
102#define NORM_SLARGEST 3
108#define NORM_AVERAGE 9
/* Declared length of normMode[]; also passed to match_string() as the number
 * of mode names to search. */
109#define NORM_OPTIONS 10
110static char *normMode[NORM_OPTIONS] = {
/* Bit flags associated with the -columns qualifiers in the scanItemList()
 * call. Later code tests these bits in a request's `flags` field to decide
 * whether the corresponding qualifier value should be used. */
/* Paired with the "suffix" keyword. */
124#define FL_SUFFIX_GIVEN 0x0001U
/* Paired with the "mode" keyword. */
125#define FL_MODE_GIVEN 0x0002U
/* Paired with the "functionof" keyword. */
126#define FL_FUNCOF_GIVEN 0x0004U
130 char *suffix, **source, *exclude, *functionOf;
140 char *source, *target, *functionOf;
146int main(
int argc,
char **argv) {
150 long normRequests, normSpecs, i, readCode;
152 char *input, *output, *modeString;
153 unsigned long pipeFlags, majorOrderFlag;
154 SCANNED_ARG *scanned;
156 double *data, *funcOfData, factor, min, max;
157 short columnMajorOrder = -1;
161 argc =
scanargs(&scanned, argc, argv);
165 output = input = NULL;
169 normRequests = normSpecs = 0;
171 for (iArg = 1; iArg < argc; iArg++) {
172 if (scanned[iArg].arg_type == OPTION) {
174 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
175 case CLO_MAJOR_ORDER:
177 scanned[iArg].n_items--;
178 if (scanned[iArg].n_items > 0 && (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
"row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
"column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
179 SDDS_Bomb(
"invalid -majorOrder syntax/values");
180 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
181 columnMajorOrder = 1;
182 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
183 columnMajorOrder = 0;
186 if (!(normRequest =
SDDS_Realloc(normRequest,
sizeof(*normRequest) * (normRequests + 1))))
188 normRequest[normRequests].exclude = normRequest[normRequests].suffix = NULL;
190 scanned[iArg].list, &scanned[iArg].n_items,
191 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS |
192 SCANITEMLIST_IGNORE_VALUELESS,
193 "mode",
SDDS_STRING, &modeString, 1, FL_MODE_GIVEN,
194 "suffix",
SDDS_STRING, &normRequest[normRequests].suffix, 1, FL_SUFFIX_GIVEN,
195 "functionof",
SDDS_STRING, &normRequest[normRequests].functionOf, 1, FL_FUNCOF_GIVEN,
196 "exclude",
SDDS_STRING, &normRequest[normRequests].exclude, 1, 0, NULL))
198 if (normRequest[normRequests].flags & FL_MODE_GIVEN) {
199 if ((normRequest[normRequests].mode =
match_string(modeString, normMode, NORM_OPTIONS, 0)) < 0)
200 SDDS_Bomb(
"invalid -columns syntax: unknown mode");
202 normRequest[normRequests].mode = NORM_LARGEST;
203 if (scanned[iArg].n_items < 1)
204 SDDS_Bomb(
"invalid -columns syntax: no columns listed");
205 normRequest[normRequests].source = scanned[iArg].list + 1;
206 normRequest[normRequests].sources = scanned[iArg].n_items - 1;
210 if (scanned[iArg].n_items != 2 ||
211 !sscanf(scanned[iArg].list[1],
"%d", &threads) || threads < 1)
215 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
219 fprintf(stderr,
"error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
225 input = scanned[iArg].list[0];
227 output = scanned[iArg].list[0];
236 SDDS_Bomb(
"supply the names of columns to normalize with the -columns option");
241 if (!resolveColumnNames(&SDDSin, normRequest, normRequests, &normSpec, &normSpecs))
245 SDDS_Bomb(
"no columns selected for normalization");
249 if (columnMajorOrder != -1)
250 SDDSout.layout.data_mode.column_major = columnMajorOrder;
252 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
253 for (i = 0; i < normSpecs; i++) {
254 if (normSpec[i].flags & FL_SUFFIX_GIVEN) {
268 if ((rows = SDDS_RowCount(&SDDSin))) {
269 for (i = 0; i < normSpecs; i++) {
273 if (normSpec[i].functionOf &&
278 switch (normSpec[i].mode) {
294 factor = MAX(min, max);
297 if (fabs(min) > fabs(max))
306 for (j = factor = 0; j < rows; j++)
311 SDDS_Bomb(
"functionOf qualifier must be given for area normalization");
315 for (j = factor = 0; j < rows; j++)
320 SDDS_Bomb(
"Invalid normalization mode---programming error");
326 for (j = 0; j < rows; j++)
351 char **column, buffer[1024];
355 for (i = 0; i < normRequests; i++) {
357 if (normRequest[i].flags & FL_SUFFIX_GIVEN) {
358 if (!normRequest[i].suffix || !strlen(normRequest[i].suffix)) {
359 SDDS_SetError(
"resolveColumnNames: missing or blank suffix");
363 for (j = 0; j < normRequest[i].sources; j++) {
365 SDDS_SetError(
"resolveColumnNames: SDDS_SetColumnsOfInterest error");
369 if (normRequest[i].exclude &&
371 SDDS_SetError(
"resolveColumnNames: SDDS_SetColumnsOfInterest error");
375 sprintf(buffer,
"No match for column list: ");
376 for (j = 0; j < normRequest[i].sources; j++) {
377 strcat(buffer, normRequest[i].source[j]);
378 if (j != normRequest[i].sources - 1)
379 strcat(buffer,
", ");
384 if (!(normSpec =
SDDS_Realloc(normSpec,
sizeof(*normSpec) * (normSpecs + columns)))) {
385 SDDS_SetError(
"resolveColumnNames: Memory allocation failure");
388 for (j = 0; j < columns; j++) {
389 normSpec[j + normSpecs].source = column[j];
390 normSpec[j + normSpecs].mode = normRequest[i].mode;
391 normSpec[j + normSpecs].flags = normRequest[i].flags;
392 normSpec[j + normSpecs].functionOf = NULL;
393 if (normRequest[i].flags & FL_FUNCOF_GIVEN) {
394 if (!
SDDS_CopyString(&normSpec[j + normSpecs].functionOf, normRequest[i].functionOf)) {
395 SDDS_SetError(
"resolveColumnNames: Memory allocation failure");
399 normSpec[j + normSpecs].target = NULL;
400 if (normRequest[i].flags & FL_SUFFIX_GIVEN) {
401 sprintf(buffer,
"%s%s", normSpec[j + normSpecs].source, normRequest[i].suffix);
403 SDDS_SetError(
"resolveColumnNames: Memory allocation failure");
407 normSpec[j + normSpecs].target = normSpec[j + normSpecs].source;
409 normSpecs += columns;
411 *normSpecRet = normSpec;
412 *normSpecsRet = normSpecs;
SDDS (Self Describing Data Set) Data Type Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_STRING
Identifier for the string data type.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
int find_min_max(double *min, double *max, double *list, int64_t n)
Finds the minimum and maximum values in a list of doubles.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
double standardDeviationThreaded(double *x, long n, long numThreads)
Calculates the standard deviation of an array of doubles using multiple threads.
double rmsValueThreaded(double *y, long n, long numThreads)
Calculates the RMS (Root Mean Square) value of an array of doubles using multiple threads.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
long trapazoidIntegration(double *x, double *y, long n, double *integral)
Computes the integral of a dataset using the trapezoidal rule.