SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsnormalize.c File Reference

Detailed Description

A program for SDDS-format column normalization.

This program normalizes the specified columns of an SDDS file based on user-defined options. It provides various statistical modes for normalization, supports multithreading, and offers flexibility in data handling with features like custom suffixes and column exclusions.

Usage

sddsnormalize [<inputfile>] [<outputfile>]
[-pipe=[input][,output]]
-columns=[mode=<mode>,][suffix=<string>,][exclude=<wildcardString>,][functionOf=<columnName>,]<columnName>[,...]
[-threads=<number>]
[-majorOrder=row|column]

Options

Required Description
-columns Specifies the columns to normalize and their modes of normalization.
Optional Description
-pipe Specifies whether the input/output is piped.
-threads Defines the number of threads used when computing the rms and standarddeviation normalization factors (default 1).
-majorOrder Sets the data ordering of the output file to row-major or column-major; if omitted, the input file's ordering is kept.
Column Modes Description
minimum Use the minimum value as the normalization factor.
maximum Use the maximum value as the normalization factor.
largest Use the larger of |min| or |max| (default).
signedlargest Use whichever of the minimum or maximum has the larger magnitude, with its sign retained.
spread Use (max - min) as the normalization factor.
rms Use the root-mean-square of the values.
standarddeviation Use the n-1 weighted standard deviation.
sum Use the sum of all values.
area Use the area under the curve (requires functionOf).
average Use the average of all values.

Specific Requirements

  • For the area mode in -columns, the functionOf qualifier must be provided.
  • The -columns mode defaults to largest if not specified.
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Author
M. Borland, R. Soliday, H. Shang

Definition in file sddsnormalize.c.

#include "mdb.h"
#include "SDDS.h"
#include "scan.h"
#include <ctype.h>

Go to the source code of this file.

Functions

long resolveColumnNames (SDDS_DATASET *SDDSin, NORM_REQUEST *normRequest, long normRequests, NORM_SPEC **normSpecRet, long *normSpecsRet)
 
int main (int argc, char **argv)
 

Function Documentation

◆ main()

int main ( int argc,
char ** argv )

Definition at line 156 of file sddsnormalize.c.

156 {
157 int iArg;
158 NORM_REQUEST *normRequest;
159 NORM_SPEC *normSpec;
160 long normRequests, normSpecs, i, readCode;
161 int64_t j, rows;
162 char *input, *output, *modeString;
163 unsigned long pipeFlags, majorOrderFlag;
164 SCANNED_ARG *scanned;
165 SDDS_DATASET SDDSin, SDDSout;
166 double *data, *funcOfData, factor, min, max;
167 short columnMajorOrder = -1;
168 int threads = 1;
169
171 argc = scanargs(&scanned, argc, argv);
172 if (argc < 3)
173 bomb(NULL, USAGE);
174
175 output = input = NULL;
176 pipeFlags = 0;
177 normRequest = NULL;
178 normSpec = NULL;
179 normRequests = normSpecs = 0;
180
181 for (iArg = 1; iArg < argc; iArg++) {
182 if (scanned[iArg].arg_type == OPTION) {
183 /* process options here */
184 switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
185 case CLO_MAJOR_ORDER:
186 majorOrderFlag = 0;
187 scanned[iArg].n_items--;
188 if (scanned[iArg].n_items > 0 && (!scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0, "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER, "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
189 SDDS_Bomb("invalid -majorOrder syntax/values");
190 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
191 columnMajorOrder = 1;
192 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
193 columnMajorOrder = 0;
194 break;
195 case CLO_COLUMNS:
196 if (!(normRequest = SDDS_Realloc(normRequest, sizeof(*normRequest) * (normRequests + 1))))
197 SDDS_Bomb("memory allocation failure");
198 normRequest[normRequests].exclude = normRequest[normRequests].suffix = NULL;
199 if (!scanItemList(&normRequest[normRequests].flags,
200 scanned[iArg].list, &scanned[iArg].n_items,
201 SCANITEMLIST_UNKNOWN_VALUE_OK | SCANITEMLIST_REMOVE_USED_ITEMS |
202 SCANITEMLIST_IGNORE_VALUELESS,
203 "mode", SDDS_STRING, &modeString, 1, FL_MODE_GIVEN,
204 "suffix", SDDS_STRING, &normRequest[normRequests].suffix, 1, FL_SUFFIX_GIVEN,
205 "functionof", SDDS_STRING, &normRequest[normRequests].functionOf, 1, FL_FUNCOF_GIVEN,
206 "exclude", SDDS_STRING, &normRequest[normRequests].exclude, 1, 0, NULL))
207 SDDS_Bomb("invalid -columns syntax");
208 if (normRequest[normRequests].flags & FL_MODE_GIVEN) {
209 if ((normRequest[normRequests].mode = match_string(modeString, normMode, NORM_OPTIONS, 0)) < 0)
210 SDDS_Bomb("invalid -columns syntax: unknown mode");
211 } else
212 normRequest[normRequests].mode = NORM_LARGEST;
213 if (scanned[iArg].n_items < 1)
214 SDDS_Bomb("invalid -columns syntax: no columns listed");
215 normRequest[normRequests].source = scanned[iArg].list + 1;
216 normRequest[normRequests].sources = scanned[iArg].n_items - 1;
217 normRequests++;
218 break;
219 case CLO_THREADS:
220 if (scanned[iArg].n_items != 2 ||
221 !sscanf(scanned[iArg].list[1], "%d", &threads) || threads < 1)
222 SDDS_Bomb("invalid -threads syntax");
223 break;
224 case CLO_PIPE:
225 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
226 SDDS_Bomb("invalid -pipe syntax");
227 break;
228 default:
229 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
230 exit(EXIT_FAILURE);
231 break;
232 }
233 } else {
234 if (!input)
235 input = scanned[iArg].list[0];
236 else if (!output)
237 output = scanned[iArg].list[0];
238 else
239 SDDS_Bomb("too many filenames seen");
240 }
241 }
242
243 processFilenames("sddsnormalize", &input, &output, pipeFlags, 0, NULL);
244
245 if (!normRequests)
246 SDDS_Bomb("supply the names of columns to normalize with the -columns option");
247
248 if (!SDDS_InitializeInput(&SDDSin, input))
249 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
250
251 if (!resolveColumnNames(&SDDSin, normRequest, normRequests, &normSpec, &normSpecs))
252 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
253
254 if (!normSpecs)
255 SDDS_Bomb("no columns selected for normalization");
256
257 if (!SDDS_InitializeCopy(&SDDSout, &SDDSin, output, "w"))
258 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
259 if (columnMajorOrder != -1)
260 SDDSout.layout.data_mode.column_major = columnMajorOrder;
261 else
262 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
263 for (i = 0; i < normSpecs; i++) {
264 if (normSpec[i].flags & FL_SUFFIX_GIVEN) {
265 if (!SDDS_TransferColumnDefinition(&SDDSout, &SDDSin, normSpec[i].source, normSpec[i].target) ||
266 !SDDS_ChangeColumnInformation(&SDDSout, "units", "", SDDS_BY_NAME, normSpec[i].target))
267 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
268 } else if (!SDDS_ChangeColumnInformation(&SDDSout, "units", "Normalized", SDDS_BY_NAME, normSpec[i].target))
269 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
270 }
271
272 if (!SDDS_WriteLayout(&SDDSout))
273 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
274
275 while ((readCode = SDDS_ReadPage(&SDDSin)) > 0) {
276 if (!SDDS_CopyPage(&SDDSout, &SDDSin))
277 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
278 if ((rows = SDDS_RowCount(&SDDSin))) {
279 for (i = 0; i < normSpecs; i++) {
280 if (!(data = SDDS_GetColumnInDoubles(&SDDSin, normSpec[i].source)))
281 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
282 funcOfData = NULL;
283 if (normSpec[i].functionOf &&
284 !(funcOfData = SDDS_GetColumnInDoubles(&SDDSin, normSpec[i].functionOf)))
285 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
286 if (!find_min_max(&min, &max, data, rows))
287 min = max = 1;
288 switch (normSpec[i].mode) {
289 case NORM_RMS:
290 factor = rmsValueThreaded(data, rows, threads);
291 break;
292 case NORM_STDEV:
293 factor = standardDeviationThreaded(data, rows, threads);
294 break;
295 case NORM_MINIMUM:
296 factor = min;
297 break;
298 case NORM_MAXIMUM:
299 factor = max;
300 break;
301 case NORM_LARGEST:
302 min = fabs(min);
303 max = fabs(max);
304 factor = MAX(min, max);
305 break;
306 case NORM_SLARGEST:
307 if (fabs(min) > fabs(max))
308 factor = min;
309 else
310 factor = max;
311 break;
312 case NORM_SPREAD:
313 factor = max - min;
314 break;
315 case NORM_SUM:
316 for (j = factor = 0; j < rows; j++)
317 factor += data[j];
318 break;
319 case NORM_AREA:
320 if (!funcOfData)
321 SDDS_Bomb("functionOf qualifier must be given for area normalization");
322 trapazoidIntegration(funcOfData, data, rows, &factor);
323 break;
324 case NORM_AVERAGE:
325 for (j = factor = 0; j < rows; j++)
326 factor += data[j];
327 factor /= rows;
328 break;
329 default:
330 SDDS_Bomb("Invalid normalization mode---programming error");
331 break;
332 }
333 if (funcOfData)
334 free(funcOfData);
335 if (factor)
336 for (j = 0; j < rows; j++)
337 data[j] /= factor;
338 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, data, rows, normSpec[i].target))
339 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
340 free(data);
341 }
342 }
343 if (!SDDS_WritePage(&SDDSout))
344 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
345 }
346 if (!SDDS_Terminate(&SDDSin)) {
347 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
348 return EXIT_FAILURE;
349 }
350 if (!SDDS_Terminate(&SDDSout)) {
351 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
352 return EXIT_FAILURE;
353 }
354
355 return EXIT_SUCCESS;
356}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
Definition SDDS_info.c:364
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
int find_min_max(double *min, double *max, double *list, int64_t n)
Finds the minimum and maximum values in a list of doubles.
Definition findMinMax.c:33
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
double standardDeviationThreaded(double *x, long n, long numThreads)
Calculates the standard deviation of an array of doubles using multiple threads.
Definition moments.c:51
double rmsValueThreaded(double *y, long n, long numThreads)
Calculates the RMS (Root Mean Square) value of an array of doubles using multiple threads.
Definition moments.c:597
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
long trapazoidIntegration(double *x, double *y, long n, double *integral)
Computes the integral of a dataset using the trapezoidal rule.
Definition trapInteg.c:29

◆ resolveColumnNames()

long resolveColumnNames ( SDDS_DATASET * SDDSin,
NORM_REQUEST * normRequest,
long normRequests,
NORM_SPEC ** normSpecRet,
long * normSpecsRet )

Definition at line 358 of file sddsnormalize.c.

358 {
359 long i, j;
360 int32_t columns;
361 char **column, buffer[1024];
362 long normSpecs = 0;
363 NORM_SPEC *normSpec = NULL;
364
365 for (i = 0; i < normRequests; i++) {
366 SDDS_SetColumnFlags(SDDSin, 0);
367 if (normRequest[i].flags & FL_SUFFIX_GIVEN) {
368 if (!normRequest[i].suffix || !strlen(normRequest[i].suffix)) {
369 SDDS_SetError("resolveColumnNames: missing or blank suffix");
370 return 0;
371 }
372 }
373 for (j = 0; j < normRequest[i].sources; j++) {
374 if (!SDDS_SetColumnsOfInterest(SDDSin, SDDS_MATCH_STRING, normRequest[i].source[j], SDDS_OR)) {
375 SDDS_SetError("resolveColumnNames: SDDS_SetColumnsOfInterest error");
376 return 0;
377 }
378 }
379 if (normRequest[i].exclude &&
380 !SDDS_SetColumnsOfInterest(SDDSin, SDDS_MATCH_STRING, normRequest[i].exclude, SDDS_NEGATE_MATCH | SDDS_AND)) {
381 SDDS_SetError("resolveColumnNames: SDDS_SetColumnsOfInterest error");
382 return 0;
383 }
384 if (!(column = SDDS_GetColumnNames(SDDSin, &columns)) || columns == 0) {
385 sprintf(buffer, "No match for column list: ");
386 for (j = 0; j < normRequest[i].sources; j++) {
387 strcat(buffer, normRequest[i].source[j]);
388 if (j != normRequest[i].sources - 1)
389 strcat(buffer, ", ");
390 }
391 SDDS_SetError(buffer);
392 return 0;
393 }
394 if (!(normSpec = SDDS_Realloc(normSpec, sizeof(*normSpec) * (normSpecs + columns)))) {
395 SDDS_SetError("resolveColumnNames: Memory allocation failure");
396 return 0;
397 }
398 for (j = 0; j < columns; j++) {
399 normSpec[j + normSpecs].source = column[j];
400 normSpec[j + normSpecs].mode = normRequest[i].mode;
401 normSpec[j + normSpecs].flags = normRequest[i].flags;
402 normSpec[j + normSpecs].functionOf = NULL;
403 if (normRequest[i].flags & FL_FUNCOF_GIVEN) {
404 if (!SDDS_CopyString(&normSpec[j + normSpecs].functionOf, normRequest[i].functionOf)) {
405 SDDS_SetError("resolveColumnNames: Memory allocation failure");
406 return 0;
407 }
408 }
409 normSpec[j + normSpecs].target = NULL;
410 if (normRequest[i].flags & FL_SUFFIX_GIVEN) {
411 sprintf(buffer, "%s%s", normSpec[j + normSpecs].source, normRequest[i].suffix);
412 if (!SDDS_CopyString(&normSpec[j + normSpecs].target, buffer)) {
413 SDDS_SetError("resolveColumnNames: Memory allocation failure");
414 return 0;
415 }
416 } else
417 normSpec[j + normSpecs].target = normSpec[j + normSpecs].source;
418 }
419 normSpecs += columns;
420 }
421 *normSpecRet = normSpec;
422 *normSpecsRet = normSpecs;
423 return 1;
424}
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
int32_t SDDS_SetColumnFlags(SDDS_DATASET *SDDS_dataset, int32_t column_flag_value)
Sets the acceptance flags for all columns in the current data table of a data set.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856