48#include "match_string.h"
53 CLO_INDEPENDENT_COLUMN,
67char *commandline_option[N_OPTIONS] = {
68 "independentVariable",
81#define SIGMA_GENERATE 0
82#define SIGMA_OPTIONS 1
83char *sigma_option[SIGMA_OPTIONS] = {
87#define DEFAULT_EXCLUDED_COLUMNS 1
88char *defaultExcludedColumn[DEFAULT_EXCLUDED_COLUMNS] = {
93 "Usage: sddsslopes <inputfile> <outputfile> [OPTIONS]\n\n"
95 " -pipe=[input][,output] Read input or write output from/to a pipe.\n"
96 " -independentVariable=<columnName> Specify the independent variable column.\n"
97 " -range=<lower>,<upper> Specify the range of the independent variable for fitting.\n"
98 " -columns=<list-of-names> Comma-separated list of columns to perform fits on.\n"
99 " -excludeColumns=<list-of-names> Comma-separated list of columns to exclude from fitting.\n"
100 " -sigma[=generate][,minimum=<val>] Calculate errors using sigma columns or generate them.\n"
101 " -residual=<file> Output file for residuals of the linear fit.\n"
102 " -ascii Output slopes file in ASCII mode (default is binary).\n"
103 " -verbose Enable verbose output to stderr.\n"
104 " -majorOrder=<row|column> Specify output file ordering.\n"
105 " -nowarnings Suppress warning messages.\n\n"
107 " Performs straight line fits on numerical columns in the input SDDS file using a specified\n"
108 " independent variable. Outputs the slope and intercept for each selected column.\n"
109 " The independent variable column is excluded from the output but its name is stored as a parameter.\n\n"
111 " sddsslopes data_input.sdds data_output.sdds -independentVariable=Time -columns=X,Y,Z -verbose\n"
112 "Program by Louis Emery, ANL (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")\n";
114long set_multicolumn_flags(
SDDS_TABLE *SDDSin,
char ***column, int32_t *columns,
char **exclude,
long excludes);
116int main(
int argc,
char **argv) {
117 SCANNED_ARG *scanned;
118 SDDS_TABLE inputPage, outputPage, residualPage;
119 char *inputfile, *outputfile;
120 char **column, **excludeColumn;
126 double *indVar, *indVarOrig;
128 char **intColumn, **slopeColumn, **slopeSigmaColumn, **interceptSigmaColumn;
129 char *Units, *slopeUnits;
130 double *depVar, *depVarOrig;
132 double *coef, *coefsigma, *weight, *diff, *diffOrig, chi;
134 int64_t i, j, rows, rowsOrig, iRow;
136 double slopeSigma, interceptSigma;
137 char **sigmaColumn, **chiSquaredColumn;
138 long *sigmaColumnExists;
139 long doSlopeSigma, generateSigma, doPreliminaryFit;
141 double sigmaSum, averageSigma, minSigma;
144 unsigned long pipeFlags;
145 long tmpfile_used, noWarnings;
146 long unsigned int majorOrderFlag;
148 short columnMajorOrder = -1;
150 indVar = indVarOrig = depVar = depVarOrig = coef = coefsigma = weight = diff = NULL;
151 intColumn = slopeColumn = slopeSigmaColumn = interceptSigmaColumn = sigmaColumn = chiSquaredColumn = NULL;
153 sigmaColumnExists = NULL;
156 argc =
scanargs(&scanned, argc, argv);
160 inputfile = outputfile = NULL;
161 columns = excludeColumns = 0;
162 column = excludeColumn = NULL;
163 indColumnName = NULL;
168 doPreliminaryFit = 0;
176 for (iArg = 1; iArg < argc; iArg++) {
177 if (scanned[iArg].arg_type == OPTION) {
179 switch (
match_string(scanned[iArg].list[0], commandline_option, N_OPTIONS, UNIQUE_MATCH)) {
180 case CLO_MAJOR_ORDER:
182 scanned[iArg].n_items--;
183 if (scanned[iArg].n_items > 0 &&
184 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
185 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
186 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
187 SDDS_Bomb(
"invalid -majorOrder syntax/values");
188 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
189 columnMajorOrder = 1;
190 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
191 columnMajorOrder = 0;
193 case CLO_INDEPENDENT_COLUMN:
194 if (!(indColumnName = scanned[iArg].list[1]))
195 SDDS_Bomb(
"no string given for option -independentVariable");
199 SDDS_Bomb(
"only one -columns option may be given");
200 if (scanned[iArg].n_items < 2)
202 column =
tmalloc(
sizeof(*column) * (columns = scanned[iArg].n_items - 1));
203 for (i = 0; i < columns; i++)
204 column[i] = scanned[iArg].list[i + 1];
208 SDDS_Bomb(
"only one -excludecolumns option may be given");
209 if (scanned[iArg].n_items < 2)
210 SDDS_Bomb(
"invalid -excludecolumns syntax");
211 excludeColumn =
tmalloc(
sizeof(*excludeColumn) * (excludeColumns = scanned[iArg].n_items - 1));
212 for (i = 0; i < excludeColumns; i++)
213 excludeColumn[i] = scanned[iArg].list[i + 1];
222 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
227 if (scanned[iArg].n_items > 1) {
228 unsigned long sigmaFlags = 0;
229 scanned[iArg].n_items--;
230 if (!
scanItemList(&sigmaFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
231 "generate", -1, NULL, 0, 1,
241 if (!(residualFile = scanned[iArg].list[1])) {
242 fprintf(stderr,
"No file specified in -residual option.\n");
247 if (scanned[iArg].n_items != 3 ||
248 1 != sscanf(scanned[iArg].list[1],
"%lf", &xMin) ||
249 1 != sscanf(scanned[iArg].list[2],
"%lf", &xMax) ||
253 case CLO_NO_WARNINGS:
262 inputfile = scanned[iArg].list[0];
263 else if (!outputfile)
264 outputfile = scanned[iArg].list[0];
270 if (residualFile && outputfile) {
271 if (!strcmp(residualFile, outputfile)) {
272 fprintf(stderr,
"Residual file can't be the same as the output file.\n");
277 processFilenames(
"sddsslopes", &inputfile, &outputfile, pipeFlags, noWarnings, &tmpfile_used);
279 if (!indColumnName) {
280 fprintf(stderr,
"independentVariable not given\n");
284 if (!excludeColumns) {
285 excludeColumn = defaultExcludedColumn;
286 excludeColumns = DEFAULT_EXCLUDED_COLUMNS;
290 fprintf(stderr,
"Reading file %s.\n", inputfile);
293 while (0 < (ipage = SDDS_ReadTable(&inputPage))) {
295 fprintf(stderr,
"working on page %ld\n", ipage);
303 indVar = (
double *)malloc(
sizeof(*indVar) * rows);
305 fprintf(stderr,
"Memory allocation failed for indVar.\n");
309 indVar = (
double *)realloc(indVar,
sizeof(*indVar) * rows);
311 fprintf(stderr,
"Memory reallocation failed for indVar.\n");
316 if (!
SDDS_FindColumn(&inputPage, FIND_NUMERIC_TYPE, indColumnName, NULL)) {
317 fprintf(stderr,
"Something wrong with column %s.\n", indColumnName);
326 for (i = j = 0; i < rowsOrig; i++) {
327 if (indVarOrig[i] <= xMax && indVarOrig[i] >= xMin) {
328 indVar[j] = indVarOrig[i];
340 indVarUnits = (
char *)malloc(
sizeof(*indVarUnits));
349 if (!
SDDS_InitializeOutput(&residualPage, ascii ? SDDS_ASCII : SDDS_BINARY, 1,
"Residual of 2-term fit", NULL, outputfile) ||
352 if (columnMajorOrder != -1)
353 residualPage.layout.data_mode.column_major = columnMajorOrder;
355 residualPage.layout.data_mode.column_major = inputPage.layout.data_mode.column_major;
367 if (!set_multicolumn_flags(&inputPage, &column, &columns, excludeColumn, excludeColumns)) {
375 intColumn = (
char **)malloc((
sizeof(
char *) * columns));
376 slopeColumn = (
char **)malloc((
sizeof(
char *) * columns));
378 slopeSigmaColumn = (
char **)malloc((
sizeof(
char *) * columns));
379 interceptSigmaColumn = (
char **)malloc((
sizeof(
char *) * columns));
380 chiSquaredColumn = (
char **)malloc((
sizeof(
char *) * columns));
382 for (i = 0; i < columns; i++) {
383 intColumn[i] = (
char *)malloc((
sizeof(
char) * (strlen(column[i]) + strlen(
"Intercept") + 1)));
384 snprintf(intColumn[i], strlen(column[i]) + strlen(
"Intercept") + 1,
"%sIntercept", column[i]);
386 slopeColumn[i] = (
char *)malloc((
sizeof(
char) * (strlen(column[i]) + strlen(
"Slope") + 1)));
387 snprintf(slopeColumn[i], strlen(column[i]) + strlen(
"Slope") + 1,
"%sSlope", column[i]);
390 slopeSigmaColumn[i] = (
char *)malloc((
sizeof(
char) * (strlen(column[i]) + strlen(
"SlopeSigma") + 1)));
391 snprintf(slopeSigmaColumn[i], strlen(column[i]) + strlen(
"SlopeSigma") + 1,
"%sSlopeSigma", column[i]);
393 interceptSigmaColumn[i] = (
char *)malloc((
sizeof(
char) * (strlen(column[i]) + strlen(
"InterceptSigma") + 1)));
394 snprintf(interceptSigmaColumn[i], strlen(column[i]) + strlen(
"InterceptSigma") + 1,
"%sInterceptSigma", column[i]);
396 chiSquaredColumn[i] = (
char *)malloc((
sizeof(
char) * (strlen(column[i]) + strlen(
"ChiSquared") + 1)));
397 snprintf(chiSquaredColumn[i], strlen(column[i]) + strlen(
"ChiSquared") + 1,
"%sChiSquared", column[i]);
406 fprintf(stderr,
"Opening file %s.\n", outputfile);
407 if (!
SDDS_InitializeOutput(&outputPage, ascii ? SDDS_ASCII : SDDS_BINARY, 1,
"2-term fit", NULL, outputfile) ||
411 if (columnMajorOrder != -1)
412 outputPage.layout.data_mode.column_major = columnMajorOrder;
414 outputPage.layout.data_mode.column_major = inputPage.layout.data_mode.column_major;
415 for (iCol = 0; iCol < columns; iCol++) {
419 Units = (
char *)malloc(
sizeof(*Units));
425 if (strlen(indVarUnits) && strlen(Units)) {
426 slopeUnits = (
char *)malloc(
sizeof(*slopeUnits) * (strlen(Units) + strlen(indVarUnits) + 2));
427 snprintf(slopeUnits, strlen(Units) + strlen(indVarUnits) + 2,
"%s/%s", Units, indVarUnits);
429 if (strlen(indVarUnits) && !strlen(Units)) {
430 slopeUnits = (
char *)malloc(
sizeof(*slopeUnits) * (strlen(indVarUnits) + 2));
431 snprintf(slopeUnits, strlen(indVarUnits) + 2,
"1/%s", indVarUnits);
433 if (!strlen(indVarUnits) && strlen(Units)) {
434 slopeUnits = (
char *)malloc(
sizeof(*slopeUnits) * (strlen(Units) + 1));
435 strcpy(slopeUnits, indVarUnits);
437 if (!strlen(indVarUnits) && !strlen(Units)) {
438 slopeUnits = (
char *)malloc(
sizeof(*slopeUnits));
439 strcpy(slopeUnits,
"");
455 if (!SDDS_StartTable(&outputPage, 1) ||
456 !
SDDS_SetParameters(&outputPage, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE,
"InputFile", inputfile ? inputfile :
"pipe", NULL) ||
457 !
SDDS_SetRowValues(&outputPage, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE, 0,
"IndependentVariable", indColumnName, NULL))
462 sigmaColumn = (
char **)malloc(
sizeof(*sigmaColumn) * columns);
463 sigmaColumnExists = (
long *)malloc(columns *
sizeof(*sigmaColumnExists));
464 for (iCol = 0; iCol < columns; iCol++) {
465 sigmaColumn[iCol] = (
char *)malloc(
sizeof(**sigmaColumn) * (strlen(column[iCol]) + strlen(
"Sigma") + 1));
466 snprintf(sigmaColumn[iCol], strlen(column[iCol]) + strlen(
"Sigma") + 1,
"%sSigma", column[iCol]);
468 case SDDS_CHECK_WRONGUNITS:
469 case SDDS_CHECK_OKAY:
470 sigmaColumnExists[iCol] = 1;
474 snprintf(sigmaColumn[iCol], strlen(
"Sigma") + strlen(column[iCol]) + 1,
"Sigma%s", column[iCol]);
476 case SDDS_CHECK_WRONGUNITS:
477 case SDDS_CHECK_OKAY:
478 sigmaColumnExists[iCol] = 1;
481 sigmaColumnExists[iCol] = 0;
489 weight = (
double *)malloc(
sizeof(*weight) * rows);
490 diff = (
double *)malloc(
sizeof(*diff) * rows);
492 coef = (
double *)malloc(
sizeof(*coef) * (order + 1));
493 coefsigma = (
double *)malloc(
sizeof(*coefsigma) * (order + 1));
494 if (!weight || !diff || !coef || !coefsigma) {
495 fprintf(stderr,
"Memory allocation failed.\n");
499 weight = (
double *)realloc(weight,
sizeof(*weight) * rows);
500 diff = (
double *)realloc(diff,
sizeof(*diff) * rows);
501 coef = (
double *)realloc(coef,
sizeof(*coef) * (order + 1));
502 coefsigma = (
double *)realloc(coefsigma,
sizeof(*coefsigma) * (order + 1));
503 if (!weight || !diff || !coef || !coefsigma) {
504 fprintf(stderr,
"Memory reallocation failed.\n");
510 depVar = (
double *)malloc(
sizeof(*depVar) * rows);
512 fprintf(stderr,
"Memory allocation failed for depVar.\n");
516 depVar = (
double *)realloc(depVar,
sizeof(*depVar) * rows);
518 fprintf(stderr,
"Memory reallocation failed for depVar.\n");
522 for (iCol = 0; iCol < columns; iCol++) {
524 fprintf(stderr,
"Doing column %s.\n", column[iCol]);
529 for (i = j = 0; i < rowsOrig; i++) {
530 if (xMin <= indVarOrig[i] && indVarOrig[i] <= xMax) {
531 depVar[j] = depVarOrig[i];
553 for (iRow = 0; iRow < rows; iRow++)
559 if (!generateSigma && sigmaColumnExists[iCol]) {
561 fprintf(stderr,
"\tUsing column %s for sigma.\n", sigmaColumn[iCol]);
565 for (iRow = 0; iRow < rows; iRow++) {
566 if (weight[iRow] < minSigma)
567 weight[iRow] = minSigma;
573 for (iRow = 0; iRow < rows; iRow++) {
574 sigmaSum += weight[iRow];
583 fprintf(stderr,
"Warning: All sigmas are zero.\n");
584 doPreliminaryFit = 1;
585 }
else if (validSigmas != rows) {
587 averageSigma = sigmaSum / validSigmas;
589 fprintf(stderr,
"Warning: replacing %" PRId64
" invalid sigmas with average (%e)\n", rows - validSigmas, averageSigma);
590 for (iRow = 0; iRow < rows; iRow++) {
592 weight[iRow] = averageSigma;
597 doPreliminaryFit = 1;
601 if (doPreliminaryFit) {
603 fprintf(stderr,
"\tGenerating sigmas from rms residual of a preliminary fit.\n");
604 if (!(
lsfn(indVar, depVar, weight, rows, order, coef, coefsigma, &chi, diff))) {
605 fprintf(stderr,
"Problem with call to lsfn\n.");
610 for (iRow = 0; iRow < rows; iRow++) {
611 rmsResidual += sqr(diff[iRow]);
613 rmsResidual = sqrt(rmsResidual / (rows));
614 for (iRow = 0; iRow < rows; iRow++) {
615 weight[iRow] = rmsResidual;
618 for (iRow = 0; iRow < rows; iRow++) {
619 if (weight[iRow] < minSigma)
620 weight[iRow] = minSigma;
625 if (!(
lsfn(indVar, depVar, weight, rows, order, coef, coefsigma, &chi, diff))) {
626 fprintf(stderr,
"Problem with call to lsfn\n.");
630 intColumn[iCol], coef[0], slopeColumn[iCol], coef[1], NULL))
633 interceptSigma = coefsigma[0];
634 slopeSigma = coefsigma[1];
636 chiSquaredColumn[iCol], chi, interceptSigmaColumn[iCol],
637 interceptSigma, slopeSigmaColumn[iCol], slopeSigma, NULL))
645 diffOrig = (
double *)malloc(rowsOrig *
sizeof(
double));
647 fprintf(stderr,
"Memory allocation failed for diffOrig.\n");
650 for (i = 0; i < rowsOrig; i++) {
651 diffOrig[i] = depVarOrig[i] - coef[0] - coef[1] * indVarOrig[i];
654 diffOrig, rowsOrig, column[iCol]))
659 diff, rows, column[iCol]))
666 if (!SDDS_WriteTable(&residualPage))
670 if (!SDDS_WriteTable(&outputPage))
694 for (i = 0; i < columns; i++) {
696 free(slopeColumn[i]);
698 free(slopeSigmaColumn[i]);
699 free(interceptSigmaColumn[i]);
700 free(chiSquaredColumn[i]);
702 free(sigmaColumn[i]);
707 free(slopeSigmaColumn);
708 free(interceptSigmaColumn);
709 free(chiSquaredColumn);
712 free(sigmaColumnExists);
715 if (excludeColumn && excludeColumn != defaultExcludedColumn)
721long set_multicolumn_flags(
SDDS_TABLE *SDDSin,
char ***column, int32_t *columns,
char **exclude,
long excludes) {
726 for (i = 0; i < *columns; i++) {
736 for (i = 0; i < *columns; i++) {
745 for (i = 0; i < excludes; i++)
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_GetColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Retrieves information about a specified column in the SDDS dataset.
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
char * SDDS_FindColumn(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Finds the first column in the SDDS dataset that matches the specified criteria.
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_ANY_NUMERIC_TYPE
Special identifier used by SDDS_Check*() routines to accept any numeric type.
#define SDDS_DOUBLE
Identifier for the double data type.
#define SDDS_NUMERIC_TYPE(type)
Checks if the given type identifier corresponds to any numeric type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
char * delete_chars(char *s, char *t)
Removes all occurrences of characters found in string t from string s.
long lsfn(double *xd, double *yd, double *sy, long nd, long nf, double *coef, double *s_coef, double *chi, double *diff)
Computes nth order polynomial least squares fit.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.