SDDSlib
Loading...
Searching...
No Matches
sddssplinefit.c
Go to the documentation of this file.
1/**
2 * @file sddssplinefit.c
3 * @brief Performs nth order spline least squares fitting for SDDS files.
4 *
5 * This program fits splines to data contained in SDDS (Self Describing Data Sets) files.
6 * It allows for various configurations such as specifying the order of the spline,
7 * the number of coefficients or breakpoints, handling of sigma values, and more.
8 *
9 * @section Usage
10 * \code
11 * sddssplinefit [options] [<inputfile>] [<outputfile>]
12 * \endcode
13 *
14 * @section Options
15 * -pipe=[input][,output]
16 * -independent=<xName>
17 * -dependent=<yName1-wildcard>[,<yName2-wildcard>...]
18 * -sigmaIndependent=<xSigma>
19 * -sigmaDependent=<ySigmaFormatString>
20 * -order=<number>
21 * -coefficients=<number>
22 * -breakpoints=<number>
23 * -xOffset=<value>
24 * -xFactor=<value>
25 * -sigmas=<value>,{absolute | fractional}
26 * -modifySigmas
27 * -generateSigmas[={keepLargest, keepSmallest}]
28 * -sparse=<interval>
29 * -range=<lower>,<upper>[,fitOnly]
30 * -normalize[=<termNumber>]
31 * -verbose
32 * -evaluate=<filename>[,begin=<value>][,end=<value>][,number=<integer>][,derivatives=<order>][,basis]
33 * -infoFile=<filename>
34 * -copyParameters
35 *
36 * @copyright
37 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
38 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
39 *
40 * @license
41 * This file is distributed under the terms of the Software License Agreement
42 * found in the file LICENSE included with this distribution.
43 *
44 * @author L. Emery, M. Borland, R. Soliday
45 */
46
47#include "mdb.h"
48#include "SDDS.h"
49#include "SDDSutils.h"
50#include "scan.h"
51
52#include <gsl/gsl_bspline.h>
53#include <gsl/gsl_multifit.h>
54#include <gsl/gsl_rng.h>
55#include <gsl/gsl_randist.h>
56#include <gsl/gsl_statistics.h>
57#include <gsl/gsl_version.h>
58
59void makeSubstitutions(char *buffer1, char *buffer2, char *template, char *nameRoot, char *symbolRoot, char *xName, char *xSymbol);
60char *changeInformation(SDDS_DATASET *SDDSout, char *name, char *nameRoot, char *symbolRoot, char *xName, char *xSymbol, char **template, char *newUnits);
61char **makeCoefficientUnits(SDDS_DATASET *SDDSout, char *xName, char *yName, long int order, long coeffs);
62void initializeOutputFile(SDDS_DATASET *SDDSout, SDDS_DATASET *SDDSoutInfo, char *output, char *outputInfo,
63 SDDS_DATASET *SDDSin, char *input, char *xName, char **yNames, char *xSigmaName,
64 char **ySigmaNames, long sigmasValid, long int order, long coeffs, long breaks,
65 long numCols, long copyParameters);
66void checkInputFile(SDDS_DATASET *SDDSin, char *xName, char **yNames, char *xSigmaName, char **ySigmaNames, long numYNames);
67long coefficient_index(long int order, long coeffs, long order_of_interest);
68
69char **ResolveColumnNames(SDDS_DATASET *SDDSin, char **wildcardList, long length, int32_t *numYNames);
70char **GenerateYSigmaNames(char *controlString, char **yNames, long numYNames);
71void RemoveElementFromStringArray(char **array, long index, long length);
72char **RemoveNonNumericColumnsFromNameArray(SDDS_DATASET *SDDSin, char **columns, int32_t *numColumns);
73
74int print_matrix(FILE *f, const gsl_matrix *m);
75
76/* Enumeration for option types */
77enum option_type {
78 CLO_DEPENDENT,
79 CLO_ORDER,
80 CLO_COEFFICIENTS,
81 CLO_BREAKPOINTS,
82 CLO_REVISEORDERS,
83 CLO_XXXXX,
84 CLO_MODIFYSIGMAS,
85 CLO_SIGMAS,
86 CLO_GENERATESIGMAS,
87 CLO_RANGE,
88 CLO_SPARSE,
89 CLO_NORMALIZE,
90 CLO_XFACTOR,
91 CLO_XOFFSET,
92 CLO_VERBOSE,
93 CLO_PIPE,
94 CLO_EVALUATE,
95 CLO_INDEPENDENT,
96 CLO_SIGMAINDEPENDENT,
97 CLO_SIGMADEPENDENT,
98 CLO_INFOFILE,
99 CLO_COPYPARAMETERS,
100 N_OPTIONS
101};
102
103char *option[N_OPTIONS] = {
104 "dependent",
105 "order",
106 "coefficients",
107 "breakpoints",
108 "reviseorders",
109 "splinebasis",
110 "modifysigmas",
111 "sigmas",
112 "generatesigmas",
113 "range",
114 "sparse",
115 "normalize",
116 "xfactor",
117 "xoffset",
118 "verbose",
119 "pipe",
120 "evaluate",
121 "independent",
122 "sigmaindependent",
123 "sigmadependent",
124 "infofile",
125 "copyparameters",
126};
127
128char *USAGE =
129 "Usage: sddssplinefit [options] [<inputfile>] [<outputfile>]\n"
130 "Options:\n"
131 " -pipe=[input][,output] Use standard input and/or output for data.\n"
132 " -independent=<xName> Specify the name of the independent data column to use.\n"
133 " -dependent=<yName1-wildcard>[,<yName2-wildcard>...] Specify names of dependent data columns to use, supporting wildcards and separated by commas.\n"
134 " -sigmaIndependent=<xSigma> Specify the name of the independent sigma values column.\n"
135 " -sigmaDependent=<ySigmaFormatString> Specify a printf-style control string to generate dependent sigma column names from independent variable names (e.g., %sSigma).\n"
136 " -order=<number> Define the order of the spline. Default is 4.\n"
137 " -coefficients=<number> Set the number of coefficients. Specify either coefficients or breakpoints, not both.\n"
138 " -breakpoints=<number> Set the number of breakpoints. Condition enforced: breakpoints = coefficients + 2 - order.\n"
139 " -xOffset=<value> Define the desired value of x to fit about.\n"
140 " -xFactor=<value> Define the factor to multiply x values by before fitting.\n"
141 " -sigmas=<value>,{absolute | fractional} Specify absolute or fractional sigma for all points.\n"
142 " -modifySigmas Modify the y sigmas using the x sigmas and an initial fit.\n"
143 " -generateSigmas[={keepLargest, keepSmallest}] Generate y sigmas from the RMS deviation of an initial fit. Optionally keep the sigmas from the data if larger/smaller than the RMS deviation.\n"
144 " -sparse=<interval> Specify an integer interval at which to sample data.\n"
145 " -range=<lower>,<upper>[,fitOnly] Define the range of the independent variable over which to perform the fit and evaluation. If 'fitOnly' is given, the fit is compared to data over the original range.\n"
146 " -normalize[=<termNumber>] Normalize so that the specified term is unity.\n"
147 " -verbose Enable verbose output for additional information.\n"
148 " -evaluate=<filename>[,begin=<value>][,end=<value>][,number=<integer>][,derivatives=<order>][,basis] \n"
149 " Evaluate the spline fit and optionally compute derivatives and provide basis functions.\n"
150 " -infoFile=<filename> Specify a file to output fit information.\n"
151 " -copyParameters Copy parameters from the input file to the output file.\n\n"
152 "Program by Louis Emery, started with Michael Borland polynomial fit program (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
153
154static char *additional_help = "\n\
155sddssplinefit performs spline fits of the form y = SUM(i){ A[i] * B(x-x_offset, i)}, where B(x,i) is the ith basis\n\
156spline function evaluated at x. Internally, sddssplinefit computes the A[i] coefficients, writes the fitted y values to the output file,\n\
157and estimates the errors in these values.\n";
158
159static char *additional_help2 = "\n\
160 -independent Specify the name of the independent data column to use.\n\
161 -dependent Specify the names of dependent data columns to use, supporting wildcards and separated by commas.\n\
162 -sigmaIndependent Specify the name of the independent sigma values column.\n\
163 -sigmaDependent Specify a printf-style control string to generate dependent sigma column names from independent variable names (e.g., %sSigma).\n\
164 -order Define the order of the spline. Default is 4.\n\
165 -coefficients Set the number of coefficients. Specify either coefficients or breakpoints, not both.\n\
166 -breakpoints Set the number of breakpoints. Condition enforced: breakpoints = coefficients + 2 - order.\n\
167 -xOffset Define the desired value of x to fit about.\n\
168 -xFactor Define the factor to multiply x values by before fitting.\n\
169 -sigmas Specify absolute or fractional sigma for all points.\n\
170 -modifySigmas Modify the y sigmas using the x sigmas and an initial fit.\n\
171 -generateSigmas Generate y sigmas from the RMS deviation of an initial fit.\n\
172 Optionally keep the sigmas from the data if larger/smaller than the RMS deviation.\n\
173 -sparse Specify an integer interval at which to sample data.\n\
174 -range Define the range of the independent variable over which to perform the fit and evaluation.\n\
175 If 'fitOnly' is given, the fit is compared to data over the original range.\n\
176 -normalize Normalize so that the specified term is unity.\n\
177 -verbose Enable verbose output for additional information.\n\
178 -evaluate Evaluate the spline fit and optionally compute derivatives and provide basis functions.\n\
179 -infoFile Specify a file to output fit information.\n\
180 -copyParameters Copy parameters from the input file to the output file.\n\n";
181
182#define ABSOLUTE_SIGMAS 0
183#define FRACTIONAL_SIGMAS 1
184#define N_SIGMAS_OPTIONS 2
185char *sigmas_options[N_SIGMAS_OPTIONS] = {"absolute", "fractional"};
186
187#define FLGS_GENERATESIGMAS 1
188#define FLGS_KEEPLARGEST 2
189#define FLGS_KEEPSMALLEST 4
190
191#define REVPOW_ACTIVE 0x0001
192#define REVPOW_VERBOSE 0x0002
193/* SDDS indices for output page */
194static long iOffset = -1, iOffsetO = -1, iFactor = -1, iFactorO = -1;
195static long *iChiSq = NULL, *iChiSqO = NULL, *iRmsResidual = NULL, *iRmsResidualO = NULL, *iSigLevel = NULL, *iSigLevelO = NULL;
196static long *iFitIsValid = NULL, *iFitIsValidO = NULL;
197
198/* These column indices are globals, so we don't have to pass them to functions */
199static long ix = -1, ixSigma = -1;
200static long *iy = NULL, *iySigma = NULL;
201static long *iFit = NULL, *iResidual = NULL;
202
203static long *iCoefficient = NULL, *iCoefficientSigma = NULL, *iCoefficientUnits = NULL;
204
205static char *xSymbol, **ySymbols;
206
207#define EVAL_BEGIN_GIVEN 0x0001U
208#define EVAL_END_GIVEN 0x0002U
209#define EVAL_NUMBER_GIVEN 0x0004U
210#define EVAL_DERIVATIVES 0x0008U
211#define EVAL_PROVIDEBASIS 0x0010U
212
213#define MAX_Y_SIGMA_NAME_SIZE 1024
214
215typedef struct
216{
217 char *file;
218 long initialized;
219 int64_t number, nderiv;
220 unsigned long flags;
221 double begin, end;
222 SDDS_DATASET dataset;
223 char ***yDerivName, ***yDerivUnits; /* a matrix of names */
224 long *iSpline; /* index of spline columns for dataset */
225 gsl_bspline_workspace *bw; /* eases the passing of parameters in function arguments */
227void setupEvaluationFile(EVAL_PARAMETERS *evalParameters, char *xName, char **yName, long yNames, SDDS_DATASET *SDDSin);
228void makeEvaluationTable(EVAL_PARAMETERS *evalParameters, double *x, int64_t points, gsl_vector *B,
229 gsl_matrix *cov, gsl_vector *c, char *xName, char **yName, long yNames, long iYName, long int order);
230
231int main(int argc, char **argv) {
232 double **y = NULL, **yFit = NULL, **sy = NULL, **diff = NULL;
233 double *x = NULL, *sx = NULL;
234 double xOffset, xScaleFactor;
235 double *xOrig = NULL, **yOrig = NULL, **yFitOrig = NULL, *sxOrig = NULL, **syOrig = NULL, **sy0 = NULL;
236 long coeffs, breaks, normTerm, ySigmasValid;
237 long orderGiven, coeffsGiven, breaksGiven;
238 int64_t i, j, points, pointsOrig;
239 double sigmas;
240 long sigmasMode, sparseInterval;
241 double *chi = NULL, xLow, xHigh, *rmsResidual = NULL;
242 char *xName = NULL, *yName = NULL, **yNames = NULL, *xSigmaName = NULL;
243 char **ySigmaNames = NULL, *ySigmaControlString = NULL;
244 char *input = NULL, *output = NULL;
245 SDDS_DATASET SDDSin, SDDSout, SDDSoutInfo;
246 long *isFit = NULL, iArg, modifySigmas;
247 long generateSigmas, verbose, ignoreSigmas;
248 long outputInitialized, copyParameters = 0;
249 long int order;
250 SCANNED_ARG *s_arg;
251 double xMin, xMax, revpowThreshold;
252 double rms_average(double *d_x, int64_t d_n);
253 char *infoFile = NULL;
254 unsigned long pipeFlags, reviseOrders;
255 EVAL_PARAMETERS evalParameters;
256 long rangeFitOnly = 0;
257
258 long colIndex;
259 long cloDependentIndex = -1, numDependentItems;
260 int32_t numYNames;
261
262 /* spline memory for working on a single column */
263 gsl_bspline_workspace *bw;
264 gsl_vector *B;
265 gsl_vector *c, *yGsl, *wGsl;
266 gsl_matrix *X, *cov;
267 gsl_multifit_linear_workspace *mw;
268 long degreesOfFreedom;
269 double totalSumSquare, Rsq;
270
272 argc = scanargs(&s_arg, argc, argv);
273 if (argc < 2 || argc > (3 + N_OPTIONS)) {
274 fprintf(stderr, "usage: %s\n", USAGE);
275 fprintf(stderr, "%s%s", additional_help, additional_help2);
276 exit(EXIT_FAILURE);
277 }
278
279 input = output = NULL;
280 xName = yName = xSigmaName = ySigmaControlString = NULL;
281 yNames = ySigmaNames = NULL;
282 numDependentItems = 0;
283 modifySigmas = reviseOrders = 0;
284 /* 8 independent coefficients, a default that I think is reasonable */
285 coeffs = 8;
286 xMin = xMax = 0;
287 generateSigmas = 0;
288 sigmasMode = -1;
289 sigmas = 1;
290 sparseInterval = 1;
291 verbose = ignoreSigmas = 0;
292 normTerm = -1;
293 xOffset = 0;
294 xScaleFactor = 1;
295 pipeFlags = 0;
296 evalParameters.file = NULL;
297 infoFile = NULL;
298 orderGiven = 0;
299 coeffsGiven = 0;
300 breaksGiven = 0;
301
302 for (iArg = 1; iArg < argc; iArg++) {
303 if (s_arg[iArg].arg_type == OPTION) {
304 switch (match_string(s_arg[iArg].list[0], option, N_OPTIONS, 0)) {
305 case CLO_MODIFYSIGMAS:
306 modifySigmas = 1;
307 break;
308 case CLO_ORDER:
309 if (s_arg[iArg].n_items != 2 || sscanf(s_arg[iArg].list[1], "%ld", &order) != 1)
310 SDDS_Bomb("invalid -order syntax");
311 orderGiven = 1;
312 break;
313 case CLO_COEFFICIENTS:
314 if (s_arg[iArg].n_items != 2 || sscanf(s_arg[iArg].list[1], "%ld", &coeffs) != 1)
315 SDDS_Bomb("invalid -coefficients syntax");
316 coeffsGiven = 1;
317 break;
318 case CLO_BREAKPOINTS:
319 if (s_arg[iArg].n_items != 2 || sscanf(s_arg[iArg].list[1], "%ld", &breaks) != 1)
320 SDDS_Bomb("invalid -breakpoints syntax");
321 breaksGiven = 1;
322 break;
323 case CLO_RANGE:
324 rangeFitOnly = 0;
325 if ((s_arg[iArg].n_items != 3 && s_arg[iArg].n_items != 4) ||
326 1 != sscanf(s_arg[iArg].list[1], "%lf", &xMin) ||
327 1 != sscanf(s_arg[iArg].list[2], "%lf", &xMax) ||
328 xMin >= xMax)
329 SDDS_Bomb("incorrect -range syntax");
330 if (s_arg[iArg].n_items == 4) {
331 if (strncmp(str_tolower(s_arg[iArg].list[3]), "fitonly", strlen(s_arg[iArg].list[3])) == 0) {
332 rangeFitOnly = 1;
333 } else
334 SDDS_Bomb("incorrect -range syntax");
335 }
336 break;
337 case CLO_GENERATESIGMAS:
338 generateSigmas = FLGS_GENERATESIGMAS;
339 if (s_arg[iArg].n_items > 1) {
340 if (s_arg[iArg].n_items != 2)
341 SDDS_Bomb("incorrect -generateSigmas syntax");
342 if (strncmp(s_arg[iArg].list[1], "keepsmallest", strlen(s_arg[iArg].list[1])) == 0)
343 generateSigmas |= FLGS_KEEPSMALLEST;
344 if (strncmp(s_arg[iArg].list[1], "keeplargest", strlen(s_arg[iArg].list[1])) == 0)
345 generateSigmas |= FLGS_KEEPLARGEST;
346 if ((generateSigmas & FLGS_KEEPSMALLEST) && (generateSigmas & FLGS_KEEPLARGEST))
347 SDDS_Bomb("ambiguous -generateSigmas syntax");
348 }
349 break;
350 case CLO_XOFFSET:
351 if (s_arg[iArg].n_items != 2 || sscanf(s_arg[iArg].list[1], "%lf", &xOffset) != 1)
352 SDDS_Bomb("invalid -xOffset syntax");
353 break;
354 case CLO_SIGMAS:
355 if (s_arg[iArg].n_items != 3)
356 SDDS_Bomb("incorrect -sigmas syntax");
357 if (sscanf(s_arg[iArg].list[1], "%lf", &sigmas) != 1)
358 SDDS_Bomb("couldn't scan value for -sigmas");
359 if ((sigmasMode = match_string(s_arg[iArg].list[2], sigmas_options, N_SIGMAS_OPTIONS, 0)) < 0)
360 SDDS_Bomb("unrecognized -sigmas mode");
361 break;
362 case CLO_SPARSE:
363 if (s_arg[iArg].n_items != 2)
364 SDDS_Bomb("incorrect -sparse syntax");
365 if (sscanf(s_arg[iArg].list[1], "%ld", &sparseInterval) != 1)
366 SDDS_Bomb("couldn't scan value for -sparse");
367 if (sparseInterval < 1)
368 SDDS_Bomb("invalid -sparse value");
369 break;
370 case CLO_VERBOSE:
371 verbose = 1;
372 break;
373 case CLO_NORMALIZE:
374 normTerm = 0;
375 if (s_arg[iArg].n_items > 2 ||
376 (s_arg[iArg].n_items == 2 && sscanf(s_arg[iArg].list[1], "%ld", &normTerm) != 1) ||
377 normTerm < 0)
378 SDDS_Bomb("invalid -normalize syntax");
379 break;
380 case CLO_REVISEORDERS:
381 revpowThreshold = 0.1;
382 s_arg[iArg].n_items -= 1;
383 if (!scanItemList(&reviseOrders, s_arg[iArg].list + 1, &s_arg[iArg].n_items, 0,
384 "threshold", SDDS_DOUBLE, &revpowThreshold, 1, 0,
385 "verbose", -1, NULL, 1, REVPOW_VERBOSE, NULL))
386 SDDS_Bomb("invalid -reviseOrders syntax");
387 s_arg[iArg].n_items += 1;
388 reviseOrders |= REVPOW_ACTIVE;
389 revpowThreshold = fabs(revpowThreshold);
390 break;
391 case CLO_XFACTOR:
392 if (s_arg[iArg].n_items != 2 ||
393 sscanf(s_arg[iArg].list[1], "%lf", &xScaleFactor) != 1 || xScaleFactor == 0)
394 SDDS_Bomb("invalid -xFactor syntax");
395 break;
396 case CLO_INDEPENDENT:
397 if (s_arg[iArg].n_items != 2)
398 SDDS_Bomb("invalid -independent syntax");
399 xName = s_arg[iArg].list[1];
400 break;
401 case CLO_DEPENDENT:
402 numDependentItems = s_arg[iArg].n_items - 1;
403 cloDependentIndex = iArg;
404 if (numDependentItems < 1)
405 SDDS_Bomb("invalid -dependent syntax");
406 break;
407 case CLO_SIGMAINDEPENDENT:
408 if (s_arg[iArg].n_items != 2)
409 SDDS_Bomb("invalid -sigmaIndependent syntax");
410 xSigmaName = s_arg[iArg].list[1];
411 break;
412 case CLO_SIGMADEPENDENT:
413 if (s_arg[iArg].n_items != 2)
414 SDDS_Bomb("invalid -sigmaDependent syntax");
415 ySigmaControlString = s_arg[iArg].list[1];
416 break;
417 case CLO_PIPE:
418 if (!processPipeOption(s_arg[iArg].list + 1, s_arg[iArg].n_items - 1, &pipeFlags))
419 SDDS_Bomb("invalid -pipe syntax");
420 break;
421 case CLO_INFOFILE:
422 if (s_arg[iArg].n_items != 2)
423 SDDS_Bomb("invalid -infoFile syntax");
424 infoFile = s_arg[iArg].list[1];
425 break;
426 case CLO_EVALUATE:
427 if (s_arg[iArg].n_items < 2)
428 SDDS_Bomb("invalid -evaluate syntax");
429 evalParameters.file = s_arg[iArg].list[1];
430 s_arg[iArg].n_items -= 2;
431 s_arg[iArg].list += 2;
432 evalParameters.begin = 0;
433 evalParameters.end = 0;
434 evalParameters.nderiv = 0;
435 evalParameters.number = 0;
436 if (!scanItemList(&evalParameters.flags, s_arg[iArg].list, &s_arg[iArg].n_items, 0,
437 "begin", SDDS_DOUBLE, &evalParameters.begin, 1, EVAL_BEGIN_GIVEN,
438 "end", SDDS_DOUBLE, &evalParameters.end, 1, EVAL_END_GIVEN,
439 "derivatives", SDDS_LONG64, &evalParameters.nderiv, 1, EVAL_DERIVATIVES,
440 "basis", -1, NULL, 0, EVAL_PROVIDEBASIS,
441 "number", SDDS_LONG64, &evalParameters.number, 1, EVAL_NUMBER_GIVEN, NULL))
442 SDDS_Bomb("invalid -evaluate syntax");
443 s_arg[iArg].n_items += 2;
444 s_arg[iArg].list -= 2;
445 break;
446 case CLO_COPYPARAMETERS:
447 copyParameters = 1;
448 break;
449 default:
450 bomb("unknown switch", USAGE);
451 break;
452 }
453 } else {
454 if (input == NULL)
455 input = s_arg[iArg].list[0];
456 else if (output == NULL)
457 output = s_arg[iArg].list[0];
458 else
459 SDDS_Bomb("too many filenames");
460 }
461 }
462 /* your basic spline is order 4 (continuous second derivative) */
463 if (!orderGiven)
464 order = 4;
465 if (!breaksGiven)
466 breaks = coeffs + 2 - order;
467 if (!coeffsGiven)
468 coeffs = breaks - 2 + order;
469 if (breaksGiven && coeffsGiven)
470 SDDS_Bomb("You must specify only one of breakpoints or coefficients");
471
472 processFilenames("sddssplinefit", &input, &output, pipeFlags, 0, NULL);
473
474 if (!xName || !numDependentItems)
475 SDDS_Bomb("you must specify a column name for x and y");
476 if (modifySigmas && !xSigmaName)
477 SDDS_Bomb("you must specify x sigmas with -modifySigmas");
478 if (generateSigmas) {
479 if (modifySigmas)
480 SDDS_Bomb("you can't specify both -generateSigmas and -modifySigmas");
481 }
482 if (ySigmaControlString) {
483 if (sigmasMode != -1)
484 SDDS_Bomb("you can't specify both -sigmas and a y sigma name");
485 }
486 ySigmasValid = 0;
487 if (sigmasMode != -1 || generateSigmas || ySigmaControlString || modifySigmas)
488 ySigmasValid = 1;
489
490 if (!SDDS_InitializeInput(&SDDSin, input))
491 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
492 outputInitialized = 0;
493 yNames = ResolveColumnNames(&SDDSin, s_arg[cloDependentIndex].list + 1, numDependentItems, &numYNames);
494 if (ySigmaControlString != NULL)
495 ySigmaNames = GenerateYSigmaNames(ySigmaControlString, yNames, numYNames);
496
497 checkInputFile(&SDDSin, xName, yNames, xSigmaName, ySigmaNames, numYNames);
498 sy0 = tmalloc(sizeof(double *) * numYNames);
499 y = tmalloc(sizeof(double *) * numYNames);
500 yFit = tmalloc(sizeof(double *) * numYNames); /* this array of arrays is not in sddsmpfit */
501 sy = tmalloc(sizeof(double *) * numYNames);
502 isFit = tmalloc(sizeof(long) * numYNames);
503 chi = tmalloc(sizeof(double) * numYNames);
504 iCoefficient = tmalloc(sizeof(long) * numYNames);
505 iCoefficientSigma = tmalloc(sizeof(long) * numYNames);
506 iCoefficientUnits = tmalloc(sizeof(long) * numYNames);
507
508 while (SDDS_ReadPage(&SDDSin) > 0) {
509 if ((points = SDDS_CountRowsOfInterest(&SDDSin)) < coeffs) {
510 /* probably should emit an empty page here */
511 continue;
512 }
513 if (verbose)
514 fprintf(stdout, "number of points %" PRId64 "\n", points);
515 if (!(x = SDDS_GetColumnInDoubles(&SDDSin, xName))) {
516 fprintf(stderr, "error: unable to read column %s\n", xName);
517 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
518 }
519 for (i = 0; i < numYNames; i++) {
520 if (!(y[i] = SDDS_GetColumnInDoubles(&SDDSin, yNames[i]))) {
521 fprintf(stderr, "error: unable to read column %s\n", yNames[i]);
522 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
523 }
524 }
525 sx = NULL;
526 if (xSigmaName && !(sx = SDDS_GetColumnInDoubles(&SDDSin, xSigmaName))) {
527 fprintf(stderr, "error: unable to read column %s\n", xSigmaName);
528 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
529 }
530 for (colIndex = 0; colIndex < numYNames; colIndex++) {
531 sy0[colIndex] = tmalloc(sizeof(double) * points);
532 yFit[colIndex] = tmalloc(sizeof(double) * points);
533 }
534
535 if (ySigmaNames) {
536 for (i = 0; i < numYNames; i++) {
537 if (!(sy0[i] = SDDS_GetColumnInDoubles(&SDDSin, ySigmaNames[i]))) {
538 fprintf(stderr, "error: unable to read column %s\n", ySigmaNames[i]);
539 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
540 }
541 }
542 }
543
544 if (xMin != xMax || sparseInterval != 1) {
545 xOrig = tmalloc(sizeof(*xOrig) * points);
546 yOrig = tmalloc(sizeof(*yOrig) * numYNames);
547 yFitOrig = tmalloc(sizeof(*yFitOrig) * numYNames);
548 for (colIndex = 0; colIndex < numYNames; colIndex++) {
549 if (verbose)
550 fprintf(stdout, "Setting up a separate array for range or sparsing for column %s because of range option ...\n", yNames[colIndex]);
551 yOrig[colIndex] = tmalloc(sizeof(double) * points);
552 yFitOrig[colIndex] = tmalloc(sizeof(double) * points);
553 }
554 if (sx)
555 sxOrig = tmalloc(sizeof(*sxOrig) * points);
556 if (ySigmasValid) {
557 syOrig = tmalloc(sizeof(*syOrig) * numYNames);
558 for (colIndex = 0; colIndex < numYNames; colIndex++)
559 syOrig[colIndex] = tmalloc(sizeof(double) * points);
560 }
561 pointsOrig = points;
562 for (i = j = 0; i < points; i++) {
563 xOrig[i] = x[i];
564 if (sx)
565 sxOrig[i] = sx[i];
566 for (colIndex = 0; colIndex < numYNames; colIndex++) {
567 yOrig[colIndex][i] = y[colIndex][i];
568 if (ySigmasValid)
569 syOrig[colIndex][i] = sy0[colIndex][i];
570 }
571 }
572 if (xMin != xMax) {
573 for (i = j = 0; i < points; i++) {
574 if (xOrig[i] <= xMax && xOrig[i] >= xMin) {
575 x[j] = xOrig[i];
576 for (colIndex = 0; colIndex < numYNames; colIndex++) {
577 y[colIndex][j] = yOrig[colIndex][i];
578 if (ySigmasValid)
579 sy0[colIndex][j] = syOrig[colIndex][i];
580 }
581 if (sx)
582 sx[j] = sxOrig[i];
583 j++;
584 }
585 }
586 points = j;
587 }
588 if (sparseInterval != 1) {
589 for (i = j = 0; i < points; i++) {
590 if (i % sparseInterval == 0) {
591 x[j] = x[i];
592 for (colIndex = 0; colIndex < numYNames; colIndex++) {
593 y[colIndex][j] = y[colIndex][i];
594 if (ySigmasValid)
595 sy0[colIndex][j] = sy0[colIndex][i];
596 }
597 if (sx)
598 sx[j] = sx[i];
599 j++;
600 }
601 }
602 points = j;
603 }
604 } else {
605 /* normal processing, no ranges or sparsing */
606 xOrig = x;
607 yOrig = y;
608 sxOrig = sx;
609 syOrig = sy0;
610 pointsOrig = points;
611 }
612
613 find_min_max(&xLow, &xHigh, x, points);
614 if (verbose)
615 fprintf(stdout, "Range: xLow %lf; xHigh %lf; points %" PRId64 "\n", xLow, xHigh, points);
616 if (sigmasMode == ABSOLUTE_SIGMAS) {
617 for (colIndex = 0; colIndex < numYNames; colIndex++) {
618 for (i = 0; i < points; i++)
619 sy0[colIndex][i] = sigmas;
620 if (sy0[colIndex] != syOrig[colIndex])
621 for (i = 0; i < pointsOrig; i++)
622 syOrig[colIndex][i] = sigmas;
623 }
624 } else if (sigmasMode == FRACTIONAL_SIGMAS) {
625 for (colIndex = 0; colIndex < numYNames; colIndex++) {
626 for (i = 0; i < points; i++)
627 sy0[colIndex][i] = sigmas * fabs(y[colIndex][i]);
628 if (sy0[colIndex] != syOrig[colIndex])
629 for (i = 0; i < pointsOrig; i++)
630 syOrig[colIndex][i] = fabs(yOrig[colIndex][i]) * sigmas;
631 }
632 }
633
634 if (!ySigmasValid || generateSigmas)
635 for (colIndex = 0; colIndex < numYNames; colIndex++) {
636 for (i = 0; i < points; i++)
637 sy0[colIndex][i] = 1;
638 }
639 else
640 for (i = 0; i < points; i++)
641 for (colIndex = 0; colIndex < numYNames; colIndex++) {
642 if (sy0[colIndex][i] == 0)
643 SDDS_Bomb("y sigma = 0 for one or more points.");
644 }
645
646 diff = tmalloc(sizeof(*diff) * numYNames);
647 sy = tmalloc(sizeof(*sy) * numYNames);
648 for (colIndex = 0; colIndex < numYNames; colIndex++) {
649 diff[colIndex] = tmalloc(sizeof(double) * points);
650 sy[colIndex] = tmalloc(sizeof(double) * points);
651 }
652
653 for (i = 0; i < points; i++) {
654 for (colIndex = 0; colIndex < numYNames; colIndex++)
655 sy[colIndex][i] = sy0[colIndex][i];
656 }
657
658 /* this seems the places when things really start */
659
660 /* allocate a cubic bspline workspace (k = 4) */
661 /* k is order of spline; cubic infers k=4; breaks are number of splines */
662 /* each will be reused for each of the columns. No need to make them an array */
663 bw = gsl_bspline_alloc(order, breaks);
664 B = gsl_vector_alloc(coeffs); /* coeffs are the number of linear,
665 quadratic and cubic coeff on the
666 splines, say for k=4 */
667 X = gsl_matrix_alloc(points, coeffs);
668 c = gsl_vector_alloc(coeffs);
669 yGsl = gsl_vector_alloc(points);
670 wGsl = gsl_vector_alloc(points);
671 cov = gsl_matrix_alloc(coeffs, coeffs);
672 mw = gsl_multifit_linear_alloc(points, coeffs);
673 degreesOfFreedom = points - coeffs;
674 if (verbose)
675 fprintf(stdout, "Order %ld\ncoefficients %ld\nbreak points %ld\n", order, coeffs, breaks);
676 if (generateSigmas || modifySigmas)
677 fprintf(stderr, "generate sigmas or modify sigmas are not a feature in spline fitting.\n");
678
679 if (reviseOrders & REVPOW_ACTIVE)
680 fprintf(stderr, "revise orders is not a feature in spline fitting.\n");
681
682 if (!outputInitialized) {
683 initializeOutputFile(&SDDSout, &SDDSoutInfo, output, infoFile, &SDDSin, input, xName, yNames,
684 xSigmaName, ySigmaNames, ySigmasValid, order, coeffs, breaks, numYNames, copyParameters);
685 // free(output);
686 outputInitialized = 1;
687 /* we also want to setup the evaluation file only once */
688 if (evalParameters.file) {
689 /* check nderiv against order, repair if necessary */
690 if (evalParameters.nderiv >= order) {
691 evalParameters.nderiv = order - 1;
692 if (verbose)
693 fprintf(stderr, "Spline derivative order reduced to %" PRId64 " (i.e. order - 1)\n", evalParameters.nderiv);
694 }
695 evalParameters.bw = bw;
696 setupEvaluationFile(&evalParameters, xName, yNames, numYNames, &SDDSin);
697 }
698 }
699
700 rmsResidual = tmalloc(sizeof(double) * numYNames);
701
702 if (outputInitialized) {
703 if (!SDDS_StartPage(&SDDSout, rangeFitOnly ? pointsOrig : points) ||
704 (infoFile && !SDDS_StartPage(&SDDSoutInfo, coeffs)))
705 bomb("A", NULL);
706 if (copyParameters) {
707 if (!SDDS_CopyParameters(&SDDSout, &SDDSin))
708 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
709 if (infoFile && !SDDS_CopyParameters(&SDDSoutInfo, &SDDSin))
710 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
711 }
712 /* info file writing has been removed below for now. See sddsmpfit.c to retrieve the original */
713 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_INDEX, rangeFitOnly ? xOrig : x, rangeFitOnly ? pointsOrig : points, ix))
714 bomb("B", NULL);
715 for (colIndex = 0; colIndex < numYNames; colIndex++) {
716 /* do fit now for each column */
717 double Bj;
718 for (i = 0; i < points; i++) {
719 gsl_vector_set(yGsl, i, y[colIndex][i]);
720 /* if there was no sigmaY data given then sy = 1 will be used */
721 gsl_vector_set(wGsl, i, 1.0 / ipower(sy[colIndex][i], 2));
722 }
723 /* use uniform breakpoints on [low, high] */
724 /* what if we don't want uniform breakpoints? */
725 gsl_bspline_knots_uniform(xLow, xHigh, bw);
726 /* alternative is gsl_bspline_knots ( breakpts, bw) where
727 breakpts is the gsl vector of breakpoints that is
728 supplied by the user */
729
730 /* construct the fit matrix X */
731 for (i = 0; i < points; ++i) {
732 /* compute B_j(xi) for all j */
733 /* B is a vector */
734 gsl_bspline_eval(x[i], B, bw);
735 /* fill in row i of X */
736 for (j = 0; j < coeffs; ++j) {
737 Bj = gsl_vector_get(B, j);
738 /* X is some sort of large matrix points x coeffs, e.g. 100 x 8 */
739 gsl_matrix_set(X, i, j, Bj);
740 }
741 }
742 /* show the matrix X */
743 if (verbose == 2) {
744 fprintf(stderr, "X matrix %s:\n", yNames[colIndex]);
745 print_matrix(stderr, X);
746 }
747 /* do the fit */
748 gsl_multifit_wlinear(X, wGsl, yGsl, c, cov, &chi[colIndex], mw);
749 if (verbose)
750 fprintf(stdout, "conventionally-defined chi = sum( sqr(diff) * weight): %e\n", chi[colIndex]);
751 /* c is the answer */
752 if (verbose == 2) {
753 fprintf(stderr, "Covariance matrix for %s:\n", yNames[colIndex]);
754 print_matrix(stderr, cov);
755 }
756 /* weighted total sum of squares */
757 totalSumSquare = gsl_stats_wtss(wGsl->data, 1, yGsl->data, 1, yGsl->size);
758 Rsq = 1.0 - chi[colIndex] / totalSumSquare;
759 if (verbose)
760 fprintf(stdout, "(reduced) chisq/dof = %e, Rsq = %f\n", chi[colIndex] / degreesOfFreedom, Rsq);
761
762 double y_err; /* standard deviation of model at x[i]. Is it useful? */
763 for (i = 0; i < points; i++) {
764 gsl_bspline_eval(x[i], B, bw);
765 /* y = B c is the evaluated function. B must be interesting looking. */
766 gsl_multifit_linear_est(B, c, cov, &yFit[colIndex][i], &y_err);
767 }
768 if (rangeFitOnly) {
769 for (i = 0; i < pointsOrig; i++) {
770 diff[colIndex][i] = yOrig[colIndex][i] - yFitOrig[colIndex][i];
771 }
772 rmsResidual[colIndex] = rms_average(diff[colIndex], points);
773 /* the index iFit refers to the fit data column corresponding to colIndex */
774 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_INDEX, yOrig[colIndex], pointsOrig, iy[colIndex]) ||
775 !SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_INDEX, yFitOrig[colIndex], pointsOrig, iFit[colIndex]) ||
776 !SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_INDEX, diff[colIndex], pointsOrig, iResidual[colIndex]))
777 bomb("C", NULL);
778 } else {
779 for (i = 0; i < points; i++) {
780 diff[colIndex][i] = y[colIndex][i] - yFit[colIndex][i];
781 }
782 rmsResidual[colIndex] = rms_average(diff[colIndex], points);
783 /* the index iFit refers to the fit data column corresponding to colIndex */
784 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_INDEX, y[colIndex], points, iy[colIndex]) ||
785 !SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_INDEX, yFit[colIndex], points, iFit[colIndex]) ||
786 !SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_INDEX, diff[colIndex], points, iResidual[colIndex]))
787 bomb("C", NULL);
788 }
789 if (infoFile)
790 if (!SDDS_SetColumnFromDoubles(&SDDSoutInfo, SDDS_SET_BY_INDEX, c->data, coeffs, iCoefficient[colIndex]))
791 SDDS_PrintErrors(stderr, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
792 if (evalParameters.file) {
793 evalParameters.bw = bw;
794 makeEvaluationTable(&evalParameters, x, points, B, cov, c, xName, yNames, numYNames, colIndex, order);
795 }
796 }
797
798 if (ixSigma != -1 &&
799 !SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_INDEX, rangeFitOnly ? sxOrig : sx, rangeFitOnly ? pointsOrig : points, ixSigma))
800 bomb("E", NULL);
801 if (infoFile) {
802 {
803 int32_t *Indices;
804 Indices = malloc(sizeof(*Indices) * coeffs);
805 for (i = 0; i < coeffs; i++)
806 Indices[i] = i;
807 if (!SDDS_SetColumn(&SDDSoutInfo, SDDS_SET_BY_NAME, Indices, coeffs, "Index"))
808 SDDS_PrintErrors(stderr, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
809 }
810 }
811 for (colIndex = 0; colIndex < numYNames; colIndex++) {
812 if (ySigmasValid && iySigma[colIndex] != -1 &&
813 !SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_INDEX, rangeFitOnly ? syOrig[colIndex] : sy[colIndex],
814 rangeFitOnly ? pointsOrig : points, iySigma[colIndex]))
815 bomb("F", NULL);
816
817 /* info file has been removed for now. Splines have orders
818 but not used the same way as for regular least
819 squares of polynomials */
820 if (infoFile) {
821 if (!SDDS_SetParameters(&SDDSoutInfo, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE,
822 iRmsResidual[colIndex], rmsResidual[colIndex],
823 iChiSq[colIndex], (chi[colIndex] / degreesOfFreedom),
824 iSigLevel[colIndex], ChiSqrSigLevel(chi[colIndex], points - coeffs),
825 iOffset, xOffset, iFactor, xScaleFactor, iFitIsValid[colIndex], isFit[colIndex] ? 'y' : 'n', -1))
826 bomb("O", NULL);
827 }
828
829 /* writing the results for each page */
830 if (!SDDS_SetParameters(&SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE,
831 iRmsResidualO[colIndex], rmsResidual[colIndex],
832 iChiSqO[colIndex], (chi[colIndex] / degreesOfFreedom),
833 iSigLevelO[colIndex], ChiSqrSigLevel(chi[colIndex], points - coeffs),
834 iOffsetO, xOffset, iFactorO, xScaleFactor, iFitIsValidO[colIndex], isFit[colIndex] ? 'y' : 'n', -1))
835 bomb("O", NULL);
836 }
837 if (!SDDS_WritePage(&SDDSout) || (infoFile && !SDDS_WritePage(&SDDSoutInfo)))
838 bomb("O", NULL);
839 }
840
841 /* this is the end of the page, the wrap-up before going to the next page. */
842 if (xOrig != x)
843 free(xOrig);
844 if (sxOrig != sx)
845 free(sxOrig);
846 free(x);
847 free(sx);
848 for (colIndex = 0; colIndex < numYNames; colIndex++) {
849 free(diff[colIndex]);
850 free(sy[colIndex]);
851 if (yOrig[colIndex] != y[colIndex])
852 free(yOrig[colIndex]);
853 if (syOrig && sy0 && syOrig[colIndex] != sy0[colIndex])
854 free(syOrig[colIndex]);
855 free(y[colIndex]);
856 if (sy0 && sy0[colIndex])
857 free(sy0[colIndex]);
858 if (yFit && yFit[colIndex])
859 free(yFit[colIndex]);
860 }
861 gsl_bspline_free(bw);
862 gsl_vector_free(B);
863 gsl_matrix_free(X);
864 gsl_vector_free(yGsl);
865 gsl_vector_free(wGsl);
866 // gsl_vector_alloc(c);
867 gsl_matrix_free(cov);
868 gsl_multifit_linear_free(mw);
869 }
870
871 if (yFit)
872 free(yFit);
873 if (outputInitialized) {
874 if (!SDDS_Terminate(&SDDSout)) {
875 SDDS_PrintErrors(stderr, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
876 }
877 if (infoFile) {
878 if (!SDDS_Terminate(&SDDSoutInfo)) {
879 SDDS_PrintErrors(stderr, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
880 }
881 }
882 if (evalParameters.file) {
883 if (!SDDS_Terminate(&evalParameters.dataset)) {
884 SDDS_PrintErrors(stderr, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
885 }
886 }
887 }
888 if (!SDDS_Terminate(&SDDSin)) {
889 SDDS_PrintErrors(stderr, SDDS_EXIT_PrintErrors | SDDS_VERBOSE_PrintErrors);
890 }
891 free_scanargs(&s_arg, argc);
892
893 return EXIT_SUCCESS;
894}
895
896void RemoveElementFromStringArray(char **array, long index, long length) {
897 long lh;
898
899 for (lh = index; lh < length - 1; lh++)
900 array[lh] = array[lh + 1];
901}
902
903char **RemoveNonNumericColumnsFromNameArray(SDDS_DATASET *SDDSin, char **columns, int32_t *numColumns) {
904 long i, numNumericColumns = *numColumns;
905
906 for (i = 0; i < *numColumns; i++) {
907 if (SDDS_CheckColumn(SDDSin, columns[i], NULL, SDDS_ANY_NUMERIC_TYPE, NULL)) {
908 printf("Removing %s because not a numeric type.", columns[i]);
909 RemoveElementFromStringArray(columns, i, *numColumns);
910 numNumericColumns--;
911 }
912 }
913
914 *numColumns = numNumericColumns;
915 return (columns);
916}
917
918char **ResolveColumnNames(SDDS_DATASET *SDDSin, char **wildcardList, long length, int32_t *numYNames) {
919 char **result;
920 long i;
921
922 /* initially set the columns of interest to none, to make SDDS_OR work below */
923 SDDS_SetColumnsOfInterest(SDDSin, SDDS_MATCH_STRING, "", SDDS_AND);
924 for (i = 0; i < length; i++) {
925 SDDS_SetColumnsOfInterest(SDDSin, SDDS_MATCH_STRING, wildcardList[i], SDDS_OR);
926 }
927
928 if (!(result = SDDS_GetColumnNames(SDDSin, numYNames)) || *numYNames == 0)
929 bomb("Error matching columns in ResolveColumnNames: No matches.", NULL);
930
931 result = RemoveNonNumericColumnsFromNameArray(SDDSin, result, numYNames);
932 return (result);
933}
934
935char **GenerateYSigmaNames(char *controlString, char **yNames, long numYNames) {
936 long i, nameLength;
937 char **result, sigmaName[MAX_Y_SIGMA_NAME_SIZE];
938
939 result = tmalloc(sizeof(char *) * numYNames);
940 for (i = 0; i < numYNames; i++) {
941 sprintf(sigmaName, controlString, yNames[i]);
942 nameLength = strlen(sigmaName);
943 result[i] = tmalloc(sizeof(char) * (nameLength + 1));
944 strcpy(result[i], sigmaName);
945 }
946 return (result);
947}
948
949double rms_average(double *x, int64_t n) {
950 double sum2;
951 int64_t i;
952
953 for (i = sum2 = 0; i < n; i++)
954 sum2 += sqr(x[i]);
955
956 return (sqrt(sum2 / n));
957}
958
959void checkInputFile(SDDS_DATASET *SDDSin, char *xName, char **yNames, char *xSigmaName, char **ySigmaNames, long numYNames) {
960 char *ptr = NULL;
961 long i;
962
963 if (!(ptr = SDDS_FindColumn(SDDSin, FIND_NUMERIC_TYPE, xName, NULL)))
964 SDDS_Bomb("x column doesn't exist or is nonnumeric");
965 free(ptr);
966
967 /* y columns don't need to be checked because located using SDDS_SetColumnsOfInterest */
968
969 ptr = NULL;
970 if (xSigmaName && !(ptr = SDDS_FindColumn(SDDSin, FIND_NUMERIC_TYPE, xSigmaName, NULL)))
971 SDDS_Bomb("x sigma column doesn't exist or is nonnumeric");
972 if (ptr)
973 free(ptr);
974
975 if (ySigmaNames) {
976 for (i = 0; i < numYNames; i++) {
977 ptr = NULL;
978 if (!(ptr = SDDS_FindColumn(SDDSin, FIND_NUMERIC_TYPE, ySigmaNames[i], NULL)))
979 SDDS_Bomb("y sigma column doesn't exist or is nonnumeric");
980 if (ptr)
981 free(ptr);
982 }
983 }
984}
985
986void initializeOutputFile(SDDS_DATASET *SDDSout, SDDS_DATASET *SDDSoutInfo, char *output, char *outputInfo,
987 SDDS_DATASET *SDDSin, char *input, char *xName, char **yNames, char *xSigmaName,
988 char **ySigmaNames, long sigmasValid, long int order, long coeffs, long breakpoints,
989 long numCols, long copyParameters) {
990 char buffer[SDDS_MAXLINE], buffer1[SDDS_MAXLINE], buffer2[SDDS_MAXLINE], buffer3[SDDS_MAXLINE];
991 char *xUnits, *yUnits;
992 // char ***coefUnits;
993 long colIndex;
994
995 /* all array names followed by an 'O' contain the index of the parameter in the main output file; others refer to
996 parameters in the infoFile */
997 // coefUnits = tmalloc(sizeof(char **) * numCols);
998 ySymbols = tmalloc(sizeof(char *) * numCols);
999 iChiSq = tmalloc(sizeof(long) * numCols);
1000 iChiSqO = tmalloc(sizeof(long) * numCols);
1001 iRmsResidual = tmalloc(sizeof(long) * numCols);
1002 iRmsResidualO = tmalloc(sizeof(long) * numCols);
1003 iSigLevel = tmalloc(sizeof(long) * numCols);
1004 iSigLevelO = tmalloc(sizeof(long) * numCols);
1005 iFitIsValid = tmalloc(sizeof(long) * numCols);
1006 iFitIsValidO = tmalloc(sizeof(long) * numCols);
1007 iy = tmalloc(sizeof(long) * numCols);
1008 iySigma = tmalloc(sizeof(long) * numCols);
1009 iFit = tmalloc(sizeof(long) * numCols);
1010 iResidual = tmalloc(sizeof(long) * numCols);
1011
1012 for (colIndex = 0; colIndex < numCols; colIndex++) {
1013 ySymbols[colIndex] = NULL;
1014 // coefUnits[colIndex] = tmalloc(sizeof(char *) * coeffs);
1015 iChiSq[colIndex] = -1;
1016 iChiSqO[colIndex] = -1;
1017 iRmsResidual[colIndex] = -1;
1018 iRmsResidualO[colIndex] = -1;
1019 iSigLevel[colIndex] = -1;
1020 iSigLevelO[colIndex] = -1;
1021 iFitIsValid[colIndex] = -1;
1022 iFitIsValidO[colIndex] = -1;
1023 iy[colIndex] = -1;
1024 iySigma[colIndex] = -1;
1025 iFit[colIndex] = -1;
1026 iResidual[colIndex] = -1;
1027 }
1028
1029 if (!SDDS_InitializeOutput(SDDSout, SDDS_BINARY, 0, NULL, "sddssplinefit output: fitted data", output) ||
1030 !SDDS_TransferColumnDefinition(SDDSout, SDDSin, xName, NULL) ||
1031 SDDS_GetColumnInformation(SDDSout, "symbol", &xSymbol, SDDS_GET_BY_NAME, xName) != SDDS_STRING ||
1032 (xSigmaName && !SDDS_TransferColumnDefinition(SDDSout, SDDSin, xSigmaName, NULL)))
1033 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1034
1035 for (colIndex = 0; colIndex < numCols; colIndex++) {
1036 if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, yNames[colIndex], NULL) ||
1037 SDDS_GetColumnInformation(SDDSout, "symbol", &ySymbols[colIndex], SDDS_GET_BY_NAME, yNames[colIndex]) != SDDS_STRING ||
1038 (ySigmaNames && !SDDS_TransferColumnDefinition(SDDSout, SDDSin, ySigmaNames[colIndex], NULL)))
1039 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1040 }
1041 if (!xSymbol || SDDS_StringIsBlank(xSymbol))
1042 xSymbol = xName;
1043 for (colIndex = 0; colIndex < numCols; colIndex++)
1044 if (!ySymbols[colIndex] || SDDS_StringIsBlank(ySymbols[colIndex]))
1045 ySymbols[colIndex] = yNames[colIndex];
1046 ix = SDDS_GetColumnIndex(SDDSout, xName);
1047 for (colIndex = 0; colIndex < numCols; colIndex++) {
1048 iy[colIndex] = SDDS_GetColumnIndex(SDDSout, yNames[colIndex]);
1049 if (ySigmaNames)
1050 iySigma[colIndex] = SDDS_GetColumnIndex(SDDSout, ySigmaNames[colIndex]);
1051 }
1052 if (xSigmaName)
1053 ixSigma = SDDS_GetColumnIndex(SDDSout, xSigmaName);
1054 if (SDDS_NumberOfErrors())
1055 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1056
1057 for (colIndex = 0; colIndex < numCols; colIndex++) {
1058 sprintf(buffer, "%sFit", yNames[colIndex]);
1059 sprintf(buffer1, "Fit[%s]", ySymbols[colIndex]);
1060 if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, yNames[colIndex], buffer) ||
1061 !SDDS_ChangeColumnInformation(SDDSout, "symbol", buffer1, SDDS_SET_BY_NAME, buffer))
1062 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1063 if ((iFit[colIndex] = SDDS_GetColumnIndex(SDDSout, buffer)) < 0)
1064 SDDS_Bomb("unable to get index of just-defined fit output column");
1065
1066 sprintf(buffer, "%sResidual", yNames[colIndex]);
1067 sprintf(buffer1, "Residual[%s]", ySymbols[colIndex]);
1068 if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, yNames[colIndex], buffer) ||
1069 !SDDS_ChangeColumnInformation(SDDSout, "symbol", buffer1, SDDS_SET_BY_NAME, buffer))
1070 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1071 if (!(iResidual[colIndex] = SDDS_GetColumnIndex(SDDSout, buffer)))
1072 SDDS_Bomb("unable to get index of just-defined residual output column");
1073
1074 if (sigmasValid && !ySigmaNames) {
1075 sprintf(buffer, "%sSigma", yNames[colIndex]);
1076 if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, yNames[colIndex], buffer))
1077 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1078 iySigma[colIndex] = SDDS_GetColumnIndex(SDDSout, buffer);
1079 if (ySymbols[colIndex] && !SDDS_StringIsBlank(ySymbols[colIndex])) {
1080 sprintf(buffer1, "Sigma[%s]", ySymbols[colIndex]);
1081 if (!SDDS_ChangeColumnInformation(SDDSout, "symbol", buffer1, SDDS_SET_BY_NAME, buffer))
1082 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1083 }
1084 }
1085 }
1086
1087 if (outputInfo && !SDDS_InitializeOutput(SDDSoutInfo, SDDS_BINARY, 0, NULL, "sddsspline output: fit information", outputInfo))
1088 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1089
1090 if (outputInfo) {
1091
1092 if (SDDS_DefineParameter(SDDSoutInfo, "Order", NULL, NULL, "Order of term in fit", NULL, SDDS_LONG, 0) < 0)
1093 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1094
1095 if (SDDS_DefineParameter(SDDSoutInfo, "Coefficients", NULL, NULL, "Number of Coefficients", NULL, SDDS_LONG, 0) < 0)
1096 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1097
1098 if (SDDS_DefineParameter(SDDSoutInfo, "Breakpoints", NULL, NULL, "Number of breakpoints", NULL, SDDS_LONG, 0) < 0)
1099 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1100
1101 if (SDDS_GetColumnInformation(SDDSout, "units", &xUnits, SDDS_GET_BY_NAME, xName) != SDDS_STRING)
1102 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1103 sprintf(buffer, "%sOffset", xName);
1104 sprintf(buffer1, "Offset of %s for fit", xName);
1105 if ((iOffset = SDDS_DefineParameter(SDDSoutInfo, buffer, NULL, xUnits, buffer1, NULL, SDDS_DOUBLE, NULL)) < 0)
1106 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1107 sprintf(buffer, "%sScale", xName);
1108 sprintf(buffer1, "Scale factor of %s for fit", xName);
1109 if ((iFactor = SDDS_DefineParameter(SDDSoutInfo, buffer, NULL, xUnits, buffer1, NULL, SDDS_DOUBLE, NULL)) < 0)
1110 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1111
1112 if (SDDS_DefineColumn(SDDSoutInfo, "Index", NULL, NULL, "Index of spline coefficients", NULL, SDDS_LONG, 0) < 0)
1113 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1114 for (colIndex = 0; colIndex < numCols; colIndex++) {
1115 if (SDDS_GetColumnInformation(SDDSout, "units", &yUnits, SDDS_GET_BY_NAME, yNames[colIndex]) != SDDS_STRING)
1116 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1117
1118 sprintf(buffer1, "%sCoefficient", yNames[colIndex]);
1119 sprintf(buffer2, "%sCoefficientSigma", yNames[colIndex]);
1120
1121 if (SDDS_DefineColumn(SDDSoutInfo, buffer1, NULL, yUnits, "Coefficient of spline fit", NULL, SDDS_DOUBLE, 0) < 0 ||
1122 (sigmasValid && SDDS_DefineColumn(SDDSoutInfo, buffer2, "$gs$r$ba$n", "[CoefficientUnits]", "sigma of coefficient of term in fit", NULL, SDDS_DOUBLE, 0) < 0))
1123 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1124
1125 iCoefficient[colIndex] = SDDS_GetColumnIndex(SDDSoutInfo, buffer1); /* this index is used for setting values of that column */
1126 iCoefficientSigma[colIndex] = SDDS_GetColumnIndex(SDDSoutInfo, buffer2);
1127
1128 sprintf(buffer1, "%sReducedChiSquared", yNames[colIndex]);
1129 sprintf(buffer2, "%sRmsResidual", yNames[colIndex]);
1130 sprintf(buffer3, "%sSignificanceLevel", yNames[colIndex]);
1131
1132 if ((iChiSq[colIndex] = SDDS_DefineParameter(SDDSoutInfo, buffer1, "$gh$r$a2$n/(N-M)", NULL,
1133 "Reduced chi-squared of fit",
1134 NULL, SDDS_DOUBLE, NULL)) < 0 ||
1135 SDDS_GetColumnInformation(SDDSout, "units", &yUnits, SDDS_GET_BY_NAME, yNames[colIndex]) != SDDS_STRING ||
1136 (iRmsResidual[colIndex] =
1137 SDDS_DefineParameter(SDDSoutInfo, buffer2, "$gs$r$bres$n", yUnits, "RMS residual of fit", NULL, SDDS_DOUBLE, NULL)) < 0 ||
1138 (iSigLevel[colIndex] = SDDS_DefineParameter(SDDSoutInfo, buffer3, NULL, NULL, "Probability that data is from fit function", NULL, SDDS_DOUBLE, NULL)) < 0)
1139 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1140 if (yUnits)
1141 free(yUnits);
1142
1143 sprintf(buffer, "%sFitIsValid", yNames[colIndex]);
1144 if ((iFitIsValid[colIndex] = SDDS_DefineParameter(SDDSoutInfo, buffer, NULL, NULL, NULL, NULL, SDDS_CHARACTER, NULL)) < 0)
1145 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1146 }
1147 }
1148 if (SDDS_DefineParameter1(SDDSout, "Order", NULL, NULL, "Order of splines", NULL, SDDS_LONG, &order) < 0)
1149 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1150
1151 if (SDDS_DefineParameter1(SDDSout, "Coefficients", NULL, NULL, "Number of coeffs in fit", NULL, SDDS_LONG, &coeffs) < 0)
1152 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1153
1154 if (SDDS_DefineParameter1(SDDSout, "Breakpoints", NULL, NULL, "Number of break points in fit", NULL, SDDS_LONG, &breakpoints) < 0)
1155 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1156
1157 if (SDDS_GetColumnInformation(SDDSout, "units", &xUnits, SDDS_GET_BY_NAME, xName) != SDDS_STRING)
1158 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1159 sprintf(buffer, "%sOffset", xName);
1160 sprintf(buffer1, "Offset of %s for fit", xName);
1161 if ((iOffsetO = SDDS_DefineParameter(SDDSout, buffer, NULL, xUnits, buffer1, NULL, SDDS_DOUBLE, NULL)) < 0)
1162 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1163 sprintf(buffer, "%sScale", xName);
1164 sprintf(buffer1, "Scale factor of %s for fit", xName);
1165 if ((iFactorO = SDDS_DefineParameter(SDDSout, buffer, NULL, xUnits, buffer1, NULL, SDDS_DOUBLE, NULL)) < 0)
1166 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1167
1168 for (colIndex = 0; colIndex < numCols; colIndex++) {
1169
1170 sprintf(buffer1, "%sReducedChiSquared", yNames[colIndex]);
1171 sprintf(buffer2, "%sRmsResidual", yNames[colIndex]);
1172 sprintf(buffer3, "%sSignificanceLevel", yNames[colIndex]);
1173
1174 if ((iChiSqO[colIndex] = SDDS_DefineParameter(SDDSout, buffer1, "$gh$r$a2$n/(N-M)", NULL,
1175 "Reduced chi-squared of fit",
1176 NULL, SDDS_DOUBLE, NULL)) < 0 ||
1177 SDDS_GetColumnInformation(SDDSout, "units", &yUnits, SDDS_GET_BY_NAME, yNames[colIndex]) != SDDS_STRING ||
1178 (iRmsResidualO[colIndex] =
1179 SDDS_DefineParameter(SDDSout, buffer2, "$gs$r$bres$n", yUnits, "RMS residual of fit", NULL, SDDS_DOUBLE, NULL)) < 0 ||
1180 (iSigLevelO[colIndex] = SDDS_DefineParameter(SDDSout, buffer3, NULL, NULL, "Probability that data is from fit function", NULL, SDDS_DOUBLE, NULL)) < 0)
1181 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1182 if (yUnits)
1183 free(yUnits);
1184
1185 sprintf(buffer, "%sFitIsValid", yNames[colIndex]);
1186 if ((iFitIsValidO[colIndex] = SDDS_DefineParameter(SDDSout, buffer, NULL, NULL, NULL, NULL, SDDS_CHARACTER, NULL)) < 0)
1187 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1188 }
1189
1190 if (copyParameters) {
1191 if (!SDDS_TransferAllParameterDefinitions(SDDSout, SDDSin, 0))
1192 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1193 if (outputInfo && !SDDS_TransferAllParameterDefinitions(SDDSoutInfo, SDDSin, 0))
1194 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1195 }
1196
1197 if ((outputInfo && !SDDS_WriteLayout(SDDSoutInfo)) || !SDDS_WriteLayout(SDDSout))
1198 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1199
1200 // return coefUnits;
1201 return;
1202}
1203
1204void setupEvaluationFile(EVAL_PARAMETERS * evalParameters, char *xName, char **yName, long yNames, SDDS_DATASET *SDDSin) {
1205 long iYName, i, iCoeff, coeffs;
1206 char *xSymbol, *ySymbol;
1207 char *mainTemplateFirstDeriv[3] = {"%yNameDeriv", "Derivative w.r.t. %xSymbol of %ySymbol", "d[%ySymbol]/d[%xSymbol]"};
1208 char *mainTemplate[3];
1209 char buffer[1024];
1210 char ***yDerivName, ***yDerivUnits;
1211 long nderiv, derivOrder;
1212 long *iSpline;
1213 gsl_bspline_workspace *bw;
1214 SDDS_DATASET *SDDSout;
1215#if GSL_MAJOR_VERSION == 1
1216 gsl_bspline_deriv_workspace *bdw;
1217#endif
1218 SDDSout = &evalParameters->dataset;
1219 bw = evalParameters->bw;
1220 coeffs = gsl_bspline_ncoeffs(bw);
1221
1222 if (!SDDS_InitializeOutput(SDDSout, SDDS_BINARY, 0, NULL, "sddsspline output: evaluation of spline fits", evalParameters->file) ||
1223 !SDDS_TransferColumnDefinition(SDDSout, SDDSin, xName, NULL))
1224 SDDS_Bomb("Problem setting up evaluation file");
1225 if (SDDS_GetColumnInformation(SDDSout, "symbol", &xSymbol, SDDS_GET_BY_NAME, xName) != SDDS_STRING) {
1226 /* fprintf(stderr, "error: problem getting symbol for column %s\n", xName);*/
1227 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1228 }
1229 if (!xSymbol)
1230 SDDS_CopyString(&xSymbol, xName);
1231
1232 if (evalParameters->flags & EVAL_PROVIDEBASIS) {
1233 iSpline = tmalloc(sizeof(*iSpline) * coeffs); /* dataset index of spline column names */
1234 evalParameters->iSpline = iSpline;
1235 for (iCoeff = 0; iCoeff < coeffs; iCoeff++) {
1236 sprintf(buffer, "B%04ld", iCoeff);
1237 if ((iSpline[iCoeff] = SDDS_DefineColumn(SDDSout, buffer, NULL, NULL, NULL, NULL, SDDS_DOUBLE, 0)) < 0)
1238 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1239 }
1240 }
1241
1242 if (evalParameters->flags & EVAL_DERIVATIVES) {
1243 nderiv = evalParameters->nderiv;
1244 yDerivName = tmalloc(sizeof(*yDerivName) * (nderiv + 1));
1245 evalParameters->yDerivName = yDerivName;
1246 yDerivUnits = tmalloc(sizeof(*yDerivUnits) * (nderiv + 1));
1247 evalParameters->yDerivUnits = yDerivUnits;
1248 for (derivOrder = 0; derivOrder <= nderiv; derivOrder++) {
1249 yDerivName[derivOrder] = tmalloc(sizeof(**yDerivName) * yNames);
1250 yDerivUnits[derivOrder] = tmalloc(sizeof(**yDerivUnits) * yNames);
1251 }
1252 for (iYName = 0; iYName < yNames; iYName++) {
1253 if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, yName[iYName], NULL))
1254 SDDS_Bomb("Problem setting up evaluation file");
1255 yDerivName[0][iYName] = yName[iYName];
1256 /* first derivative is the first one */
1257 for (derivOrder = 1; derivOrder <= nderiv; derivOrder++) {
1258 for (i = 0; i < 3; i++) {
1259 if (derivOrder != 1) {
1260 switch (i) {
1261 case 0:
1262 /* name */
1263 sprintf(buffer, "%%yNameDeriv%ld", derivOrder);
1264 break;
1265 case 1:
1266 /* description */
1267 sprintf(buffer, "Derivative %ld w.r.t. %%xSymbol of %%ySymbol", derivOrder);
1268 break;
1269 case 2:
1270 /* symbol */
1271 sprintf(buffer, "d$a%ld$n[%%ySymbol]/d[%%xSymbol]$a%ld$n", derivOrder, derivOrder);
1272 break;
1273 }
1274 cp_str(&mainTemplate[i], buffer);
1275 } else {
1276 mainTemplate[i] = mainTemplateFirstDeriv[i];
1277 }
1278 }
1279 if (SDDS_GetColumnInformation(SDDSout, "symbol", &ySymbol, SDDS_GET_BY_NAME, yName[iYName]) != SDDS_STRING) {
1280 fprintf(stderr, "error: problem getting symbol for column %s\n", yName[iYName]);
1281 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1282 }
1283 if (!ySymbol || SDDS_StringIsBlank(ySymbol))
1284 SDDS_CopyString(&ySymbol, yName[iYName]);
1285 /* I'm using the function changeInformation from sddsderiv which requires an existing column of some kind*/
1286 if (SDDS_DefineColumn(SDDSout, "placeholderName", NULL, NULL, NULL, NULL, SDDS_DOUBLE, 0) < 0)
1287 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1288 yDerivUnits[derivOrder][iYName] = (char *)divideColumnUnits(SDDSout, yDerivName[derivOrder - 1][iYName], xName);
1289 yDerivName[derivOrder][iYName] = (char *)changeInformation(SDDSout, "placeholderName", yDerivName[0][iYName],
1290 ySymbol, xName, xSymbol, mainTemplate, yDerivUnits[derivOrder][iYName]);
1291 }
1292 }
1293 if (!SDDS_WriteLayout(SDDSout))
1294 SDDS_Bomb("Problem setting up evaluation file with derivatives");
1295 } else {
1296 for (iYName = 0; iYName < yNames; iYName++) {
1297 if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, yName[iYName], NULL))
1298 SDDS_Bomb("Problem setting up evaluation file");
1299 }
1300 if (!SDDS_WriteLayout(SDDSout))
1301 SDDS_Bomb("Problem setting up evaluation file");
1302 }
1303}
1304
1305/* called for each column, for each page */
1306void makeEvaluationTable(EVAL_PARAMETERS * evalParameters, double *x, int64_t points,
1307 gsl_vector *B, gsl_matrix *cov, gsl_vector *c,
1308 char *xName, char **yName, long yNames, long iYName, long int order) {
1309 static double *xEval = NULL, *yEval = NULL;
1310 static int64_t maxEvals = 0;
1311 double delta;
1312 double yerr;
1313 int64_t i;
1314 char ***yDerivName;
1315 double xi;
1316 gsl_matrix *dB = NULL;
1317 long nderiv, coeffs, iCoeff, derivOrder;
1318 double **yDeriv = NULL;
1319 double **Bspline;
1320 long *iSpline;
1321 gsl_bspline_workspace *bw;
1322 SDDS_DATASET *SDDSout;
1323#if GSL_MAJOR_VERSION == 1
1324 gsl_bspline_deriv_workspace *bdw;
1325#endif
1326 SDDSout = &evalParameters->dataset;
1327 yDerivName = evalParameters->yDerivName;
1328 iSpline = evalParameters->iSpline;
1329 bw = evalParameters->bw;
1330 coeffs = gsl_bspline_ncoeffs(bw);
1331
1332 if (!(evalParameters->flags & EVAL_BEGIN_GIVEN) || !(evalParameters->flags & EVAL_END_GIVEN)) {
1333 double min, max;
1334 find_min_max(&min, &max, x, points);
1335 if (!(evalParameters->flags & EVAL_BEGIN_GIVEN))
1336 evalParameters->begin = min;
1337 if (!(evalParameters->flags & EVAL_END_GIVEN))
1338 evalParameters->end = max;
1339 }
1340 if (!(evalParameters->flags & EVAL_NUMBER_GIVEN))
1341 evalParameters->number = points;
1342 if (evalParameters->number > 1)
1343 delta = (evalParameters->end - evalParameters->begin) / (evalParameters->number - 1);
1344 else
1345 delta = 0;
1346
1347 if (!xEval || maxEvals < evalParameters->number) {
1348 if (!(xEval = (double *)SDDS_Realloc(xEval, sizeof(*xEval) * evalParameters->number)) ||
1349 !(yEval = (double *)SDDS_Realloc(yEval, sizeof(*yEval) * evalParameters->number)))
1350 SDDS_Bomb("allocation failure");
1351 maxEvals = evalParameters->number;
1352 }
1353
1354 Bspline = tmalloc(sizeof(*Bspline) * coeffs);
1355 /* the allocation and calculation of Bspline is done only on the first column */
1356 if ((iYName == 0) && (evalParameters->flags & EVAL_PROVIDEBASIS)) {
1357 for (iCoeff = 0; iCoeff < coeffs; iCoeff++) {
1358 Bspline[iCoeff] = tmalloc(sizeof(**Bspline) * evalParameters->number);
1359 }
1360 }
1361
1362 if (!(evalParameters->flags & EVAL_DERIVATIVES)) {
1363 for (i = 0; i < evalParameters->number; i++) {
1364 xi = evalParameters->begin + i * delta;
1365 xEval[i] = xi;
1366 gsl_bspline_eval(xi, B, bw);
1367 gsl_multifit_linear_est(B, c, cov, &yEval[i], &yerr);
1368 if ((iYName == 0) && (evalParameters->flags & EVAL_PROVIDEBASIS)) {
1369 for (iCoeff = 0; iCoeff < coeffs; iCoeff++) {
1370 Bspline[iCoeff][i] = gsl_vector_get(B, iCoeff);
1371 }
1372 }
1373 }
1374 /* Oh, this filters so that StartPage and WritePage are run only once. Tricksty */
1375 /* On the other hand the x values is repeatedly assigned the values until the calls stop */
1376 if ((iYName == 0 &&
1377 !SDDS_StartPage(SDDSout, evalParameters->number)) ||
1378 !SDDS_SetColumnFromDoubles(SDDSout, SDDS_SET_BY_NAME, xEval, evalParameters->number, xName) ||
1379 !SDDS_SetColumnFromDoubles(SDDSout, SDDS_SET_BY_NAME, yEval, evalParameters->number, yName[iYName]))
1380 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1381 if ((iYName == 0) && (evalParameters->flags & EVAL_PROVIDEBASIS)) {
1382 for (iCoeff = 0; iCoeff < coeffs; iCoeff++)
1383 if (!SDDS_SetColumnFromDoubles(SDDSout, SDDS_SET_BY_INDEX, Bspline[iCoeff], evalParameters->number, iSpline[iCoeff]))
1384 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1385 }
1386 if ((iYName == yNames - 1 && !SDDS_WritePage(SDDSout)))
1387 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1388 } else {
1389 nderiv = evalParameters->nderiv;
1390 yDeriv = tmalloc(sizeof(double *) * (nderiv + 1));
1391 for (derivOrder = 0; derivOrder <= nderiv; derivOrder++) {
1392 yDeriv[derivOrder] = tmalloc(sizeof(double) * evalParameters->number);
1393 }
1394 dB = gsl_matrix_alloc(coeffs, nderiv + 1);
1395 for (i = 0; i < evalParameters->number; i++) {
1396 xi = evalParameters->begin + i * delta;
1397 xEval[i] = xi;
1398 gsl_bspline_eval(xi, B, bw);
1399 gsl_multifit_linear_est(B, c, cov, &yEval[i], &yerr);
1400#if GSL_MAJOR_VERSION == 1
1401 bdw = gsl_bspline_deriv_alloc(order);
1402 gsl_bspline_deriv_eval(xi, nderiv, dB, bw, bdw);
1403 gsl_bspline_deriv_free(bdw);
1404#else
1405 gsl_bspline_deriv_eval(xi, nderiv, dB, bw);
1406#endif
1407 for (derivOrder = 0; derivOrder <= nderiv; derivOrder++) {
1408 yDeriv[derivOrder][i] = 0;
1409 for (iCoeff = 0; iCoeff < coeffs; iCoeff++) {
1410 yDeriv[derivOrder][i] += gsl_vector_get(c, iCoeff) * gsl_matrix_get(dB, iCoeff, derivOrder);
1411 }
1412 }
1413 if ((iYName == 0) && (evalParameters->flags & EVAL_PROVIDEBASIS)) {
1414 for (iCoeff = 0; iCoeff < coeffs; iCoeff++) {
1415 Bspline[iCoeff][i] = gsl_vector_get(B, iCoeff);
1416 }
1417 }
1418 }
1419 gsl_matrix_free(dB);
1420 /* Oh, this filters so that StartPage and WritePage are run only once. Tricksty */
1421 /* On the other hand the x values is repeatedly assigned the values until the calls stop */
1422 if ((iYName == 0 &&
1423 !SDDS_StartPage(SDDSout, evalParameters->number)) ||
1424 !SDDS_SetColumnFromDoubles(SDDSout, SDDS_SET_BY_NAME, xEval, evalParameters->number, xName) ||
1425 !SDDS_SetColumnFromDoubles(SDDSout, SDDS_SET_BY_NAME, yEval, evalParameters->number, yName[iYName]))
1426 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1427 /* set derivatives */ /* don't need the 0th derivative */
1428 for (derivOrder = 1; derivOrder <= nderiv; derivOrder++) {
1429 if (!SDDS_SetColumnFromDoubles(SDDSout, SDDS_SET_BY_NAME, yDeriv[derivOrder], evalParameters->number, yDerivName[derivOrder][iYName]))
1430 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1431 }
1432 if ((iYName == 0) && (evalParameters->flags & EVAL_PROVIDEBASIS)) {
1433 for (iCoeff = 0; iCoeff < coeffs; iCoeff++)
1434 if (!SDDS_SetColumnFromDoubles(SDDSout, SDDS_SET_BY_INDEX, Bspline[iCoeff], evalParameters->number, iSpline[iCoeff]))
1435 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1436 }
1437 if ((iYName == yNames - 1 && !SDDS_WritePage(SDDSout)))
1438 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1439 if (yDeriv) {
1440 for (derivOrder = 0; derivOrder <= nderiv; derivOrder++) {
1441 free(yDeriv[derivOrder]);
1442 }
1443 free(yDeriv);
1444 }
1445 }
1446 if ((iYName == 0) && (evalParameters->flags & EVAL_PROVIDEBASIS)) {
1447 for (iCoeff = 0; iCoeff < coeffs; iCoeff++) {
1448 free(Bspline[iCoeff]);
1449 }
1450 }
1451 if (iYName == 0)
1452 free(Bspline);
1453}
1454
1455char *changeInformation(SDDS_DATASET * SDDSout, char *name, char *nameRoot, char *symbolRoot, char *xName, char *xSymbol, char **template, char *newUnits) {
1456 char buffer1[SDDS_MAXLINE], buffer2[SDDS_MAXLINE], *ptr;
1457
1458 if (!SDDS_ChangeColumnInformation(SDDSout, "units", newUnits, SDDS_PASS_BY_VALUE | SDDS_SET_BY_NAME, name))
1459 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1460
1461 makeSubstitutions(buffer1, buffer2, template[2], nameRoot, symbolRoot, xName, xSymbol);
1462 if (!SDDS_ChangeColumnInformation(SDDSout, "symbol", buffer2, SDDS_PASS_BY_VALUE | SDDS_SET_BY_NAME, name))
1463 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1464
1465 makeSubstitutions(buffer1, buffer2, template[1], nameRoot, symbolRoot, xName, xSymbol);
1466 if (!SDDS_ChangeColumnInformation(SDDSout, "description", buffer2, SDDS_PASS_BY_VALUE | SDDS_SET_BY_NAME, name))
1467 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1468
1469 makeSubstitutions(buffer1, buffer2, template[0], nameRoot, symbolRoot, xName, xSymbol);
1470 if (!SDDS_ChangeColumnInformation(SDDSout, "name", buffer2, SDDS_PASS_BY_VALUE | SDDS_SET_BY_NAME, name))
1471 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
1472 SDDS_CopyString(&ptr, buffer2);
1473 return ptr;
1474}
1475
1476void makeSubstitutions(char *buffer1, char *buffer2, char *template, char *nameRoot, char *symbolRoot, char *xName, char *xSymbol) {
1477 strcpy(buffer2, template);
1478 replace_string(buffer1, buffer2, "%ySymbol", symbolRoot);
1479 replace_string(buffer2, buffer1, "%xSymbol", xSymbol);
1480 replace_string(buffer1, buffer2, "%yName", nameRoot);
1481 replace_string(buffer2, buffer1, "%xName", xName);
1482 strcpy(buffer1, buffer2);
1483}
1484
1485int print_matrix(FILE * f, const gsl_matrix *m) {
1486 int status, n = 0;
1487 size_t i, j;
1488 for (i = 0; i < m->size1; i++) {
1489 for (j = 0; j < m->size2; j++) {
1490 if ((status = fprintf(f, "%10.6lf ", gsl_matrix_get(m, i, j))) < 0)
1491 return -1;
1492 n += status;
1493 }
1494
1495 if ((status = fprintf(f, "\n")) < 0)
1496 return -1;
1497 n += status;
1498 }
1499
1500 return n;
1501}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
Definition SDDS_info.c:364
int32_t SDDS_GetColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Retrieves information about a specified column in the SDDS dataset.
Definition SDDS_info.c:41
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_DefineParameter1(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, void *fixed_value)
Defines a data parameter with a fixed numerical value.
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_NumberOfErrors()
Retrieves the number of errors recorded by SDDS library routines.
Definition SDDS_utils.c:304
int32_t SDDS_StringIsBlank(char *s)
Checks if a string is blank (contains only whitespace characters).
char * SDDS_FindColumn(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Finds the first column in the SDDS dataset that matches the specified criteria.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_CHARACTER
Identifier for the character data type.
Definition SDDStypes.h:91
#define SDDS_ANY_NUMERIC_TYPE
Special identifier used by SDDS_Check*() routines to accept any numeric type.
Definition SDDStypes.h:157
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
#define SDDS_LONG64
Identifier for the signed 64-bit integer data type.
Definition SDDStypes.h:49
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
char * cp_str(char **s, char *t)
Copies a string, allocating memory for storage.
Definition cp_str.c:28
int find_min_max(double *min, double *max, double *list, int64_t n)
Finds the minimum and maximum values in a list of doubles.
Definition findMinMax.c:33
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int replace_string(char *t, char *s, char *orig, char *repl)
Replace all occurrences of one string with another string.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
double ipower(double x, long n)
Evaluate a power function x^n.
double ChiSqrSigLevel(double ChiSquared0, long nu)
Computes the probability that a chi-squared variable exceeds a given value.
Definition sigLevel.c:64
char * str_tolower(char *s)
Convert a string to lower case.
Definition str_tolower.c:27