SDDSlib
Loading...
Searching...
No Matches
sddsdistest.c
Go to the documentation of this file.
1/**
2 * @file sddsdistest.c
3 * @brief Tests data against various statistical distributions.
4 *
5 * This program reads input data, performs statistical tests (Kolmogorov-Smirnov or Chi-Squared)
6 * against specified distributions (Gaussian, Poisson, Student's t, or user-defined via a file),
7 * and outputs the results. It supports multithreading and can handle data in either row-major
8 * or column-major order.
9 *
10 * @details
11 * Usage:
12 * \code
13 * sddsdistest [<input>] [<output>]
14 * [-pipe=[in][,out]]
15 * -column=<name>[,sigma=<name>]...
16 * -exclude=<name>[,...]
17 * [-degreesOfFreedom={<value>|@<parameterName>}]
18 * [-test={ks|chisquared}]
19 * [{-fileDistribution=<filename>,<indepName>,<depenName> |
20 * -gaussian | -poisson | -student | -chisquared}]
21 * [-majorOrder=row|column]
22 * [-threads=<number>]
23 * \endcode
24 *
25 * @copyright
26 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
27 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
28 *
29 * @license
30 * This file is distributed under the terms of the Software License Agreement
31 * found in the file LICENSE included with this distribution.
32 *
33 * @author M. Borland, R. Soliday, Xuesong Jiao, H. Shang
34 */
35
36#include "mdb.h"
37#include "SDDS.h"
38#include "SDDSutils.h"
39#include "scan.h"
40
41#if defined(_WIN32)
42# if _MSC_VER <= 1600
43# include "fdlibm.h"
44# endif
45#endif
46
47void compareToFileDistribution(char *output, long testCode, SDDS_DATASET *SDDSin, char **columnName, long columnNames, char *distFile, char *distFileIndep, char *distFileDepen);
48void compareToDistribution(char *output, long testCode, SDDS_DATASET *SDDSin, char **columnName, char **sigmaName, long columnNames, long distCode, long degreesFree, char *dofParameter, short columnMajorOrder, int threads);
49void ksTestWithFunction(double *data, int64_t rows, double (*CDF)(double x), double *statReturn, double *sigLevelReturn);
50void chiSquaredTestWithFunction(double *data, int64_t rows, double (*PDF)(double x), double *statReturn, double *sigLevelReturn);
51
52static char *USAGE =
53 "Usage: sddsdistest [<input>] [<output>]\n"
54 " [-pipe=[in][,out]]\n"
55 " -column=<name>[,sigma=<name>]...\n"
56 " -exclude=<name>[,...]\n"
57 " [-degreesOfFreedom={<value>|@<parameterName>}]\n"
58 " [-test={ks|chisquared}]\n"
59 " [{-fileDistribution=<filename>,<indepName>,<depenName> |\n"
60 " -gaussian | -poisson | -student | -chisquared}]\n"
61 " [-majorOrder=row|column]\n"
62 " [-threads=<number>]\n\n"
63 "Description:\n"
64 " Tests data columns against specified statistical distributions using the\n"
65 " Kolmogorov-Smirnov or Chi-Squared tests.\n\n"
66 "Options:\n"
67 " <input> Input SDDS file. If omitted, standard input is used.\n"
68 " <output> Output SDDS file. If omitted, standard output is used.\n"
69 " -pipe=[in][,out] Use pipe for input and/or output.\n"
70 " -column=<name>[,sigma=<name>]...\n"
71 " Specify one or more columns to test, optionally with\n"
72 " corresponding sigma (error) columns.\n"
73 " -exclude=<name>[,...] Exclude specified columns from testing.\n"
74 " -degreesOfFreedom={<value>|@<parameterName>}\n"
75 " Specify degrees of freedom as a fixed value or reference\n"
76 " a parameter in the input SDDS file.\n"
77 " -test={ks|chisquared} Choose the statistical test to perform: 'ks' for\n"
78 " Kolmogorov-Smirnov or 'chisquared' for Chi-Squared.\n"
79 " -fileDistribution=<filename>,<indepName>,<depenName>\n"
80 " Use a user-defined distribution from a file.\n"
81 " -gaussian Test against a Gaussian distribution.\n"
82 " -poisson Test against a Poisson distribution.\n"
83 " -student Test against a Student's t distribution.\n"
84 " -chisquared Test against a Chi-Squared distribution.\n"
85 " -majorOrder=row|column Specify data ordering: 'row' for row-major or 'column'\n"
86 " for column-major.\n"
87 " -threads=<number> Number of threads to use for computations.\n\n"
88 "Program Information:\n"
89 " Program by M. Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
90
91/* Enumeration for option types */
92enum option_type {
93 CLO_PIPE,
94 CLO_COLUMN,
95 CLO_TEST,
96 CLO_FILEDIST,
97 CLO_GAUSSIAN,
98 CLO_POISSON,
99 CLO_STUDENT,
100 CLO_CHISQUARED,
101 CLO_DOF,
102 CLO_EXCLUDE,
103 CLO_MAJOR_ORDER,
104 CLO_THREADS,
105 N_OPTIONS
106};
107
108static char *option[N_OPTIONS] = {
109 "pipe",
110 "column",
111 "test",
112 "filedistribution",
113 "gaussian",
114 "poisson",
115 "student",
116 "chisquared",
117 "degreesoffreedom",
118 "exclude",
119 "majorOrder",
120 "threads"
121};
122
123#define KS_TEST 0
124#define CHI_TEST 1
125#define N_TESTS 2
126static char *testChoice[N_TESTS] = {
127 "ks",
128 "chisquared",
129};
130
131int main(int argc, char **argv) {
132 int iArg;
133 unsigned long dummyFlags, pipeFlags, majorOrderFlag;
134 SCANNED_ARG *scanned;
135 SDDS_DATASET SDDSin;
136 char *input = NULL, *output = NULL, *distFile = NULL, **columnName = NULL, **sigmaName = NULL, **excludeName = NULL, *distFileIndep = NULL, *distFileDepen = NULL;
137 long testCode = 0, distCode = -1, code, degreesFree = -1, columnNames = 0, excludeNames = 0;
138 char *dofParameter = NULL;
139 short columnMajorOrder = -1;
140 int threads = 1;
141
143 argc = scanargs(&scanned, argc, argv);
144 if (argc < 3)
145 bomb(NULL, USAGE);
146
147 pipeFlags = 0;
148
149 for (iArg = 1; iArg < argc; iArg++) {
150 if (scanned[iArg].arg_type == OPTION) {
151 /* process options here */
152 code = match_string(scanned[iArg].list[0], option, N_OPTIONS, 0);
153 switch (code) {
154 case CLO_MAJOR_ORDER:
155 majorOrderFlag = 0;
156 scanned[iArg].n_items--;
157 if (scanned[iArg].n_items > 0 &&
158 !scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
159 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
160 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)) {
161 SDDS_Bomb("invalid -majorOrder syntax/values");
162 }
163 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
164 columnMajorOrder = 1;
165 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
166 columnMajorOrder = 0;
167 break;
168 case CLO_PIPE:
169 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
170 SDDS_Bomb("invalid -pipe syntax/values");
171 break;
172 case CLO_COLUMN:
173 if ((scanned[iArg].n_items != 2 && scanned[iArg].n_items != 3) ||
174 SDDS_StringIsBlank(scanned[iArg].list[1])) {
175 SDDS_Bomb("invalid -column syntax/values");
176 }
177 moveToStringArray(&columnName, &columnNames, scanned[iArg].list + 1, 1);
178 sigmaName = SDDS_Realloc(sigmaName, sizeof(*sigmaName) * columnNames);
179 if (scanned[iArg].n_items == 3) {
180 scanned[iArg].n_items -= 2;
181 if (!scan_item_list(&dummyFlags, scanned[iArg].list + 2, &scanned[iArg].n_items, "sigma",
182 SDDS_STRING, sigmaName + columnNames - 1, 1, 1, NULL) ||
183 dummyFlags != 1 ||
184 SDDS_StringIsBlank(sigmaName[columnNames - 1])) {
185 SDDS_Bomb("invalid -column syntax/values");
186 }
187 } else {
188 sigmaName[columnNames - 1] = NULL;
189 }
190 break;
191 case CLO_TEST:
192 if (scanned[iArg].n_items != 2 ||
193 (testCode = match_string(scanned[iArg].list[1], testChoice, N_TESTS, 0)) < 0) {
194 SDDS_Bomb("invalid -test syntax/values");
195 }
196 break;
197 case CLO_FILEDIST:
198 if (scanned[iArg].n_items != 4) {
199 SDDS_Bomb("too few qualifiers for -fileDistribution");
200 }
201 if (SDDS_StringIsBlank(distFile = scanned[iArg].list[1]) ||
202 SDDS_StringIsBlank(distFileIndep = scanned[iArg].list[2]) ||
203 SDDS_StringIsBlank(distFileDepen = scanned[iArg].list[3])) {
204 SDDS_Bomb("invalid -fileDistribution values");
205 }
206 break;
207 case CLO_GAUSSIAN:
208 case CLO_POISSON:
209 case CLO_STUDENT:
210 case CLO_CHISQUARED:
211 distCode = code;
212 break;
213 case CLO_DOF:
214 if (scanned[iArg].n_items != 2) {
215 SDDS_Bomb("too few qualifiers for -degreesOfFreedom");
216 }
217 if (scanned[iArg].list[1][0] == '@') {
218 if (!SDDS_CopyString(&dofParameter, scanned[iArg].list[1] + 1)) {
219 SDDS_Bomb("memory allocation failure");
220 }
221 } else if (sscanf(scanned[iArg].list[1], "%ld", &degreesFree) != 1 || degreesFree <= 1) {
222 SDDS_Bomb("invalid degrees-of-freedom given for -student/-chiSquared");
223 }
224 break;
225 case CLO_EXCLUDE:
226 if (scanned[iArg].n_items < 2 || SDDS_StringIsBlank(scanned[iArg].list[1])) {
227 SDDS_Bomb("invalid -exclude syntax/values");
228 }
229 moveToStringArray(&excludeName, &excludeNames, scanned[iArg].list + 1, scanned[iArg].n_items - 1);
230 break;
231 case CLO_THREADS:
232 if (scanned[iArg].n_items != 2 ||
233 !sscanf(scanned[iArg].list[1], "%d", &threads) ||
234 threads < 1) {
235 SDDS_Bomb("invalid -threads syntax");
236 }
237 break;
238 default:
239 fprintf(stderr, "error: unknown/ambiguous option: %s (%s)\n", scanned[iArg].list[0], argv[0]);
240 exit(EXIT_FAILURE);
241 }
242 } else {
243 if (!input)
244 input = scanned[iArg].list[0];
245 else if (!output)
246 output = scanned[iArg].list[0];
247 else
248 SDDS_Bomb("too many filenames seen");
249 }
250 }
251
252 processFilenames("sddsdistest", &input, &output, pipeFlags, 0, NULL);
253
254 if (!SDDS_InitializeInput(&SDDSin, input))
255 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
256 if (!columnNames)
257 SDDS_Bomb("-column option must be supplied");
258 if (!(columnNames = expandColumnPairNames(&SDDSin, &columnName, &sigmaName, columnNames, excludeName, excludeNames, FIND_NUMERIC_TYPE, 0))) {
259 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
260 SDDS_Bomb("named columns nonexistent or nonnumerical");
261 }
262 if (dofParameter && SDDS_CheckParameter(&SDDSin, dofParameter, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OKAY)
263 SDDS_Bomb("degrees-of-freedom parameter not found");
264
265 if (distFile)
266 compareToFileDistribution(output, testCode, &SDDSin, columnName, columnNames, distFile, distFileIndep, distFileDepen);
267 else
268 compareToDistribution(output, testCode, &SDDSin, columnName, sigmaName, columnNames, distCode, degreesFree, dofParameter, columnMajorOrder, threads);
269
270 if (!SDDS_Terminate(&SDDSin)) {
271 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
272 return EXIT_FAILURE;
273 }
274 return EXIT_SUCCESS;
275}
276
277void compareToFileDistribution(char *output, long testCode, SDDS_DATASET *SDDSin, char **columnName, long columnNames, char *distFile, char *distFileIndep, char *distFileDepen) {
278 SDDS_DATASET SDDSdist;
279 if (!SDDS_InitializeInput(&SDDSdist, distFile))
280 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
281 if (SDDS_CheckColumn(&SDDSdist, distFileIndep, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OKAY ||
282 SDDS_CheckColumn(&SDDSdist, distFileDepen, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OKAY)
283 exit(EXIT_FAILURE);
284 if (!SDDS_Terminate(&SDDSdist)) {
285 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
286 exit(EXIT_FAILURE);
287 }
288 SDDS_Bomb("-fileDistribution option not implemented yet");
289}
290
291static double sampleMean, sampleStDev;
292static int32_t DOF;
293
294double gaussianPDF(double x) {
295 double z;
296 z = (x - sampleMean) / sampleStDev;
297 return exp(-z * z / 2) / sqrt(PIx2);
298}
299
300double gaussianCDF(double x) {
301 long zSign;
302 double z;
303 z = (x - sampleMean) / sampleStDev;
304 zSign = 1;
305 if (z < 0) {
306 zSign = -1;
307 z = -z;
308 }
309 return (1 + zSign * erf(z / SQRT2)) / 2.0;
310}
311
312#define POISSON_ACCURACY (1e-8)
313
314double poissonPDF(double xd) {
315 long x;
316
317 if ((x = xd) < 0)
318 return 0;
319 return exp(-sampleMean + x * log(sampleMean) - lgamma(x + 1.0));
320}
321
322double poissonCDF(double xd) {
323 double CDF, term, accuracy;
324 long x, n;
325
326 if (xd < 0)
327 xd = 0;
328 x = xd;
329 xd = x;
330 term = 1;
331 CDF = 1;
332 accuracy = POISSON_ACCURACY / exp(-sampleMean);
333 for (n = 1; n <= x; n++) {
334 term *= sampleMean / n;
335 CDF += term;
336 if (term < accuracy)
337 break;
338 }
339 return CDF * exp(-sampleMean);
340}
341
342double studentPDF(double t) {
343 return exp(-0.5 * (DOF + 1) * log(1 + t * t / DOF) + lgamma((DOF + 1.0) / 2.0) - lgamma(DOF / 2.0)) / sqrt(PI * DOF);
344}
345
346double studentCDF(double t) {
347 if (t > 0)
348 return 1 - betaInc(DOF / 2.0, 0.5, DOF / (DOF + t * t)) / 2;
349 else
350 return betaInc(DOF / 2.0, 0.5, DOF / (DOF + t * t)) / 2;
351}
352
353#define LOG2 0.693147180559945
354
355double chiSquaredPDF(double x) {
356 double chiSqr, DOFover2;
357
358 if (x < 0)
359 return 0;
360 chiSqr = x * DOF / sampleMean;
361 DOFover2 = DOF / 2.0;
362 return exp((DOFover2 - 1.0) * log(chiSqr) - chiSqr / 2 - DOFover2 * LOG2 - lgamma(DOFover2));
363}
364
365double chiSquaredCDF(double x) {
366 double chiSqr;
367
368 if (x < 0)
369 x = 0;
370 chiSqr = x * DOF / sampleMean;
371 return 1 - gammaQ(DOF / 2.0, chiSqr / 2.0);
372}
373
374void compareToDistribution(char *output, long testCode, SDDS_DATASET *SDDSin, char **columnName, char **sigmaName, long columnNames, long distCode, long degreesFree, char *dofParameter, short columnMajorOrder, int threads) {
375 SDDS_DATASET SDDSout;
376 double *data, *data1, stat, sigLevel;
377 long iStat, iSigLevel, iCount, iColumnName;
378 long icol;
379 int64_t rows, row;
380 iStat = iSigLevel = iCount = iColumnName = 0;
381 if (!SDDS_InitializeOutput(&SDDSout, SDDS_BINARY, 0, NULL, "sddsdistest output", output) ||
382 0 > SDDS_DefineParameter(&SDDSout, "distestDistribution", NULL, NULL, "sddsdistest distribution name", NULL,
383 SDDS_STRING, option[distCode]) ||
384 0 > SDDS_DefineParameter(&SDDSout, "distestTest", NULL, NULL, "sddsdistest test name", NULL, SDDS_STRING, testChoice[testCode]) ||
385 0 > (iCount = SDDS_DefineParameter(&SDDSout, "Count", NULL, NULL, "Number of data points", NULL, SDDS_LONG, 0))) {
386 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
387 }
388 if (columnMajorOrder != -1)
389 SDDSout.layout.data_mode.column_major = columnMajorOrder;
390 else
391 SDDSout.layout.data_mode.column_major = SDDSin->layout.data_mode.column_major;
392 switch (testCode) {
393 case KS_TEST:
394 if (0 > (iColumnName = SDDS_DefineColumn(&SDDSout, "ColumnName", NULL, NULL, "Column analysed by sddsdistest",
395 NULL, SDDS_STRING, 0)) ||
396 0 > (iStat = SDDS_DefineColumn(&SDDSout, "D", NULL, NULL, "Kolmogorov-Smirnov D statistic", NULL, SDDS_DOUBLE, 0)) ||
397 0 > (iSigLevel = SDDS_DefineColumn(&SDDSout, "distestSigLevel", "P(D$ba$n>D)", NULL, "Probability of exceeding D", NULL, SDDS_DOUBLE, 0))) {
398 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
399 }
400 break;
401 case CHI_TEST:
402 if (0 > (iColumnName = SDDS_DefineColumn(&SDDSout, "ColumnName", NULL, NULL, "Column analysed by sddsdistest",
403 NULL, SDDS_STRING, 0)) ||
404 0 > (iStat = SDDS_DefineParameter(&SDDSout, "ChiSquared", "$gh$r$a2$n", NULL, "Chi-squared statistic", NULL,
405 SDDS_DOUBLE, 0)) ||
406 0 > (iSigLevel = SDDS_DefineParameter(&SDDSout, "distestSigLevel", "P($gh$r$a2$n$ba$n>$gh$r$a2$n)", NULL, "Probability of exceeding $gh$r$a2$n", NULL, SDDS_DOUBLE, 0))) {
407 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
408 }
409 break;
410 default:
411 SDDS_Bomb("Invalid testCode seen in compareToDistribution--this shouldn't happen.");
412 break;
413 }
414 if (!SDDS_WriteLayout(&SDDSout))
415 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
416 while (SDDS_ReadPage(SDDSin) > 0) {
417 if (!SDDS_StartPage(&SDDSout, columnNames))
418 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
419 rows = SDDS_CountRowsOfInterest(SDDSin);
420 for (icol = 0; icol < columnNames; icol++) {
421 stat = 0;
422 sigLevel = 1;
423 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, columnName, columnNames, iColumnName) ||
424 !SDDS_SetParameters(&SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iCount, rows, -1)) {
425 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
426 }
427 if (rows >= 2) {
428 if (!(data = SDDS_GetColumnInDoubles(SDDSin, columnName[icol])))
429 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
430 if (dofParameter)
431 SDDS_GetParameterAsLong(SDDSin, dofParameter, &DOF);
432 else
433 DOF = degreesFree;
434 switch (distCode) {
435 case CLO_GAUSSIAN:
436 computeMomentsThreaded(&sampleMean, NULL, &sampleStDev, NULL, data, rows, threads);
437 if (testCode == KS_TEST)
438 ksTestWithFunction(data, rows, gaussianCDF, &stat, &sigLevel);
439 else
440 chiSquaredTestWithFunction(data, rows, gaussianPDF, &stat, &sigLevel);
441 break;
442 case CLO_POISSON:
443 computeMomentsThreaded(&sampleMean, NULL, NULL, NULL, data, rows, threads);
444 if (testCode == KS_TEST)
445 ksTestWithFunction(data, rows, poissonCDF, &stat, &sigLevel);
446 else
447 chiSquaredTestWithFunction(data, rows, poissonPDF, &stat, &sigLevel);
448 break;
449 case CLO_STUDENT:
450 if (DOF < 1)
451 SDDS_Bomb("must have at least one degree of freedom for Student distribution tests");
452 computeMomentsThreaded(&sampleMean, NULL, NULL, NULL, data, rows, threads);
453 if (sigmaName && sigmaName[icol]) {
454 if (!(data1 = SDDS_GetColumnInDoubles(SDDSin, sigmaName[icol])))
455 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
456 for (row = 0; row < rows; row++)
457 /* compute t = (x-mu)/sigma */
458 data[row] = (data[row] - sampleMean) / data1[row];
459 free(data1);
460 } else {
461 for (row = 0; row < rows; row++)
462 data[row] -= sampleMean;
463 }
464 if (testCode == KS_TEST)
465 ksTestWithFunction(data, rows, studentCDF, &stat, &sigLevel);
466 else
467 chiSquaredTestWithFunction(data, rows, studentPDF, &stat, &sigLevel);
468 break;
469 case CLO_CHISQUARED:
470 computeMomentsThreaded(&sampleMean, NULL, NULL, NULL, data, rows, threads);
471 if (DOF < 1)
472 SDDS_Bomb("must have at least one degree of freedom for chi-squared distribution tests");
473 if (testCode == KS_TEST)
474 ksTestWithFunction(data, rows, chiSquaredCDF, &stat, &sigLevel);
475 else
476 chiSquaredTestWithFunction(data, rows, chiSquaredPDF, &stat, &sigLevel);
477 break;
478 default:
479 SDDS_Bomb("Invalid distCode in compareToDistribution--this shouldn't happen");
480 break;
481 }
482 }
483 if (!SDDS_SetRowValues(&SDDSout, SDDS_PASS_BY_VALUE | SDDS_SET_BY_INDEX, icol, iStat, stat, iSigLevel, sigLevel, -1))
484 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
485 }
486 if (!SDDS_WritePage(&SDDSout))
487 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
488 }
489 if (!SDDS_Terminate(&SDDSout)) {
490 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
491 exit(EXIT_FAILURE);
492 }
493}
494
495void chiSquaredTestWithFunction(double *data, int64_t rows, double (*PDF)(double x), double *statReturn, double *sigLevelReturn) {
496 SDDS_Bomb("Chi-squared distribution test not implemented yet---wouldn't you really like a nice K-S test instead?");
497}
498
499void ksTestWithFunction(double *data, int64_t rows, double (*CDF)(double x), double *statReturn, double *sigLevelReturn) {
500 double CDF1, CDF2, dCDF1, dCDF2, CDF0, dCDFmaximum;
501 int64_t row;
502
503 qsort((void *)data, rows, sizeof(*data), double_cmpasc);
504 dCDFmaximum = CDF1 = 0;
505 for (row = 0; row < rows; row++) {
506 CDF0 = (*CDF)(data[row]);
507 CDF2 = (row + 1.0) / rows;
508 dCDF1 = CDF0 - CDF1;
509 if (dCDF1 < 0)
510 dCDF1 = -dCDF1;
511 dCDF2 = CDF0 - CDF2;
512 if (dCDF2 < 0)
513 dCDF2 = -dCDF2;
514 CDF1 = CDF2;
515 if (dCDF1 > dCDFmaximum)
516 dCDFmaximum = dCDF1;
517 if (dCDF2 > dCDFmaximum)
518 dCDFmaximum = dCDF2;
519 }
520 *statReturn = dCDFmaximum;
521 *sigLevelReturn = KS_Qfunction(sqrt((double)rows) * dCDFmaximum);
522}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int32_t * SDDS_GetParameterAsLong(SDDS_DATASET *SDDS_dataset, char *parameter_name, int32_t *memory)
Retrieves the value of a specified parameter as a 32-bit integer from the current data table of a dat...
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_StringIsBlank(char *s)
Checks if a string is blank (contains only whitespace characters).
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CheckParameter(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a parameter exists in the SDDS dataset with the specified name, units, and type.
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_ANY_NUMERIC_TYPE
Special identifier used by SDDS_Check*() routines to accept any numeric type.
Definition SDDStypes.h:157
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
double betaInc(double a, double b, double x)
Compute the incomplete beta function.
Definition betai.c:67
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
double gammaQ(double a, double x)
Compute the regularized upper incomplete gamma function Q(a,x).
Definition gammai.c:64
double KS_Qfunction(double lambda)
Compute the Q-function for the Kolmogorov-Smirnov test.
Definition kstests.c:57
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
Definition moments.c:127
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
long scan_item_list(unsigned long *flags, char **item, long *items,...)
Scans a list of items without flag extension and assigns values based on provided keywords and types.
int double_cmpasc(const void *a, const void *b)
Compare two doubles in ascending order.