SDDSlib
Loading...
Searching...
No Matches
sddsdistest.c File Reference

Tests data against various statistical distributions. More...

#include "mdb.h"
#include "SDDS.h"
#include "SDDSutils.h"
#include "scan.h"

Go to the source code of this file.

Macros

#define KS_TEST   0
 
#define CHI_TEST   1
 
#define N_TESTS   2
 
#define POISSON_ACCURACY   (1e-8)
 
#define LOG2   0.693147180559945
 

Enumerations

enum  option_type {
  CLO_PIPE , CLO_COLUMN , CLO_TEST , CLO_FILEDIST ,
  CLO_GAUSSIAN , CLO_POISSON , CLO_STUDENT , CLO_CHISQUARED ,
  CLO_DOF , CLO_EXCLUDE , CLO_MAJOR_ORDER , CLO_THREADS ,
  N_OPTIONS
}
 

Functions

void compareToFileDistribution (char *output, long testCode, SDDS_DATASET *SDDSin, char **columnName, long columnNames, char *distFile, char *distFileIndep, char *distFileDepen)
 
void compareToDistribution (char *output, long testCode, SDDS_DATASET *SDDSin, char **columnName, char **sigmaName, long columnNames, long distCode, long degreesFree, char *dofParameter, short columnMajorOrder, int threads)
 
void ksTestWithFunction (double *data, int64_t rows, double(*CDF)(double x), double *statReturn, double *sigLevelReturn)
 
void chiSquaredTestWithFunction (double *data, int64_t rows, double(*PDF)(double x), double *statReturn, double *sigLevelReturn)
 
int main (int argc, char **argv)
 
double gaussianPDF (double x)
 
double gaussianCDF (double x)
 
double poissonPDF (double xd)
 
double poissonCDF (double xd)
 
double studentPDF (double t)
 
double studentCDF (double t)
 
double chiSquaredPDF (double x)
 
double chiSquaredCDF (double x)
 

Variables

static char * USAGE
 
static char * option [N_OPTIONS]
 
static char * testChoice [N_TESTS]
 
static double sampleMean
 
static double sampleStDev
 
static int32_t DOF
 

Detailed Description

Tests data against various statistical distributions.

This program reads input data, performs statistical tests (Kolmogorov-Smirnov or Chi-Squared) against specified distributions (Gaussian, Poisson, Student's t, or user-defined via a file), and outputs the results. It supports multithreading and can handle data in either row-major or column-major order.

Usage:

sddsdistest [<input>] [<output>]
[-pipe=[in][,out]]
-column=<name>[,sigma=<name>]...
-exclude=<name>[,...]
[-degreesOfFreedom={<value>|@<parameterName>}]
[-test={ks|chisquared}]
[{-fileDistribution=<filename>,<indepName>,<depenName> |
-gaussian | -poisson | -student | -chisquared}]
[-majorOrder=row|column]
[-threads=<number>]
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Author
M. Borland, R. Soliday, Xuesong Jiao, H. Shang

Definition in file sddsdistest.c.

Macro Definition Documentation

◆ CHI_TEST

#define CHI_TEST   1

Definition at line 124 of file sddsdistest.c.

◆ KS_TEST

#define KS_TEST   0

Definition at line 123 of file sddsdistest.c.

◆ LOG2

#define LOG2   0.693147180559945

Definition at line 353 of file sddsdistest.c.

◆ N_TESTS

#define N_TESTS   2

Definition at line 125 of file sddsdistest.c.

◆ POISSON_ACCURACY

#define POISSON_ACCURACY   (1e-8)

Definition at line 312 of file sddsdistest.c.

Enumeration Type Documentation

◆ option_type

enum option_type

Definition at line 92 of file sddsdistest.c.

92 {
93 CLO_PIPE,
94 CLO_COLUMN,
95 CLO_TEST,
96 CLO_FILEDIST,
97 CLO_GAUSSIAN,
98 CLO_POISSON,
99 CLO_STUDENT,
100 CLO_CHISQUARED,
101 CLO_DOF,
102 CLO_EXCLUDE,
103 CLO_MAJOR_ORDER,
104 CLO_THREADS,
105 N_OPTIONS
106};

Function Documentation

◆ chiSquaredCDF()

double chiSquaredCDF ( double x)

Definition at line 365 of file sddsdistest.c.

365 {
366 double chiSqr;
367
368 if (x < 0)
369 x = 0;
370 chiSqr = x * DOF / sampleMean;
371 return 1 - gammaQ(DOF / 2.0, chiSqr / 2.0);
372}
double gammaQ(double a, double x)
Compute the regularized upper incomplete gamma function Q(a,x).
Definition gammai.c:64

◆ chiSquaredPDF()

double chiSquaredPDF ( double x)

Definition at line 355 of file sddsdistest.c.

355 {
356 double chiSqr, DOFover2;
357
358 if (x < 0)
359 return 0;
360 chiSqr = x * DOF / sampleMean;
361 DOFover2 = DOF / 2.0;
362 return exp((DOFover2 - 1.0) * log(chiSqr) - chiSqr / 2 - DOFover2 * LOG2 - lgamma(DOFover2));
363}

◆ chiSquaredTestWithFunction()

void chiSquaredTestWithFunction ( double * data,
int64_t rows,
double(* PDF )(double x),
double * statReturn,
double * sigLevelReturn )

Definition at line 495 of file sddsdistest.c.

495 {
496 SDDS_Bomb("Chi-squared distribution test not implemented yet---wouldn't you really like a nice K-S test instead?");
497}
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342

◆ compareToDistribution()

void compareToDistribution ( char * output,
long testCode,
SDDS_DATASET * SDDSin,
char ** columnName,
char ** sigmaName,
long columnNames,
long distCode,
long degreesFree,
char * dofParameter,
short columnMajorOrder,
int threads )

Definition at line 374 of file sddsdistest.c.

374 {
375 SDDS_DATASET SDDSout;
376 double *data, *data1, stat, sigLevel;
377 long iStat, iSigLevel, iCount, iColumnName;
378 long icol;
379 int64_t rows, row;
380 iStat = iSigLevel = iCount = iColumnName = 0;
381 if (!SDDS_InitializeOutput(&SDDSout, SDDS_BINARY, 0, NULL, "sddsdistest output", output) ||
382 0 > SDDS_DefineParameter(&SDDSout, "distestDistribution", NULL, NULL, "sddsdistest distribution name", NULL,
383 SDDS_STRING, option[distCode]) ||
384 0 > SDDS_DefineParameter(&SDDSout, "distestTest", NULL, NULL, "sddsdistest test name", NULL, SDDS_STRING, testChoice[testCode]) ||
385 0 > (iCount = SDDS_DefineParameter(&SDDSout, "Count", NULL, NULL, "Number of data points", NULL, SDDS_LONG, 0))) {
386 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
387 }
388 if (columnMajorOrder != -1)
389 SDDSout.layout.data_mode.column_major = columnMajorOrder;
390 else
391 SDDSout.layout.data_mode.column_major = SDDSin->layout.data_mode.column_major;
392 switch (testCode) {
393 case KS_TEST:
394 if (0 > (iColumnName = SDDS_DefineColumn(&SDDSout, "ColumnName", NULL, NULL, "Column analysed by sddsdistest",
395 NULL, SDDS_STRING, 0)) ||
396 0 > (iStat = SDDS_DefineColumn(&SDDSout, "D", NULL, NULL, "Kolmogorov-Smirnov D statistic", NULL, SDDS_DOUBLE, 0)) ||
397 0 > (iSigLevel = SDDS_DefineColumn(&SDDSout, "distestSigLevel", "P(D$ba$n>D)", NULL, "Probability of exceeding D", NULL, SDDS_DOUBLE, 0))) {
398 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
399 }
400 break;
401 case CHI_TEST:
402 if (0 > (iColumnName = SDDS_DefineColumn(&SDDSout, "ColumnName", NULL, NULL, "Column analysed by sddsdistest",
403 NULL, SDDS_STRING, 0)) ||
404 0 > (iStat = SDDS_DefineParameter(&SDDSout, "ChiSquared", "$gh$r$a2$n", NULL, "Chi-squared statistic", NULL,
405 SDDS_DOUBLE, 0)) ||
406 0 > (iSigLevel = SDDS_DefineParameter(&SDDSout, "distestSigLevel", "P($gh$r$a2$n$ba$n>$gh$r$a2$n)", NULL, "Probability of exceeding $gh$r$a2$n", NULL, SDDS_DOUBLE, 0))) {
407 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
408 }
409 break;
410 default:
411 SDDS_Bomb("Invalid testCode seen in compareToDistribution--this shouldn't happen.");
412 break;
413 }
414 if (!SDDS_WriteLayout(&SDDSout))
415 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
416 while (SDDS_ReadPage(SDDSin) > 0) {
417 if (!SDDS_StartPage(&SDDSout, columnNames))
418 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
419 rows = SDDS_CountRowsOfInterest(SDDSin);
420 for (icol = 0; icol < columnNames; icol++) {
421 stat = 0;
422 sigLevel = 1;
423 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_INDEX, columnName, columnNames, iColumnName) ||
424 !SDDS_SetParameters(&SDDSout, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, iCount, rows, -1)) {
425 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
426 }
427 if (rows >= 2) {
428 if (!(data = SDDS_GetColumnInDoubles(SDDSin, columnName[icol])))
429 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
430 if (dofParameter)
431 SDDS_GetParameterAsLong(SDDSin, dofParameter, &DOF);
432 else
433 DOF = degreesFree;
434 switch (distCode) {
435 case CLO_GAUSSIAN:
436 computeMomentsThreaded(&sampleMean, NULL, &sampleStDev, NULL, data, rows, threads);
437 if (testCode == KS_TEST)
438 ksTestWithFunction(data, rows, gaussianCDF, &stat, &sigLevel);
439 else
440 chiSquaredTestWithFunction(data, rows, gaussianPDF, &stat, &sigLevel);
441 break;
442 case CLO_POISSON:
443 computeMomentsThreaded(&sampleMean, NULL, NULL, NULL, data, rows, threads);
444 if (testCode == KS_TEST)
445 ksTestWithFunction(data, rows, poissonCDF, &stat, &sigLevel);
446 else
447 chiSquaredTestWithFunction(data, rows, poissonPDF, &stat, &sigLevel);
448 break;
449 case CLO_STUDENT:
450 if (DOF < 1)
451 SDDS_Bomb("must have at least one degree of freedom for Student distribution tests");
452 computeMomentsThreaded(&sampleMean, NULL, NULL, NULL, data, rows, threads);
453 if (sigmaName && sigmaName[icol]) {
454 if (!(data1 = SDDS_GetColumnInDoubles(SDDSin, sigmaName[icol])))
455 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
456 for (row = 0; row < rows; row++)
457 /* compute t = (x-mu)/sigma */
458 data[row] = (data[row] - sampleMean) / data1[row];
459 free(data1);
460 } else {
461 for (row = 0; row < rows; row++)
462 data[row] -= sampleMean;
463 }
464 if (testCode == KS_TEST)
465 ksTestWithFunction(data, rows, studentCDF, &stat, &sigLevel);
466 else
467 chiSquaredTestWithFunction(data, rows, studentPDF, &stat, &sigLevel);
468 break;
469 case CLO_CHISQUARED:
470 computeMomentsThreaded(&sampleMean, NULL, NULL, NULL, data, rows, threads);
471 if (DOF < 1)
472 SDDS_Bomb("must have at least one degree of freedom for chi-squared distribution tests");
473 if (testCode == KS_TEST)
474 ksTestWithFunction(data, rows, chiSquaredCDF, &stat, &sigLevel);
475 else
476 chiSquaredTestWithFunction(data, rows, chiSquaredPDF, &stat, &sigLevel);
477 break;
478 default:
479 SDDS_Bomb("Invalid distCode in compareToDistribution--this shouldn't happen");
480 break;
481 }
482 }
483 if (!SDDS_SetRowValues(&SDDSout, SDDS_PASS_BY_VALUE | SDDS_SET_BY_INDEX, icol, iStat, stat, iSigLevel, sigLevel, -1))
484 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
485 }
486 if (!SDDS_WritePage(&SDDSout))
487 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
488 }
489 if (!SDDS_Terminate(&SDDSout)) {
490 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
491 exit(EXIT_FAILURE);
492 }
493}
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int32_t * SDDS_GetParameterAsLong(SDDS_DATASET *SDDS_dataset, char *parameter_name, int32_t *memory)
Retrieves the value of a specified parameter as a 32-bit integer from the current data table of a dat...
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
Definition moments.c:127

◆ compareToFileDistribution()

void compareToFileDistribution ( char * output,
long testCode,
SDDS_DATASET * SDDSin,
char ** columnName,
long columnNames,
char * distFile,
char * distFileIndep,
char * distFileDepen )

Definition at line 277 of file sddsdistest.c.

277 {
278 SDDS_DATASET SDDSdist;
279 if (!SDDS_InitializeInput(&SDDSdist, distFile))
280 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
281 if (SDDS_CheckColumn(&SDDSdist, distFileIndep, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OKAY ||
282 SDDS_CheckColumn(&SDDSdist, distFileDepen, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OKAY)
283 exit(EXIT_FAILURE);
284 if (!SDDS_Terminate(&SDDSdist)) {
285 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
286 exit(EXIT_FAILURE);
287 }
288 SDDS_Bomb("-fileDistribution option not implemented yet");
289}
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
#define SDDS_ANY_NUMERIC_TYPE
Special identifier used by SDDS_Check*() routines to accept any numeric type.
Definition SDDStypes.h:157

◆ gaussianCDF()

double gaussianCDF ( double x)

Definition at line 300 of file sddsdistest.c.

300 {
301 long zSign;
302 double z;
303 z = (x - sampleMean) / sampleStDev;
304 zSign = 1;
305 if (z < 0) {
306 zSign = -1;
307 z = -z;
308 }
309 return (1 + zSign * erf(z / SQRT2)) / 2.0;
310}

◆ gaussianPDF()

double gaussianPDF ( double x)

Definition at line 294 of file sddsdistest.c.

294 {
295 double z;
296 z = (x - sampleMean) / sampleStDev;
297 return exp(-z * z / 2) / sqrt(PIx2);
298}

◆ ksTestWithFunction()

void ksTestWithFunction ( double * data,
int64_t rows,
double(* CDF )(double x),
double * statReturn,
double * sigLevelReturn )

Definition at line 499 of file sddsdistest.c.

499 {
500 double CDF1, CDF2, dCDF1, dCDF2, CDF0, dCDFmaximum;
501 int64_t row;
502
503 qsort((void *)data, rows, sizeof(*data), double_cmpasc);
504 dCDFmaximum = CDF1 = 0;
505 for (row = 0; row < rows; row++) {
506 CDF0 = (*CDF)(data[row]);
507 CDF2 = (row + 1.0) / rows;
508 dCDF1 = CDF0 - CDF1;
509 if (dCDF1 < 0)
510 dCDF1 = -dCDF1;
511 dCDF2 = CDF0 - CDF2;
512 if (dCDF2 < 0)
513 dCDF2 = -dCDF2;
514 CDF1 = CDF2;
515 if (dCDF1 > dCDFmaximum)
516 dCDFmaximum = dCDF1;
517 if (dCDF2 > dCDFmaximum)
518 dCDFmaximum = dCDF2;
519 }
520 *statReturn = dCDFmaximum;
521 *sigLevelReturn = KS_Qfunction(sqrt((double)rows) * dCDFmaximum);
522}
double KS_Qfunction(double lambda)
Compute the Q-function for the Kolmogorov-Smirnov test.
Definition kstests.c:57
int double_cmpasc(const void *a, const void *b)
Compare two doubles in ascending order.

◆ main()

int main ( int argc,
char ** argv )

Definition at line 131 of file sddsdistest.c.

131 {
132 int iArg;
133 unsigned long dummyFlags, pipeFlags, majorOrderFlag;
134 SCANNED_ARG *scanned;
135 SDDS_DATASET SDDSin;
136 char *input = NULL, *output = NULL, *distFile = NULL, **columnName = NULL, **sigmaName = NULL, **excludeName = NULL, *distFileIndep = NULL, *distFileDepen = NULL;
137 long testCode = 0, distCode = -1, code, degreesFree = -1, columnNames = 0, excludeNames = 0;
138 char *dofParameter = NULL;
139 short columnMajorOrder = -1;
140 int threads = 1;
141
143 argc = scanargs(&scanned, argc, argv);
144 if (argc < 3)
145 bomb(NULL, USAGE);
146
147 pipeFlags = 0;
148
149 for (iArg = 1; iArg < argc; iArg++) {
150 if (scanned[iArg].arg_type == OPTION) {
151 /* process options here */
152 code = match_string(scanned[iArg].list[0], option, N_OPTIONS, 0);
153 switch (code) {
154 case CLO_MAJOR_ORDER:
155 majorOrderFlag = 0;
156 scanned[iArg].n_items--;
157 if (scanned[iArg].n_items > 0 &&
158 !scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
159 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
160 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)) {
161 SDDS_Bomb("invalid -majorOrder syntax/values");
162 }
163 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
164 columnMajorOrder = 1;
165 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
166 columnMajorOrder = 0;
167 break;
168 case CLO_PIPE:
169 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
170 SDDS_Bomb("invalid -pipe syntax/values");
171 break;
172 case CLO_COLUMN:
173 if ((scanned[iArg].n_items != 2 && scanned[iArg].n_items != 3) ||
174 SDDS_StringIsBlank(scanned[iArg].list[1])) {
175 SDDS_Bomb("invalid -column syntax/values");
176 }
177 moveToStringArray(&columnName, &columnNames, scanned[iArg].list + 1, 1);
178 sigmaName = SDDS_Realloc(sigmaName, sizeof(*sigmaName) * columnNames);
179 if (scanned[iArg].n_items == 3) {
180 scanned[iArg].n_items -= 2;
181 if (!scan_item_list(&dummyFlags, scanned[iArg].list + 2, &scanned[iArg].n_items, "sigma",
182 SDDS_STRING, sigmaName + columnNames - 1, 1, 1, NULL) ||
183 dummyFlags != 1 ||
184 SDDS_StringIsBlank(sigmaName[columnNames - 1])) {
185 SDDS_Bomb("invalid -column syntax/values");
186 }
187 } else {
188 sigmaName[columnNames - 1] = NULL;
189 }
190 break;
191 case CLO_TEST:
192 if (scanned[iArg].n_items != 2 ||
193 (testCode = match_string(scanned[iArg].list[1], testChoice, N_TESTS, 0)) < 0) {
194 SDDS_Bomb("invalid -test syntax/values");
195 }
196 break;
197 case CLO_FILEDIST:
198 if (scanned[iArg].n_items != 4) {
199 SDDS_Bomb("too few qualifiers for -fileDistribution");
200 }
201 if (SDDS_StringIsBlank(distFile = scanned[iArg].list[1]) ||
202 SDDS_StringIsBlank(distFileIndep = scanned[iArg].list[2]) ||
203 SDDS_StringIsBlank(distFileDepen = scanned[iArg].list[3])) {
204 SDDS_Bomb("invalid -fileDistribution values");
205 }
206 break;
207 case CLO_GAUSSIAN:
208 case CLO_POISSON:
209 case CLO_STUDENT:
210 case CLO_CHISQUARED:
211 distCode = code;
212 break;
213 case CLO_DOF:
214 if (scanned[iArg].n_items != 2) {
215 SDDS_Bomb("too few qualifiers for -degreesOfFreedom");
216 }
217 if (scanned[iArg].list[1][0] == '@') {
218 if (!SDDS_CopyString(&dofParameter, scanned[iArg].list[1] + 1)) {
219 SDDS_Bomb("memory allocation failure");
220 }
221 } else if (sscanf(scanned[iArg].list[1], "%ld", &degreesFree) != 1 || degreesFree <= 1) {
222 SDDS_Bomb("invalid degrees-of-freedom given for -student/-chiSquared");
223 }
224 break;
225 case CLO_EXCLUDE:
226 if (scanned[iArg].n_items < 2 || SDDS_StringIsBlank(scanned[iArg].list[1])) {
227 SDDS_Bomb("invalid -exclude syntax/values");
228 }
229 moveToStringArray(&excludeName, &excludeNames, scanned[iArg].list + 1, scanned[iArg].n_items - 1);
230 break;
231 case CLO_THREADS:
232 if (scanned[iArg].n_items != 2 ||
233 !sscanf(scanned[iArg].list[1], "%d", &threads) ||
234 threads < 1) {
235 SDDS_Bomb("invalid -threads syntax");
236 }
237 break;
238 default:
239 fprintf(stderr, "error: unknown/ambiguous option: %s (%s)\n", scanned[iArg].list[0], argv[0]);
240 exit(EXIT_FAILURE);
241 }
242 } else {
243 if (!input)
244 input = scanned[iArg].list[0];
245 else if (!output)
246 output = scanned[iArg].list[0];
247 else
248 SDDS_Bomb("too many filenames seen");
249 }
250 }
251
252 processFilenames("sddsdistest", &input, &output, pipeFlags, 0, NULL);
253
254 if (!SDDS_InitializeInput(&SDDSin, input))
255 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
256 if (!columnNames)
257 SDDS_Bomb("-column option must be supplied");
258 if (!(columnNames = expandColumnPairNames(&SDDSin, &columnName, &sigmaName, columnNames, excludeName, excludeNames, FIND_NUMERIC_TYPE, 0))) {
259 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
260 SDDS_Bomb("named columns nonexistent or nonnumerical");
261 }
262 if (dofParameter && SDDS_CheckParameter(&SDDSin, dofParameter, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OKAY)
263 SDDS_Bomb("degrees-of-freedom parameter not found");
264
265 if (distFile)
266 compareToFileDistribution(output, testCode, &SDDSin, columnName, columnNames, distFile, distFileIndep, distFileDepen);
267 else
268 compareToDistribution(output, testCode, &SDDSin, columnName, sigmaName, columnNames, distCode, degreesFree, dofParameter, columnMajorOrder, threads);
269
270 if (!SDDS_Terminate(&SDDSin)) {
271 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
272 return EXIT_FAILURE;
273 }
274 return EXIT_SUCCESS;
275}
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_StringIsBlank(char *s)
Checks if a string is blank (contains only whitespace characters).
int32_t SDDS_CheckParameter(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a parameter exists in the SDDS dataset with the specified name, units, and type.
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
long scan_item_list(unsigned long *flags, char **item, long *items,...)
Scans a list of items without flag extension and assigns values based on provided keywords and types.

◆ poissonCDF()

double poissonCDF ( double xd)

Definition at line 322 of file sddsdistest.c.

322 {
323 double CDF, term, accuracy;
324 long x, n;
325
326 if (xd < 0)
327 xd = 0;
328 x = xd;
329 xd = x;
330 term = 1;
331 CDF = 1;
332 accuracy = POISSON_ACCURACY / exp(-sampleMean);
333 for (n = 1; n <= x; n++) {
334 term *= sampleMean / n;
335 CDF += term;
336 if (term < accuracy)
337 break;
338 }
339 return CDF * exp(-sampleMean);
340}

◆ poissonPDF()

double poissonPDF ( double xd)

Definition at line 314 of file sddsdistest.c.

314 {
315 long x;
316
317 if ((x = xd) < 0)
318 return 0;
319 return exp(-sampleMean + x * log(sampleMean) - lgamma(x + 1.0));
320}

◆ studentCDF()

double studentCDF ( double t)

Definition at line 346 of file sddsdistest.c.

346 {
347 if (t > 0)
348 return 1 - betaInc(DOF / 2.0, 0.5, DOF / (DOF + t * t)) / 2;
349 else
350 return betaInc(DOF / 2.0, 0.5, DOF / (DOF + t * t)) / 2;
351}
double betaInc(double a, double b, double x)
Compute the incomplete beta function.
Definition betai.c:67

◆ studentPDF()

double studentPDF ( double t)

Definition at line 342 of file sddsdistest.c.

342 {
343 return exp(-0.5 * (DOF + 1) * log(1 + t * t / DOF) + lgamma((DOF + 1.0) / 2.0) - lgamma(DOF / 2.0)) / sqrt(PI * DOF);
344}

Variable Documentation

◆ DOF

int32_t DOF
static

Definition at line 292 of file sddsdistest.c.

◆ option

char* option[N_OPTIONS]
static
Initial value:
= {
"pipe",
"column",
"test",
"filedistribution",
"gaussian",
"poisson",
"student",
"chisquared",
"degreesoffreedom",
"exclude",
"majorOrder",
"threads"
}

Definition at line 108 of file sddsdistest.c.

108 {
109 "pipe",
110 "column",
111 "test",
112 "filedistribution",
113 "gaussian",
114 "poisson",
115 "student",
116 "chisquared",
117 "degreesoffreedom",
118 "exclude",
119 "majorOrder",
120 "threads"
121};

◆ sampleMean

double sampleMean
static

Definition at line 291 of file sddsdistest.c.

◆ sampleStDev

double sampleStDev
static

Definition at line 291 of file sddsdistest.c.

◆ testChoice

char* testChoice[N_TESTS]
static
Initial value:
= {
"ks",
"chisquared",
}

Definition at line 126 of file sddsdistest.c.

126 {
127 "ks",
128 "chisquared",
129};

◆ USAGE

char* USAGE
static

Definition at line 52 of file sddsdistest.c.