SDDSlib
Loading...
Searching...
No Matches
lincorr.c
Go to the documentation of this file.
1/**
2 * @file lincorr.c
3 * @brief Functions for computing linear correlation coefficients and their significance.
4 *
5 * @copyright
6 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
7 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
8 *
9 * @license
10 * This file is distributed under the terms of the Software License Agreement
11 * found in the file LICENSE included with this distribution.
12 *
13 * @author M. Borland, R. Soliday
14 */
15#include "mdb.h"
16
17/**
18 * @brief Compute the linear correlation coefficient for two data sets.
19 *
20 * Given two arrays of data and optional acceptance arrays, this function calculates
21 * the linear correlation coefficient (r) between the two data sets.
22 * Invalid values (NaN or Inf) and unaccepted entries are skipped.
23 *
24 * @param data1 First data array.
25 * @param data2 Second data array.
26 * @param accept1 Optional acceptance array for data1, may be NULL.
27 * @param accept2 Optional acceptance array for data2, may be NULL.
28 * @param rows Number of elements in each data array.
29 * @param count Pointer to a long to store the number of valid paired samples used.
30 * @return The linear correlation coefficient (between -1 and 1).
31 */
32double linearCorrelationCoefficient(double *data1, double *data2,
33 short *accept1, short *accept2,
34 long rows, long *count) {
35 double sum1[2] = {0, 0}, sum2[2] = {0, 0}, sum12 = 0;
36 double d1, d2, r;
37 long i;
38 long count1;
39
40 count1 = 0;
41 for (i = 0; i < rows; i++) {
42 if (isnan(data1[i]) || isnan(data2[i]) || isinf(data1[i]) || isinf(data2[i]))
43 continue;
44 if ((accept1 && !accept1[i]) || (accept2 && !accept2[i]))
45 continue;
46 count1 += 1;
47 sum1[0] += data1[i];
48 sum1[1] += data1[i] * data1[i];
49 sum2[0] += data2[i];
50 sum2[1] += data2[i] * data2[i];
51 sum12 += data1[i] * data2[i];
52 }
53 if (count)
54 *count = count1;
55 d1 = count1 * sum1[1] - sum1[0] * sum1[0];
56 d2 = count1 * sum2[1] - sum2[0] * sum2[0];
57 if (d1 <= 0 || d2 <= 0)
58 return 0.0;
59 if ((d1 *= d2) <= 0)
60 return 0.0;
61 r = (count1 * sum12 - sum1[0] * sum2[0]) / sqrt(d1);
62 if (r < -1)
63 r = -1;
64 else if (r > 1)
65 r = 1;
66 return r;
67}
68
69/**
70 * @brief Compute the statistical significance of a linear correlation coefficient.
71 *
72 * Given a correlation coefficient and the sample size, this function returns the
73 * significance level, i.e., the probability of observing such a correlation by chance.
74 *
75 * @param r The linear correlation coefficient.
76 * @param rows The number of samples used to compute r.
77 * @return The significance level as a probability (0 to 1).
78 */
79double linearCorrelationSignificance(double r, long rows) {
80 if (rows < 2)
81 return 1.0;
82 if ((r = fabs(r)) > 1)
83 r = 1;
84 return rSigLevel(r, rows - 2);
85}
86
87/**
88 * @brief Compute the linear correlation coefficient between two data sets with a given shift.
89 *
90 * Similar to linearCorrelationCoefficient(), but applies a time shift (lag) between the data sets.
91 * One array is shifted relative to the other, and only overlapping values are considered.
92 *
93 * @param data1 First data array.
94 * @param data2 Second data array.
95 * @param accept1 Optional acceptance array for data1, may be NULL.
96 * @param accept2 Optional acceptance array for data2, may be NULL.
97 * @param rows Number of elements in each data array.
98 * @param count Pointer to a long to store the number of valid paired samples used after shifting.
99 * @param shift The integer shift to apply (positive or negative).
100 * @return The linear correlation coefficient (between -1 and 1) for the shifted data.
101 */
102double shiftedLinearCorrelationCoefficient(double *data1, double *data2,
103 short *accept1, short *accept2,
104 long rows, long *count, long shift) {
105 double sum1[2] = {0, 0}, sum2[2] = {0, 0}, sum12 = 0;
106 double d1, d2, r;
107 long i, i1, i2, is;
108
109 if (shift > 0) {
110 i1 = shift;
111 i2 = rows;
112 } else {
113 i1 = 0;
114 i2 = rows + shift;
115 }
116 *count = 0;
117 for (i = i1; i < i2; i++) {
118 is = i - shift;
119 if (is < 0 || is >= rows) {
120 fprintf(stderr, "shift limits set incorrectly\n");
121 exit(1);
122 }
123 if (isnan(data1[i]) || isnan(data2[is]) || isinf(data1[i]) || isinf(data2[is]))
124 continue;
125 if ((accept1 && !accept1[i]) || (accept2 && !accept2[is]))
126 continue;
127 *count += 1;
128 sum1[0] += data1[i];
129 sum1[1] += data1[i] * data1[i];
130 sum2[0] += data2[is];
131 sum2[1] += data2[is] * data2[is];
132 sum12 += data1[i] * data2[is];
133 }
134 if (!*count)
135 return 0.0;
136 d1 = (*count) * sum1[1] - sum1[0] * sum1[0];
137 d2 = (*count) * sum2[1] - sum2[0] * sum2[0];
138 if (d1 <= 0 || d2 <= 0)
139 return 0.0;
140 if ((d1 *= d2) <= 0)
141 return 0.0;
142 r = ((*count) * sum12 - sum1[0] * sum2[0]) / sqrt(d1);
143 if (r < -1)
144 r = -1;
145 else if (r > 1)
146 r = 1;
147 return r;
148}
double linearCorrelationSignificance(double r, long rows)
Compute the statistical significance of a linear correlation coefficient.
Definition lincorr.c:79
double shiftedLinearCorrelationCoefficient(double *data1, double *data2, short *accept1, short *accept2, long rows, long *count, long shift)
Compute the linear correlation coefficient between two data sets with a given shift.
Definition lincorr.c:102
double linearCorrelationCoefficient(double *data1, double *data2, short *accept1, short *accept2, long rows, long *count)
Compute the linear correlation coefficient for two data sets.
Definition lincorr.c:32
double rSigLevel(double r0, long nu)
Computes the probability that the linear correlation coefficient exceeds a given value.
Definition sigLevel.c:120