SDDSlib
Loading...
Searching...
No Matches
median.c
Go to the documentation of this file.
1/**
2 * @file median.c
3 * @brief Computes statistical measures such as median, percentiles, average, and middle values.
4 *
5 * This file contains functions to compute median, percentiles, averages, and the middle value of datasets.
6 * See also the find_XX() routines in rowmedian.c which return the position of the median and other statistics.
7 *
8 * @copyright
9 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
10 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
11 *
12 * @license
13 * This file is distributed under the terms of the Software License Agreement
14 * found in the file LICENSE included with this distribution.
15 *
16 * @author M. Borland, C. Saunders, R. Soliday, Y. Wang
17 */
18
19#include "mdb.h"
20
21/**
22 * @brief Computes the median of an array of doubles.
23 *
24 * @param value Pointer to store the computed median value.
25 * @param x Pointer to the array of doubles.
26 * @param n Number of elements in the array.
27 * @return Returns 1 on success, 0 on failure.
28 */
29long compute_median(double *value, double *x, long n) {
30 static double *data = NULL;
31 static long last_n = 0;
32 long i;
33
34 if (n <= 0)
35 return 0;
36 if (n > last_n) {
37 data = trealloc(data, sizeof(*data) * n);
38 last_n = n;
39 }
40 for (i = 0; i < n; i++)
41 data[i] = x[i];
42 qsort((void *)data, n, sizeof(*data), double_cmpasc);
43 *value = data[n / 2];
44 return 1;
45}
46
47/**
48 * @brief Computes a specific percentile of an array of doubles.
49 *
50 * @param value Pointer to store the computed percentile value.
51 * @param x Pointer to the array of doubles.
52 * @param n Number of elements in the array.
53 * @param percentile The desired percentile to compute (0-100).
54 * @return Returns 1 on success, 0 on failure.
55 */
56long compute_percentile(double *value, double *x, long n, double percentile) {
57 static double *data = NULL;
58 static long last_n = 0;
59 long i;
60
61 if (n <= 0 || percentile < 0 || percentile > 100)
62 return 0;
63 if (n > last_n) {
64 data = trealloc(data, sizeof(*data) * n);
65 last_n = n;
66 }
67 for (i = 0; i < n; i++)
68 data[i] = x[i];
69 qsort((void *)data, n, sizeof(*data), double_cmpasc);
70 *value = data[(long)((n - 1) * (percentile / 100.0))];
71 return 1;
72}
73
74/**
75 * @brief Computes multiple percentiles of an array of doubles.
76 *
77 * @param position Pointer to the array to store the computed percentile values.
78 * @param percent Pointer to the array of percentiles to compute (each value between 0-100).
79 * @param positions Number of percentiles to compute.
80 * @param x Pointer to the array of doubles.
81 * @param n Number of elements in the array.
82 * @return Returns 1 on success, 0 on failure.
83 */
84long compute_percentiles(double *position, double *percent, long positions, double *x, long n) {
85 static double *data = NULL;
86 static long last_n = 0;
87 long ip;
88
89 if (n <= 0 || positions <= 0)
90 return 0;
91 if (n > last_n) {
92 data = trealloc(data, sizeof(*data) * n);
93 last_n = n;
94 }
95 memcpy((char *)data, (char *)x, sizeof(*x) * n);
96 qsort((void *)data, n, sizeof(*data), double_cmpasc);
97 for (ip = 0; ip < positions; ip++)
98 position[ip] = data[(long)((n - 1) * (percent[ip] / 100.0))];
99 return 1;
100}
101
102/**
103 * @brief Computes multiple percentiles of an array of doubles, considering only flagged elements.
104 *
105 * @param position Pointer to the array to store the computed percentile values.
106 * @param percent Pointer to the array of percentiles to compute (each value between 0-100).
107 * @param positions Number of percentiles to compute.
108 * @param x Pointer to the array of doubles.
109 * @param keep Pointer to the array of flags indicating which elements to include.
110 * @param n Number of elements in the array.
111 * @return Returns 1 on success, 0 on failure.
112 */
113long compute_percentiles_flagged(double *position, double *percent, long positions, double *x, int32_t *keep, int64_t n) {
114 static double *data = NULL;
115 static int64_t last_n = 0;
116 int64_t ip, jp, count;
117
118 if (n <= 0 || positions <= 0)
119 return 0;
120 for (ip=count=0; ip<n; ip++)
121 if (keep[ip])
122 count++;
123 if (count > last_n) {
124 data = trealloc(data, sizeof(*data) * count);
125 last_n = count;
126 }
127 for (ip=jp=0; ip<n; ip++)
128 if (keep[ip])
129 data[jp++] = x[ip];
130 qsort((void *)data, count, sizeof(*data), double_cmpasc);
131 for (ip = 0; ip < positions; ip++)
132 position[ip] = data[(long)((count - 1) * (percent[ip] / 100.0))];
133 return 1;
134}
135
/**
 * @brief Calculates the arithmetic mean of a list of doubles.
 *
 * The values are accumulated in array order into a double and the total is
 * divided by the number of points.
 *
 * @param value Receives the mean; left unchanged when the call fails.
 * @param data Array of input values.
 * @param n Number of values in data.
 * @return 1 on success, 0 if n is not positive.
 */
long compute_average(double *value, double *data, int64_t n) {
  double total = 0;
  const double *cursor, *stop;

  if (n <= 0)
    return 0;

  stop = data + n;
  for (cursor = data; cursor != stop; cursor++)
    total += *cursor;

  *value = total / n;
  return 1;
}
156
/**
 * @brief Calculates the midpoint between the smallest and largest values of a list of doubles.
 *
 * The extremes are obtained from find_min_max() and the result is their
 * arithmetic mean, (min + max) / 2.
 *
 * @param value Receives the midpoint; left unchanged when the call fails.
 * @param data Array of input values.
 * @param n Number of values in data.
 * @return 1 on success, 0 if n is not positive or find_min_max() returns zero.
 */
long compute_middle(double *value, double *data, long n) {
  double lowest, highest;

  /* The n check comes first so find_min_max() is never called for an empty list. */
  if (n > 0 && find_min_max(&lowest, &highest, data, n)) {
    *value = (lowest + highest) / 2;
    return 1;
  }
  return 0;
}
175
/**
 * @brief Approximates multiple percentiles of an array of doubles using histogram bins.
 *
 * The data range reported by find_min_max() is widened symmetrically about its
 * centre by a factor of (1 + 1/bins), the data are histogrammed over that
 * range, and the histogram is turned into a normalized cumulative
 * distribution. For each requested percentile the bins are scanned from the
 * start until the cumulative fraction is no longer below the target; the
 * reported position is xMin + k * (xMax - xMin) / bins, where k is the last
 * bin index whose cumulative fraction was still below the target (0 if none).
 *
 * @param position Pointer to the array to store the computed percentile positions.
 * @param percent Pointer to the array of percentiles to compute (each value between 0-100).
 * @param positions Number of percentiles to compute.
 * @param x Pointer to the array of doubles.
 * @param n Number of elements in the array.
 * @param bins Number of histogram bins to use for approximation.
 * @return Returns 1 on success. Returns 0 if bins < 2, positions or n is not
 *         positive, find_min_max() returns zero, or the histogram buffer
 *         cannot be allocated.
 */
long approximate_percentiles(double *position, double *percent, long positions, double *x, long n,
                             long bins) {
  double *hist, *cdf, xMin, xMax, xCenter, xRange, total;
  long i, j, k;

  if (bins < 2 || positions <= 0 || n <= 0)
    return 0;
  /* Determine the data range before allocating, so a failure here needs no
   * cleanup and xMin/xMax are never used uninitialized. A zero return is
   * treated as failure, as compute_middle() does. */
  if (!find_min_max(&xMin, &xMax, x, n))
    return 0;
  if (!(hist = malloc(sizeof(*hist) * bins)))
    return 0;

  /* Widen the histogram range by a factor of (1 + 1/bins) about its centre. */
  xCenter = (xMax + xMin) / 2;
  xRange = (xMax - xMin) * (1 + 1. / bins) / 2;
  xMin = xCenter - xRange;
  xMax = xCenter + xRange;
  /* NOTE(review): the final argument is make_histogram()'s new_start flag;
   * presumably it requests a freshly zeroed histogram - confirm. */
  make_histogram(hist, bins, xMin, xMax, x, n, 1);

  /* Convert the histogram in place into a running sum, then normalize by the
   * final total so the last entry becomes 1. */
  cdf = hist;
  for (i = 1; i < bins; i++)
    cdf[i] += cdf[i - 1];
  total = cdf[bins - 1];
  for (i = 0; i < bins; i++)
    cdf[i] /= total;

  for (j = 0; j < positions; j++) {
    /* k tracks the last bin whose cumulative fraction is below the target. */
    for (i = k = 0; i < bins; i++) {
      if (cdf[i] < percent[j] / 100.0)
        k = i;
      else
        break;
    }
    position[j] = xMin + (k * (xMax - xMin)) / bins;
  }
  free(hist);
  return 1;
}
void * trealloc(void *old_ptr, uint64_t size_of_block)
Reallocates a memory block to a new size.
Definition array.c:181
int find_min_max(double *min, double *max, double *list, int64_t n)
Finds the minimum and maximum values in a list of doubles.
Definition findMinMax.c:33
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.
long approximate_percentiles(double *position, double *percent, long positions, double *x, long n, long bins)
Approximates multiple percentiles of an array of doubles using histogram bins.
Definition median.c:187
long compute_average(double *value, double *data, int64_t n)
Computes the average of an array of doubles.
Definition median.c:144
long compute_middle(double *value, double *data, long n)
Computes the middle value between the minimum and maximum of an array of doubles.
Definition median.c:165
long compute_percentiles(double *position, double *percent, long positions, double *x, long n)
Computes multiple percentiles of an array of doubles.
Definition median.c:84
long compute_percentiles_flagged(double *position, double *percent, long positions, double *x, int32_t *keep, int64_t n)
Computes multiple percentiles of an array of doubles, considering only flagged elements.
Definition median.c:113
long compute_percentile(double *value, double *x, long n, double percentile)
Computes a specific percentile of an array of doubles.
Definition median.c:56
long compute_median(double *value, double *x, long n)
Computes the median of an array of doubles.
Definition median.c:29
int double_cmpasc(const void *a, const void *b)
Compare two doubles in ascending order.