SDDSlib
Loading...
Searching...
No Matches
median.c File Reference

Computes statistical measures such as median, percentiles, average, and middle values. More...

#include "mdb.h"

Go to the source code of this file.

Functions

long compute_median (double *value, double *x, long n)
 Computes the median of an array of doubles.
 
long compute_percentile (double *value, double *x, long n, double percentile)
 Computes a specific percentile of an array of doubles.
 
long compute_percentiles (double *position, double *percent, long positions, double *x, long n)
 Computes multiple percentiles of an array of doubles.
 
long compute_percentiles_flagged (double *position, double *percent, long positions, double *x, int32_t *keep, int64_t n)
 Computes multiple percentiles of an array of doubles, considering only flagged elements.
 
long compute_average (double *value, double *data, int64_t n)
 Computes the average of an array of doubles.
 
long compute_middle (double *value, double *data, long n)
 Computes the middle value between the minimum and maximum of an array of doubles.
 
long approximate_percentiles (double *position, double *percent, long positions, double *x, long n, long bins)
 Approximates multiple percentiles of an array of doubles using histogram bins.
 

Detailed Description

Computes statistical measures such as median, percentiles, average, and middle values.

This file contains functions to compute median, percentiles, averages, and the middle value of datasets. See also the find_XX() routines in rowmedian.c which return the position of the median and other statistics.

License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Author
M. Borland, C. Saunders, R. Soliday, Y. Wang

Definition in file median.c.

Function Documentation

◆ approximate_percentiles()

long approximate_percentiles ( double * position,
double * percent,
long positions,
double * x,
long n,
long bins )

Approximates multiple percentiles of an array of doubles using histogram bins.

Parameters
position: Pointer to the array to store the computed percentile positions.
percent: Pointer to the array of percentiles to compute (each value between 0 and 100).
positions: Number of percentiles to compute.
x: Pointer to the array of doubles.
n: Number of elements in the array.
bins: Number of histogram bins to use for approximation.
Returns
Returns 1 on success, 0 on failure.

Definition at line 187 of file median.c.

/**
 * @brief Approximates multiple percentiles of an array of doubles using histogram bins.
 *
 * The data are histogrammed over a range centered on (min + max) / 2 and widened
 * by a factor (1 + 1/bins) relative to max - min. The histogram is converted in
 * place into a cumulative distribution normalized by its last entry. For each
 * requested percentile, k is the index of the last leading bin whose cumulative
 * fraction is below percent/100 (0 if there is none), and the reported value is
 * xMin + k * (xMax - xMin) / bins.
 *
 * @param position  Output array of length positions receiving the approximations.
 * @param percent   Percentiles to evaluate (each value expected between 0 and 100).
 * @param positions Number of percentiles to compute.
 * @param x         Pointer to the array of doubles.
 * @param n         Number of elements in x.
 * @param bins      Number of histogram bins; must be at least 2.
 * @return 1 on success; 0 if bins < 2, positions <= 0, n <= 0, the histogram
 *         cannot be allocated, or find_min_max() reports failure.
 */
long approximate_percentiles(double *position, double *percent, long positions, double *x, long n, long bins) {
  double *hist, *cdf, xMin, xMax, xCenter, xRange, total;
  long i, j, k;

  if (bins < 2 || positions <= 0 || n <= 0)
    return 0;
  if (!(hist = malloc(sizeof(*hist) * bins)))
    return 0;

  /* Without this check xMin/xMax would be used uninitialized on failure. */
  if (!find_min_max(&xMin, &xMax, x, n)) {
    free(hist);
    return 0;
  }

  xCenter = (xMax + xMin) / 2;
  xRange = (xMax - xMin) * (1 + 1. / bins) / 2;
  xMin = xCenter - xRange;
  xMax = xCenter + xRange;
  make_histogram(hist, bins, xMin, xMax, x, n, 1);

  /* Accumulate in place: the histogram storage becomes the CDF. */
  cdf = hist;
  for (i = 1; i < bins; i++)
    cdf[i] += cdf[i - 1];

  /* Capture the normalizer once so the loop does not depend on the last
   * element being divided last. */
  total = cdf[bins - 1];
  for (i = 0; i < bins; i++)
    cdf[i] /= total;

  for (j = 0; j < positions; j++) {
    double fraction = percent[j] / 100.0;

    /* Scan leading bins while the cumulative fraction is below the target. */
    k = 0;
    for (i = 0; i < bins && cdf[i] < fraction; i++)
      k = i;
    position[j] = xMin + (k * (xMax - xMin)) / bins;
  }

  free(hist);
  return 1;
}
int find_min_max(double *min, double *max, double *list, int64_t n)
Finds the minimum and maximum values in a list of doubles.
Definition findMinMax.c:33
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.

◆ compute_average()

long compute_average ( double * value,
double * data,
int64_t n )

Computes the average of an array of doubles.

Parameters
value: Pointer to store the computed average value.
data: Pointer to the array of doubles.
n: Number of elements in the array.
Returns
Returns 1 on success, 0 on failure.

Definition at line 144 of file median.c.

/**
 * @brief Computes the average of an array of doubles.
 *
 * Sums the elements in index order and divides by the element count.
 *
 * @param value Receives the arithmetic mean; left untouched when n <= 0.
 * @param data  Pointer to the array of doubles.
 * @param n     Number of elements in data.
 * @return 1 on success, 0 if n <= 0.
 */
long compute_average(double *value, double *data, int64_t n) {
  double total = 0;
  int64_t idx = 0;

  if (n <= 0)
    return 0;

  while (idx < n)
    total += data[idx++];

  *value = total / n;
  return 1;
}

◆ compute_median()

long compute_median ( double * value,
double * x,
long n )

Computes the median of an array of doubles.

Parameters
value: Pointer to store the computed median value.
x: Pointer to the array of doubles.
n: Number of elements in the array.
Returns
Returns 1 on success, 0 on failure.

Definition at line 29 of file median.c.

29 {
30 static double *data = NULL;
31 static long last_n = 0;
32 long i;
33
34 if (n <= 0)
35 return 0;
36 if (n > last_n) {
37 data = trealloc(data, sizeof(*data) * n);
38 last_n = n;
39 }
40 for (i = 0; i < n; i++)
41 data[i] = x[i];
42 qsort((void *)data, n, sizeof(*data), double_cmpasc);
43 *value = data[n / 2];
44 return 1;
45}
void * trealloc(void *old_ptr, uint64_t size_of_block)
Reallocates a memory block to a new size.
Definition array.c:181
int double_cmpasc(const void *a, const void *b)
Compare two doubles in ascending order.

◆ compute_middle()

long compute_middle ( double * value,
double * data,
long n )

Computes the middle value between the minimum and maximum of an array of doubles.

Parameters
value: Pointer to store the computed middle value.
data: Pointer to the array of doubles.
n: Number of elements in the array.
Returns
Returns 1 on success, 0 on failure.

Definition at line 165 of file median.c.

/**
 * @brief Computes the middle value between the minimum and maximum of an array of doubles.
 *
 * @param value Receives (min + max) / 2; left untouched on failure.
 * @param data  Pointer to the array of doubles.
 * @param n     Number of elements in data.
 * @return 1 on success; 0 if n <= 0 or find_min_max() reports failure.
 */
long compute_middle(double *value, double *data, long n) {
  double lowest, highest;

  if (n > 0 && find_min_max(&lowest, &highest, data, n)) {
    *value = (lowest + highest) / 2;
    return 1;
  }
  return 0;
}

◆ compute_percentile()

long compute_percentile ( double * value,
double * x,
long n,
double percentile )

Computes a specific percentile of an array of doubles.

Parameters
value: Pointer to store the computed percentile value.
x: Pointer to the array of doubles.
n: Number of elements in the array.
percentile: The desired percentile to compute (0-100).
Returns
Returns 1 on success, 0 on failure.

Definition at line 56 of file median.c.

56 {
57 static double *data = NULL;
58 static long last_n = 0;
59 long i;
60
61 if (n <= 0 || percentile < 0 || percentile > 100)
62 return 0;
63 if (n > last_n) {
64 data = trealloc(data, sizeof(*data) * n);
65 last_n = n;
66 }
67 for (i = 0; i < n; i++)
68 data[i] = x[i];
69 qsort((void *)data, n, sizeof(*data), double_cmpasc);
70 *value = data[(long)((n - 1) * (percentile / 100.0))];
71 return 1;
72}

◆ compute_percentiles()

long compute_percentiles ( double * position,
double * percent,
long positions,
double * x,
long n )

Computes multiple percentiles of an array of doubles.

Parameters
position: Pointer to the array to store the computed percentile values.
percent: Pointer to the array of percentiles to compute (each value between 0 and 100).
positions: Number of percentiles to compute.
x: Pointer to the array of doubles.
n: Number of elements in the array.
Returns
Returns 1 on success, 0 on failure.

Definition at line 84 of file median.c.

84 {
85 static double *data = NULL;
86 static long last_n = 0;
87 long ip;
88
89 if (n <= 0 || positions <= 0)
90 return 0;
91 if (n > last_n) {
92 data = trealloc(data, sizeof(*data) * n);
93 last_n = n;
94 }
95 memcpy((char *)data, (char *)x, sizeof(*x) * n);
96 qsort((void *)data, n, sizeof(*data), double_cmpasc);
97 for (ip = 0; ip < positions; ip++)
98 position[ip] = data[(long)((n - 1) * (percent[ip] / 100.0))];
99 return 1;
100}

◆ compute_percentiles_flagged()

long compute_percentiles_flagged ( double * position,
double * percent,
long positions,
double * x,
int32_t * keep,
int64_t n )

Computes multiple percentiles of an array of doubles, considering only flagged elements.

Parameters
position: Pointer to the array to store the computed percentile values.
percent: Pointer to the array of percentiles to compute (each value between 0 and 100).
positions: Number of percentiles to compute.
x: Pointer to the array of doubles.
keep: Pointer to the array of flags indicating which elements to include.
n: Number of elements in the array.
Returns
Returns 1 on success, 0 on failure.

Definition at line 113 of file median.c.

113 {
114 static double *data = NULL;
115 static int64_t last_n = 0;
116 int64_t ip, jp, count;
117
118 if (n <= 0 || positions <= 0)
119 return 0;
120 for (ip=count=0; ip<n; ip++)
121 if (keep[ip])
122 count++;
123 if (count > last_n) {
124 data = trealloc(data, sizeof(*data) * count);
125 last_n = count;
126 }
127 for (ip=jp=0; ip<n; ip++)
128 if (keep[ip])
129 data[jp++] = x[ip];
130 qsort((void *)data, count, sizeof(*data), double_cmpasc);
131 for (ip = 0; ip < positions; ip++)
132 position[ip] = data[(long)((count - 1) * (percent[ip] / 100.0))];
133 return 1;
134}