SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddslocaldensity.c
Go to the documentation of this file.
1/**
2 * @file sddslocaldensity.c
3 * @brief Computes the local density of data points using methods such as fraction, spread, or Kernel Density Estimation (KDE).
4 *
5 * @details
6 * This program processes input data to compute local densities using specified methods such as
7 * `fraction`, `spread`, or `KDE`. KDE supports advanced configuration options for binning, grid
8 * output, sampling, and bandwidth adjustments. The tool supports multithreading for improved
9 * performance and can handle large datasets efficiently.
10 *
11 * @section Usage
12 * ```
13 * sddslocaldensity [<inputfile>] [<outputfile>]
14 * [-pipe=[input][,output]]
15 * -columns=<normalizationMode>,<name>[,...]
16 * {
17 * -fraction=<value> |
18 * -spread=<value> |
19 * -kde=bins=<number>[,gridoutput=<filename>][,nsigma=<value>][,explimit=<value>]
20 * [,sample=<fraction>|use=<number>][,spanPages]
21 * }
22 * [-output=<columnName>]
23 * [-weight=<columnName>]
24 * [-threads=<number>]
25 * [-verbose]
26 * ```
27 *
28 * @section Options
29 * | Required | Description |
30 * |---------------------|---------------------------------------------------------------------------------------|
31 * | `-columns` | Specifies columns to include and their normalization mode (`none`, `range`, or `rms`). |
32 *
33 * | Option | Description |
34 * |---------------------|------------------------------------------------------------------------------------------------------|
35 * | `-pipe` | Standard SDDS Toolkit pipe option for input and output redirection. |
36 * | `-threads` | Specifies the number of threads to use for computation. |
37 * | `-fraction` | Fraction of the range to identify "nearby" points. |
38 * | `-spread` | Standard deviation of the weighting function as a fraction of the range. |
39 * | `-kde` | Enables n-dimensional Kernel Density Estimation with options for bins, grid output, and sampling. |
40 * | `-output` | Specifies the output column name (default: `LocalDensity`). |
41 * | `-weight` | Name of the column for weighting the contributions of each point. |
42 * | `-verbose` | Prints progress information while running. |
43 *
44 * @subsection Incompatibilities
45 * - `-kde=spanPages` is incompatible with the `-pipe` option.
46 * - The `-weight` option is only supported for KDE mode.
47 * - One and only one of the following must be specified:
48 * - `-fraction`
49 * - `-spread`
50 * - `-kde`
51 *
52 * @subsection Requirements
53 * - For the `-kde` option:
54 * - The `bins` value must be >= 3.
55 * - `nsigma` must be >= 1.
56 * - The `explimit` must be within (0, 1].
57 *
58 * @copyright
59 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
60 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
61 *
62 * @license
63 * This file is distributed under the terms of the Software License Agreement
64 * found in the file LICENSE included with this distribution.
65 *
66 * @authors
67 * M. Borland, R. Soliday, N. Kuklev
68 */
69
70#include "mdb.h"
71#include "SDDS.h"
72#include "scan.h"
73#include <ctype.h>
74#include <gsl/gsl_statistics.h>
75#include <gsl/gsl_sort.h>
76#include <gsl/gsl_math.h>
77
78#if defined(linux) || (defined(_WIN32) && !defined(_MINGW))
79# include <omp.h>
80#else
81# define NOTHREADS 1
82#endif
83
84/* Enumeration for option types */
85enum option_type {
86 CLO_COLUMNS,
87 CLO_PIPE,
88 CLO_OUTPUT,
89 CLO_FRACTION,
90 CLO_SPREAD,
91 CLO_KDE,
92 CLO_VERBOSE,
93 CLO_THREADS,
94 CLO_WEIGHT,
95 N_OPTIONS
96};
97
98char *option[N_OPTIONS] = {
99 "columns",
100 "pipe",
101 "output",
102 "fraction",
103 "spread",
104 "kde",
105 "verbose",
106 "threads",
107 "weight",
108};
109
110#define NORM_NONE 0
111#define NORM_RANGE 1
112#define NORM_RMS 2
113#define NORM_OPTIONS 3
114char *normalizationOption[NORM_OPTIONS] = {
115 "none", "range", "rms"};
116
117/* Improved usage message for better readability */
118static char *USAGE =
119 "sddslocaldensity [<inputfile>] [<outputfile>]\n"
120 " [-pipe=[input][,output]]\n"
121 " -columns=<normalizationMode>,<name>[,...]\n"
122 " {\n"
123 " -fraction=<value> |\n"
124 " -spread=<value> |\n"
125 " -kde=bins=<number>[,gridoutput=<filename>][,nsigma=<value>][,explimit=<value>]\n"
126 " [,sample=<fraction>|use=<number>][,spanPages]\n"
127 " }\n"
128 " [-output=<columnName>]\n"
129 " [-weight=<columnName>]\n"
130 " [-threads=<number>]\n"
131 " [-verbose]\n"
132 "Options:\n"
133 " -pipe The standard SDDS Toolkit pipe option.\n"
134 " -threads The number of threads to use.\n"
135 " -columns Specifies the names of the columns to include. The names may include wildcards.\n"
136 " The normalization mode is one of \"none\", \"range\", or \"rms\".\n"
137 " Note that the normalization mode is irrelevant when fraction or spread options are used.\n"
138 " -weight Name of the column with which to weight the contributions of each point.\n"
139 " -fraction Fraction of the range to use to identify \"nearby\" points.\n"
140 " -spread Standard deviation of the weighting function as a fraction of the range.\n"
141 " -kde If specified, use n-dimensional Kernel Density Estimation instead of a point-based algorithm.\n"
142 " Highly recommended when the number of data points is large to avoid N² growth in runtime.\n"
143 " nsigma gives the number of standard deviations of the bandwidth over which to sum the\n"
144 " Gaussian factor; smaller numbers can considerably improve performance at the expense of\n"
145 " lower accuracy in the tails. Using the sample qualifier allows using a randomly-sampled\n"
146 " fraction of the data to create the density map. The use qualifier allows computing the\n"
147 " sample fraction so that approximately the indicated number of samples is used.\n"
148 " If spanPages is given, the KDE density map is created using data from all pages of the file;\n"
149 " however, the output retains the original page breakdown; useful when processing very large\n"
150 " quantities of data.\n"
151 " -output Name of the output column. Defaults to \"LocalDensity\".\n"
152 " -verbose Print progress information while running.\n\n"
153 "Program by Michael Borland. (" __DATE__ " " __TIME__ "\", SVN revision: " SVN_VERSION ")\n";
154
155typedef struct
156{
157 short normalizationMode;
158 long suppliedNames;
159 char **suppliedName; /* may include wildcards */
161
162long resolveColumnNames(SDDS_DATASET *SDDSin, COLUMN_LIST *columnList, int32_t nColumnLists,
163 char ***columnName, short **normalizationMode, int32_t *nColumns);
164void normalizeData(double *data, int64_t rows, short mode, double min, double max, int threads);
165
166typedef struct bin_layer {
167 long bins; /* i = (0,...,bins-1) */
168 double *sum; /* defined only at the lowest layer */
169 struct bin_layer *nextLowerLayer;
170} BIN_LAYER;
171
172double SilvermansBandwidth(double data[], long dimensions, int64_t M);
173void createBinTree(long bins, long inputColumns, BIN_LAYER *layer);
174void fillIndexArray(double *point, long bins, long inputColumns, long *indexList, double *min, double *delta);
175void addToBinValue(int myid, long bins, long inputColumns, BIN_LAYER *layer, long *indexList, double quantity);
176void zeroBinValues(long bins, long inputColumns, BIN_LAYER *layer);
177double retrieveBinValue(long bins, long inputColumns, BIN_LAYER *layer, long *indexList);
178double interpolateBinValue(long bins, long inputColumns, BIN_LAYER *layer, double *data, double *min, double *delta, long *indexList);
179void interpolateBinValue0(int myid);
180void addDensityToBinTree(int myid, BIN_LAYER *layer, long bins, long inputColumns,
181 double *min, double *delta, double *bw, uint64_t row, uint64_t rows, double **data,
182 double weight, long nSigmas, double expLimit);
183void addDensityToBinTree1(int myid, BIN_LAYER *layer, long bins, long inputColumns,
184 double *min, double *delta, double *bw, double *data, double weight, uint64_t rows, long nSigmas,
185 double expLimit);
186int advanceCounter(long *index, long *index1, long *index2, long n);
187void dumpBinValues(SDDS_DATASET *SDDSkde, SDDS_DATASET *SDDSin, long bins, char **inputColumn, long inputColumns,
188 char *densityColumn, BIN_LAYER *layer, double *min, double *delta);
189void rescaleDensity(long bins, long inputColumns, BIN_LAYER *layer, double factor);
190void addDensityToBinTree0(int myid);
191
192#define KDE_BINS_SEEN 0x001UL
193#define KDE_GRIDOUTPUT_SEEN 0x002UL
194#define KDE_NSIGMAS_SEEN 0x004UL
195#define KDE_EXPLIMIT_SEEN 0x008UL
196#define KDE_SAMPLE_SEEN 0x010UL
197#define KDE_USE_SEEN 0x020UL
198#define KDE_SPAN_PAGES 0x040UL
199
200int threads = 1;
201BIN_LAYER binTree;
202long bins, nKdeSigmas;
203double *min, *max, *delta;
204int64_t iRow, rows, jRow, rowsSampled;
205int64_t *rowsSampledThread = NULL;
206double **data;
207double kdeExpLimit;
208int verbose;
209double kdeSampleFraction;
210int32_t inputColumns;
211double *bw = NULL;
212double *density = NULL;
213double *weightValue = NULL;
214
215int main(int argc, char **argv) {
216 int iArg;
217 COLUMN_LIST *columnList;
218 int32_t nColumnLists;
219 char **inputColumn, *weightColumn;
220 short *normalizationMode;
221 double fraction, spreadFraction;
222 char *input, *output;
223 char *outputColumn;
224 long i, readCode;
225 unsigned long pipeFlags;
226 SCANNED_ARG *scanned;
227 SDDS_DATASET SDDSin, SDDSout, SDDSkde;
228 char *kdeGridOutput;
229 int32_t kdeNumberToUse;
230 long pass;
231 unsigned long kdeFlags;
232 double startTime;
233
235 argc = scanargs(&scanned, argc, argv);
236 if (argc < 3 || argc > (3 + N_OPTIONS))
237 bomb(NULL, USAGE);
238
239 output = input = NULL;
240 outputColumn = weightColumn = NULL;
241 columnList = NULL;
242 nColumnLists = 0;
243 pipeFlags = 0;
244 fraction = spreadFraction = 0;
245 bins = 0;
246 kdeGridOutput = NULL;
247 verbose = 0;
248 nKdeSigmas = 5;
249 kdeExpLimit = 1e-16;
250 kdeSampleFraction = 1;
251 kdeNumberToUse = -1;
252 kdeFlags = 0;
253
254 for (iArg = 1; iArg < argc; iArg++) {
255 if (scanned[iArg].arg_type == OPTION) {
256 /* process options here */
257 switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
258 case CLO_COLUMNS:
259 if (scanned[iArg].n_items < 3)
260 SDDS_Bomb("invalid -columns syntax");
261 columnList = SDDS_Realloc(columnList, sizeof(*columnList) * (nColumnLists + 1));
262 if ((columnList[nColumnLists].normalizationMode = match_string(scanned[iArg].list[1], normalizationOption,
263 NORM_OPTIONS, 0)) == -1)
264 SDDS_Bomb("invalid normalization mode given");
265 columnList[nColumnLists].suppliedNames = scanned[iArg].n_items - 2;
266 columnList[nColumnLists].suppliedName = tmalloc(sizeof(*(columnList[nColumnLists].suppliedName)) * columnList[nColumnLists].suppliedNames);
267 for (i = 0; i < columnList[nColumnLists].suppliedNames; i++) {
268 columnList[nColumnLists].suppliedName[i] = scanned[iArg].list[i + 2];
269 scanned[iArg].list[i + 2] = NULL;
270 }
271 nColumnLists++;
272 break;
273 case CLO_OUTPUT:
274 if (scanned[iArg].n_items != 2)
275 SDDS_Bomb("invalid -outputColumn syntax: give a name");
276 outputColumn = scanned[iArg].list[1];
277 scanned[iArg].list[1] = NULL;
278 break;
279 case CLO_FRACTION:
280 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%lf", &fraction) != 1 || fraction <= 0)
281 SDDS_Bomb("invalid -fraction syntax: give a value greater than 0");
282 break;
283 case CLO_SPREAD:
284 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%lf", &spreadFraction) != 1 || spreadFraction <= 0)
285 SDDS_Bomb("invalid -spread syntax: give a value greater than 0");
286 break;
287 case CLO_THREADS:
288 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%d", &threads) != 1 || threads <= 0)
289 SDDS_Bomb("invalid -threads syntax: give a value greater than 0");
290 break;
291 case CLO_KDE:
292 if (scanned[iArg].n_items < 2)
293 SDDS_Bomb("invalid -kde syntax: give number of bins");
294 scanned[iArg].n_items -= 1;
295 kdeFlags = 0;
296 if (!scanItemList(&kdeFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
297 "bins", SDDS_LONG, &bins, 1, KDE_BINS_SEEN,
298 "gridoutput", SDDS_STRING, &kdeGridOutput, 1, KDE_GRIDOUTPUT_SEEN,
299 "nsigmas", SDDS_LONG, &nKdeSigmas, 1, KDE_NSIGMAS_SEEN,
300 "explimit", SDDS_DOUBLE, &kdeExpLimit, 1, KDE_EXPLIMIT_SEEN,
301 "sample", SDDS_DOUBLE, &kdeSampleFraction, 1, KDE_SAMPLE_SEEN,
302 "use", SDDS_LONG, &kdeNumberToUse, 1, KDE_USE_SEEN,
303 "spanpages", -1, NULL, 0, KDE_SPAN_PAGES,
304 NULL))
305 SDDS_Bomb("invalid -kde syntax");
306 if (bins < 3)
307 SDDS_Bomb("Number of bins should be at least 3 for KDE");
308 if (nKdeSigmas < 1)
309 SDDS_Bomb("Number of sigmas should be at least 1 for KDE");
310 if (kdeExpLimit <= 0 || kdeExpLimit > 1)
311 SDDS_Bomb("Exponential limit for KDE must be (0, 1].");
312 if (kdeSampleFraction <= 0 || kdeSampleFraction > 1)
313 SDDS_Bomb("Sample fraction for KDE must be (0, 1].");
314 if (kdeFlags & KDE_USE_SEEN) {
315 if (kdeNumberToUse <= 1)
316 SDDS_Bomb("Number to use for KDE must be at least 1.");
317 if (kdeFlags & KDE_SAMPLE_SEEN)
318 SDDS_Bomb("Give sample fraction or number to use for KDE, not both.");
319 }
320 break;
321 case CLO_PIPE:
322 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
323 SDDS_Bomb("invalid -pipe syntax");
324 break;
325 case CLO_VERBOSE:
326 verbose = 1;
327 break;
328 case CLO_WEIGHT:
329 if (scanned[iArg].n_items != 2)
330 SDDS_Bomb("invalid -weight syntax: give the name of a column");
331 weightColumn = scanned[iArg].list[1];
332 break;
333 default:
334 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
335 exit(EXIT_FAILURE);
336 break;
337 }
338 } else {
339 if (!input)
340 input = scanned[iArg].list[0];
341 else if (!output)
342 output = scanned[iArg].list[0];
343 else
344 SDDS_Bomb("too many filenames seen");
345 }
346 }
347
348 startTime = delapsed_time();
349 if (((fraction > 0 ? 1 : 0) + (spreadFraction > 0 ? 1 : 0) + (bins > 0)) > 1)
350 SDDS_Bomb("give only one of -fraction, -spread, or -kde");
351
352 processFilenames("sddslocaldensity", &input, &output, pipeFlags, 0, NULL);
353
354 if (kdeFlags & KDE_SPAN_PAGES && pipeFlags)
355 SDDS_Bomb("-kde=spanPages is incompatible with -pipe option");
356 if (!kdeFlags && weightColumn)
357 SDDS_Bomb("-weight is only supported for -kde at present");
358
359 if (!nColumnLists)
360 SDDS_Bomb("supply the names of columns to include with the -columns option");
361
362 if (!SDDS_InitializeInput(&SDDSin, input))
363 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
364
365 if (!resolveColumnNames(&SDDSin, columnList, nColumnLists, &inputColumn, &normalizationMode, &inputColumns))
366 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
367 if (weightColumn && !(SDDS_CheckColumn(&SDDSin, weightColumn, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK))
368 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
369
370 if (!SDDS_InitializeCopy(&SDDSout, &SDDSin, output, "w"))
371 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
372
373 if (!outputColumn)
374 cp_str(&outputColumn, "LocalDensity");
375
376 if (!SDDS_DefineSimpleColumn(&SDDSout, outputColumn, NULL, SDDS_DOUBLE) ||
377 !SDDS_DefineSimpleParameter(&SDDSout, "sddslocaldensityElapsedTime", "s", SDDS_DOUBLE) ||
378 !SDDS_DefineSimpleParameter(&SDDSout, "sddslocaldensityThreads", NULL, SDDS_SHORT))
379 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
380
381 if (!SDDS_WriteLayout(&SDDSout))
382 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
383
384 if (kdeGridOutput) {
385 if (!SDDS_InitializeOutput(&SDDSkde, SDDS_BINARY, 1, NULL, NULL, kdeGridOutput))
386 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
387 for (i = 0; i < inputColumns; i++)
388 if (!SDDS_TransferColumnDefinition(&SDDSkde, &SDDSin, inputColumn[i], NULL))
389 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
390 if (!SDDS_TransferAllParameterDefinitions(&SDDSkde, &SDDSin, SDDS_TRANSFER_OVERWRITE))
391 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
392 if (!SDDS_DefineSimpleColumn(&SDDSkde, outputColumn, NULL, SDDS_DOUBLE))
393 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
394 if (!SDDS_WriteLayout(&SDDSkde))
395 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
396 }
397
398 data = tmalloc(sizeof(*data) * inputColumns);
399
400 if (bins) {
401 binTree.sum = NULL;
402 binTree.bins = 0;
403 binTree.nextLowerLayer = NULL;
404 createBinTree(bins, inputColumns, &binTree);
405 }
406
407 min = calloc(inputColumns, sizeof(*min));
408 max = calloc(inputColumns, sizeof(*max));
409 delta = malloc(sizeof(*delta) * inputColumns);
410#if !defined(NOTHREADS)
411 omp_set_num_threads(threads);
412#endif
413 bw = malloc(sizeof(*bw) * inputColumns);
414
415 pass = 1;
416 while ((kdeFlags & KDE_SPAN_PAGES && pass < 4) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
417 while ((readCode = SDDS_ReadPage(&SDDSin)) > 0) {
418 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
419 if (!SDDS_CopyPage(&SDDSout, &SDDSin))
420 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
421 }
422 if ((rows = SDDS_CountRowsOfInterest(&SDDSin))) {
423 if (verbose)
424 fprintf(stderr, "Processing page %ld (pass %ld) with %" PRId64 " rows\n", readCode, pass, rows);
425 if (weightColumn &&
426 !(weightValue = SDDS_GetColumnInDoubles(&SDDSin, weightColumn)))
427 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
428 for (i = 0; i < inputColumns; i++) {
429 if (!(data[i] = SDDS_GetColumnInDoubles(&SDDSin, inputColumn[i])))
430 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
431 if (pass == 1) {
432 update_min_max(&min[i], &max[i], data[i], rows, kdeFlags & KDE_SPAN_PAGES ? readCode == 1 : 1);
433 if (bins)
434 delta[i] = (max[i] - min[i]) / (bins - 1);
435 else
436 delta[i] = 0;
437 if (verbose)
438 fprintf(stderr, "%s:[%le, %le] delta=%le\n", inputColumn[i], min[i], max[i], delta[i]);
439 }
440 if (pass != 1)
441 normalizeData(data[i], rows, normalizationMode[i], min[i], max[i], threads);
442 }
443 if (bins) {
444 if ((!(kdeFlags & KDE_SPAN_PAGES) && pass == 1) ||
445 (kdeFlags & KDE_SPAN_PAGES && pass == 2 && readCode == 1)) {
446 for (i = 0; i < inputColumns; i++) {
447 bw[i] = SilvermansBandwidth(data[i], inputColumns, rows);
448 if (verbose)
449 fprintf(stderr, "Bandwidth for %s is %le\n", inputColumn[i], bw[i]);
450 }
451 }
452
453 if ((!(kdeFlags & KDE_SPAN_PAGES) && pass == 1) ||
454 (kdeFlags & KDE_SPAN_PAGES && pass == 2)) {
455 if (verbose)
456 fprintf(stderr, "Summing density over grid.\n");
457 rowsSampled = 0;
458 if (kdeFlags & KDE_USE_SEEN) {
459 if ((kdeSampleFraction = (1.0 * kdeNumberToUse) / rows) > 1)
460 kdeSampleFraction = 1;
461 }
462 rowsSampledThread = calloc(threads, sizeof(*rowsSampledThread));
463 rowsSampled = 0;
464#pragma omp parallel
465 {
466#if defined(NOTHREADS)
467 addDensityToBinTree0(0);
468#else
469 addDensityToBinTree0(omp_get_thread_num());
470#endif
471 }
472 for (i = 0; i < threads; i++) {
473 rowsSampled += rowsSampledThread[i];
474 }
475 free(rowsSampledThread);
476
477 if (rowsSampled != rows) {
478 if (verbose)
479 fprintf(stderr, "%" PRId64 " of %" PRId64 " rows sampled\n", rowsSampled, rows);
480 rescaleDensity(bins, inputColumns, &binTree, (1.0 * rows / rowsSampled));
481 }
482 if (kdeGridOutput) {
483 if (verbose)
484 fprintf(stderr, "Dumping KDE grid\n");
485 dumpBinValues(&SDDSkde, &SDDSin, bins, inputColumn, inputColumns, outputColumn, &binTree, min, delta);
486 }
487 }
488
489 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
490
491 density = malloc(sizeof(*density) * rows);
492 if (verbose)
493 fprintf(stderr, "Interpolating density\n");
494#pragma omp parallel
495 {
496#if defined(NOTHREADS)
497 interpolateBinValue0(0);
498#else
499 interpolateBinValue0(omp_get_thread_num());
500#endif
501 }
502
503 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, density, rows, outputColumn))
504 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
505 free(density);
506 if (!(kdeFlags & KDE_SPAN_PAGES)) {
507 if (verbose)
508 fprintf(stderr, "Setting density map to zero\n");
509 zeroBinValues(bins, inputColumns, &binTree);
510 }
511 }
512 for (i = 0; i < inputColumns; i++)
513 free(data[i]);
514 } /* KDE mode */
515 else {
516 if (fraction > 0) {
517 double *count, *epsilon, distance;
518 count = tmalloc(rows * sizeof(*count));
519 epsilon = tmalloc(inputColumns * sizeof(*epsilon));
520 for (i = 0; i < inputColumns; i++)
521 epsilon[i] = fraction * (max[i] - min[i]);
522 for (iRow = 0; iRow < rows; iRow++) {
523 int64_t nInside = 0;
524 for (jRow = 0; jRow < rows; jRow++) {
525 short inside = 1;
526 if (iRow != jRow) {
527 for (i = 0; i < inputColumns; i++) {
528 distance = fabs(data[i][iRow] - data[i][jRow]);
529 if (distance > epsilon[i]) {
530 inside = 0;
531 break;
532 }
533 }
534 }
535 if (inside)
536 nInside++;
537 }
538 count[iRow] = nInside;
539 }
540 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, count, rows, outputColumn))
541 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
542 free(count);
543 free(epsilon);
544 } else if (spreadFraction > 0) {
545 double *count, *spread;
546 count = tmalloc(rows * sizeof(*count));
547 spread = tmalloc(sizeof(*spread) * inputColumns);
548 for (i = 0; i < inputColumns; i++) {
549 spread[i] = spreadFraction * (max[i] - min[i]);
550 }
551 for (iRow = 0; iRow < rows; iRow++) {
552 count[iRow] = 0;
553 for (jRow = 0; jRow < rows; jRow++) {
554 double term = 1;
555 for (i = 0; i < inputColumns; i++)
556 term *= exp(-sqr((data[i][iRow] - data[i][jRow]) / spread[i]) / 2);
557 count[iRow] += term;
558 }
559 }
560 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, count, rows, outputColumn))
561 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
562 free(count);
563 free(spread);
564 } else {
565 double *distance;
566 distance = tmalloc(rows * sizeof(*distance));
567 for (iRow = 0; iRow < rows; iRow++) {
568 double sum;
569 distance[iRow] = 0;
570 for (jRow = 0; jRow < rows; jRow++) {
571 sum = 0;
572 for (i = 0; i < inputColumns; i++)
573 sum += sqr(data[i][iRow] - data[i][jRow]);
574 distance[iRow] += sqrt(sum);
575 }
576 }
577 for (iRow = 0; iRow < rows; iRow++)
578 distance[iRow] = rows / distance[iRow];
579 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, distance, rows, outputColumn))
580 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
581 free(distance);
582 }
583 }
584 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
585 if (!SDDS_SetParameters(&SDDSout, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE,
586 "sddslocaldensityElapsedTime", delapsed_time() - startTime,
587 "sddslocaldensityThreads", (short)threads,
588 NULL) ||
589 !SDDS_WritePage(&SDDSout))
590 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
591 }
592 }
593 }
594 if (kdeFlags & KDE_SPAN_PAGES && pass != 3) {
595 if (verbose)
596 fprintf(stderr, "Closing input file %s and reopening for second pass\n", input);
597 if (!SDDS_Terminate(&SDDSin) || !SDDS_InitializeInput(&SDDSin, input)) {
598 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
599 exit(EXIT_FAILURE);
600 }
601 }
602 pass++;
603 }
604 if (!SDDS_Terminate(&SDDSin) || !SDDS_Terminate(&SDDSout)) {
605 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
606 exit(EXIT_FAILURE);
607 }
608 if (kdeGridOutput && !SDDS_Terminate(&SDDSkde)) {
609 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
610 exit(EXIT_FAILURE);
611 }
612 if (verbose)
613 printf("Execution completed in %le s\n", delapsed_time() - startTime);
614 return EXIT_SUCCESS;
615}
616
617long resolveColumnNames(SDDS_DATASET *SDDSin, COLUMN_LIST *columnList, int32_t nColumnLists,
618 char ***columnName, short **normalizationMode, int32_t *nColumns) {
619 int32_t i, j, k;
620 char **newColumnName;
621 int32_t newColumns;
622 short duplicate;
623
624 *columnName = NULL;
625 *normalizationMode = NULL;
626 *nColumns = 0;
627
628 for (i = 0; i < nColumnLists; i++) {
629 SDDS_SetColumnFlags(SDDSin, 0);
630 for (j = 0; j < columnList[i].suppliedNames; j++) {
631 if (!SDDS_SetColumnsOfInterest(SDDSin, SDDS_MATCH_STRING, columnList[i].suppliedName[j], SDDS_OR))
632 return 0;
633 }
634 if (!(newColumnName = SDDS_GetColumnNames(SDDSin, &newColumns)) || newColumns == 0) {
635 SDDS_SetError("no columns found for one or more -column options");
636 return 0;
637 }
638 for (j = 0; j < newColumns; j++) {
639 duplicate = 0;
640 for (k = 0; k < *nColumns; k++) {
641 if (strcmp(newColumnName[j], (*columnName)[k]) == 0) {
642 /* overwrite the previous normalization mode */
643 (*normalizationMode)[k] = columnList[i].normalizationMode;
644 duplicate = 1;
645 }
646 }
647 if (!duplicate) {
648 *columnName = SDDS_Realloc(*columnName, sizeof(**columnName) * (*nColumns + 1));
649 (*columnName)[*nColumns] = newColumnName[j];
650 *normalizationMode = SDDS_Realloc(*normalizationMode, sizeof(**normalizationMode) * (*nColumns + 1));
651 (*normalizationMode)[*nColumns] = columnList[i].normalizationMode;
652 *nColumns += 1;
653 }
654 }
655 }
656 /*
657 printf("Computing density using %d columns\n", *nColumns);
658 for (i=0; i<*nColumns; i++) {
659 printf("%s --- %s normalization\n",
660 (*columnName)[i], normalizationOption[(*normalizationMode)[i]]);
661 fflush(stdout);
662 }
663 */
664 return 1;
665}
666
667void normalizeData(double *data, int64_t rows, short mode, double min_val, double max_val, int threads) {
668 int64_t i;
669 double offset, rms, divisor;
670
671 switch (mode) {
672 case NORM_RANGE:
673 if (min_val == max_val)
674 SDDS_Bomb("attempt to normalize data with zero range");
675 divisor = max_val - min_val;
676 offset = max_val - min_val;
677 break;
678 case NORM_RMS:
679 computeMomentsThreaded(&offset, NULL, &rms, NULL, data, rows, threads);
680 if (rms == 0)
681 SDDS_Bomb("attempt to normalize data with zero rms");
682 divisor = rms;
683 break;
684 default:
685 divisor = 1;
686 offset = 0;
687 break;
688 }
689 for (i = 0; i < rows; i++)
690 data[i] = (data[i] - offset) / divisor;
691}
692
693double SilvermansBandwidth(double data[], long dimensions, int64_t M) {
694 /* See https://en.wikipedia.org/wiki/Multivariate_kernel_density_estimation#Optimal_bandwidth_matrix_selection */
695 return pow(4.0 / (dimensions + 2.0) / M, 2.0 / (dimensions + 4.0)) * ipow2(gsl_stats_sd(data, 1, M));
696}
697
698void createBinTree(long bins, long inputColumns, BIN_LAYER *layer) {
699 if (inputColumns == 1) {
700 layer->sum = calloc(bins, sizeof(*(layer->sum)));
701 } else {
702 long i;
703 layer->nextLowerLayer = calloc(bins, sizeof(BIN_LAYER));
704 for (i = 0; i < bins; i++)
705 createBinTree(bins, inputColumns - 1, layer->nextLowerLayer + i);
706 }
707}
708
709void fillIndexArray(double *point, long bins, long inputColumns, long *indexList, double *min_val, double *delta_val) {
710 long i, j, bins_m1;
711
712 bins_m1 = bins - 1;
713 for (i = 0; i < inputColumns; i++) {
714 j = (point[i] - min_val[i]) / delta_val[i];
715 if (j < 0)
716 j = 0;
717 else if (j > bins_m1)
718 j = bins_m1;
719 indexList[i] = j;
720 }
721}
722
723void addToBinValue(int myid, long bins, long inputColumns, BIN_LAYER *layer, long *indexList, double quantity) {
724 while (inputColumns--) {
725 if (inputColumns == 0) {
726#pragma omp atomic
727 layer->sum[indexList[0]] += quantity;
728 } else {
729 layer = layer->nextLowerLayer + indexList[0];
730 indexList++;
731 }
732 }
733}
734
735void rescaleDensity(long bins, long inputColumns, BIN_LAYER *layer, double factor) {
736 long i;
737 if (inputColumns == 1) {
738 for (i = 0; i < bins; i++)
739 layer->sum[i] *= factor;
740 } else {
741 for (i = 0; i < bins; i++)
742 rescaleDensity(bins, inputColumns - 1, layer->nextLowerLayer + i, factor);
743 }
744}
745
746void zeroBinValues(long bins, long inputColumns, BIN_LAYER *layer) {
747 long i;
748 if (inputColumns == 1) {
749 for (i = 0; i < bins; i++)
750 layer->sum[i] = 0;
751 } else {
752 for (i = 0; i < bins; i++)
753 zeroBinValues(bins, inputColumns - 1, layer->nextLowerLayer + i);
754 }
755}
756
757void dumpBinValues(SDDS_DATASET *SDDSkde, SDDS_DATASET *SDDSin,
758 long bins, char **inputColumn, long inputColumns,
759 char *densityColumn, BIN_LAYER *layer, double *min_val, double *delta_val) {
760 long i;
761 long *index, *index1, *index2;
762 int64_t row;
763 long *columnIndex, densityIndex;
764
765 if (!SDDS_StartPage(SDDSkde, (int64_t)ipow(bins, inputColumns)) ||
766 !SDDS_CopyParameters(SDDSkde, SDDSin))
767 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
768
769 columnIndex = calloc(inputColumns, sizeof(*columnIndex));
770 for (i = 0; i < inputColumns; i++)
771 columnIndex[i] = SDDS_GetColumnIndex(SDDSkde, inputColumn[i]);
772 densityIndex = SDDS_GetColumnIndex(SDDSkde, densityColumn);
773
774 index1 = calloc(inputColumns, sizeof(*index1));
775 index2 = calloc(inputColumns, sizeof(*index2));
776 index = calloc(inputColumns, sizeof(*index));
777 for (i = 0; i < inputColumns; i++)
778 index2[i] = bins - 1;
779 row = 0;
780 do {
781 for (i = 0; i < inputColumns; i++)
782 if (!SDDS_SetRowValues(SDDSkde, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE,
783 row, columnIndex[i], index[i] * delta_val[i] + min_val[i], -1))
784 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
785 if (!SDDS_SetRowValues(SDDSkde, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE,
786 row, densityIndex, retrieveBinValue(bins, inputColumns, layer, index), -1))
787 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
788 row++;
789 } while (advanceCounter(index, index1, index2, inputColumns));
790
791 if (!SDDS_WritePage(SDDSkde))
792 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
793
794 free(columnIndex);
795 free(index1);
796 free(index2);
797 free(index);
798}
799
800double retrieveBinValue(long bins, long inputColumns, BIN_LAYER *layer, long *indexList) {
801 while (--inputColumns) {
802 layer = layer->nextLowerLayer + indexList[0];
803 indexList++;
804 }
805 return layer->sum[indexList[0]];
806}
807
808void interpolateBinValue0(int myid) {
809 uint64_t iRow, iRow0, iRow1;
810 long *indexList, j;
811 double *point;
812 indexList = malloc(sizeof(*indexList) * inputColumns);
813 point = malloc(sizeof(*point) * inputColumns);
814 iRow0 = myid * (rows / threads);
815 if (myid == (threads - 1))
816 iRow1 = rows - 1;
817 else
818 iRow1 = (myid + 1) * (rows / threads) - 1;
819 if (verbose) {
820 printf("Thread %d handling interpolation for rows %ld to %ld\n", myid, (long)iRow0, (long)iRow1);
821 fflush(stdout);
822 }
823 for (iRow = iRow0; iRow <= iRow1; iRow++) {
824 for (j = 0; j < inputColumns; j++)
825 point[j] = data[j][iRow];
826 fillIndexArray(point, bins, inputColumns, indexList, min, delta);
827 density[iRow] = interpolateBinValue(bins, inputColumns, &binTree, point, min, delta, indexList);
828 }
829 free(point);
830 free(indexList);
831 if (verbose) {
832 printf("Finished interpolation on thread %d\n", myid);
833 fflush(stdout);
834 }
835 return;
836}
837
838double interpolateBinValue(long bins, long inputColumns, BIN_LAYER *layer, double *data_point, double *min_val, double *delta_val, long *indexList) {
839 double value1, value2, value;
840 if (indexList[0] != (bins - 1)) {
841 if (inputColumns > 1) {
842 value1 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0], data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
843 value2 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0] + 1, data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
844 } else {
845 value1 = layer->sum[indexList[0] + 0];
846 value2 = layer->sum[indexList[0] + 1];
847 }
848 value = value1 + (value2 - value1) / delta_val[0] * (data_point[0] - (min_val[0] + delta_val[0] * indexList[0]));
849 } else {
850 if (inputColumns > 1) {
851 value1 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0] - 1, data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
852 value2 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0], data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
853 } else {
854 value1 = layer->sum[indexList[0] - 1];
855 value2 = layer->sum[indexList[0] + 0];
856 }
857 value = value1 + (value2 - value1) / delta_val[0] * (data_point[0] - (min_val[0] + delta_val[0] * (indexList[0] - 1)));
858 }
859 if (isnan(value) || isinf(value) || value < 0)
860 value = 0;
861 return value;
862}
863
864void addDensityToBinTree0(int myid) {
865 uint64_t iRow, iRow0, iRow1;
866 uint64_t myRowsSampled;
867 double time0, time1;
868
869 iRow0 = myid * (rows / threads);
870 if (myid == (threads - 1))
871 iRow1 = rows - 1;
872 else
873 iRow1 = (myid + 1) * (rows / threads) - 1;
874 if (verbose) {
875 printf("Thread %d handling density addition for rows %ld to %ld\n", myid, (long)iRow0, (long)iRow1);
876 fflush(stdout);
877 }
878
879 myRowsSampled = 0;
880 time0 = delapsed_time();
881 for (iRow = iRow0; iRow <= iRow1; iRow++) {
882 if ((kdeSampleFraction != 1) && (kdeSampleFraction < drand(-1)))
883 continue;
884 myRowsSampled++;
885 if (verbose && (time1 = delapsed_time()) > (time0 + 10)) {
886 time0 = time1;
887 fprintf(stderr, "Addition of density %f %% complete after %.0lf s\n", (100.0 * (iRow - iRow0) / (iRow1 - iRow0 + 1.0)), time1);
888 }
889 addDensityToBinTree(myid, &binTree, bins, inputColumns, min, delta, bw, iRow, rows, data,
890 weightValue ? weightValue[iRow] : 1.0, nKdeSigmas, kdeExpLimit);
891 }
892 if (verbose) {
893 printf("Finished adding density on thread %d\n", myid);
894 fflush(stdout);
895 }
896 rowsSampledThread[myid] = myRowsSampled;
897 return;
898}
899
900void addDensityToBinTree(int myid, BIN_LAYER *layer, long bins, long inputColumns,
901 double *min_val, double *delta_val, double *bw, uint64_t row, uint64_t rows, double **data,
902 double weight, long nSigmas, double expLimit) {
903 double *samplePoint;
904 long i;
905
906 samplePoint = malloc(sizeof(*samplePoint) * inputColumns);
907 for (i = 0; i < inputColumns; i++)
908 samplePoint[i] = data[i][row];
909
910 addDensityToBinTree1(myid, layer, bins, inputColumns, min_val, delta_val, bw, samplePoint, weight, rows, nSigmas, expLimit);
911
912 free(samplePoint);
913}
914
915int advanceCounter(long *index, long *index1, long *index2, long n) {
916 long i, carry, rolledOver;
917 carry = 1;
918 rolledOver = 0;
919 for (i = 0; i < n; i++) {
920 index[i] += carry;
921 if (index[i] > index2[i]) {
922 carry = 1;
923 index[i] = index1[i];
924 rolledOver += 1;
925 } else
926 carry = 0;
927 }
928 return rolledOver != n;
929}
930
931void addDensityToBinTree1(int myid, BIN_LAYER *layer, long bins, long inputColumns,
932 double *min_val, double *delta_val, double *bw, double *data_point, double weight,
933 uint64_t rows, long nSigmas, double expLimit) {
934 long i;
935 /* index limits in each dimension */
936 long *index1 = NULL, *index2 = NULL;
937 long *index = NULL;
938 /* allows multiplying in the inner loop instead of dividing (for each thread) */
939 double *invSqrtBw = NULL, *invBw = NULL;
940 long deltaIndex;
941 double zLimit, f1;
942
943 index = malloc(sizeof(*index) * inputColumns);
944 index1 = malloc(sizeof(*index1) * inputColumns);
945 index2 = malloc(sizeof(*index2) * inputColumns);
946 invSqrtBw = malloc(sizeof(*invSqrtBw) * inputColumns);
947 invBw = malloc(sizeof(*invBw) * inputColumns);
948
949 fillIndexArray(data_point, bins, inputColumns, index1, min_val, delta_val);
950 memcpy(index2, index1, sizeof(*index2) * inputColumns);
951 for (i = 0; i < inputColumns; i++) {
952 invBw[i] = 1. / bw[i];
953 invSqrtBw[i] = sqrt(invBw[i]);
954 deltaIndex = nSigmas * sqrt(bw[i]) / delta_val[i]; /* sum over +/-nSigmas*sigma */
955 if ((index1[i] -= deltaIndex) < 0)
956 index1[i] = 0;
957 if ((index2[i] += deltaIndex) > (bins - 1))
958 index2[i] = bins - 1;
959 }
960
961 memcpy(index, index1, sizeof(*index) * inputColumns);
962 zLimit = -2 * log(expLimit);
963 f1 = 1 / (pow(PIx2, inputColumns / 2.0) * rows);
964 do {
965 double z, p;
966 z = 0;
967 p = f1;
968 for (i = 0; i < inputColumns; i++) {
969 z += sqr(data_point[i] - (index[i] * delta_val[i] + min_val[i])) * invBw[i];
970 p *= invSqrtBw[i];
971 }
972 if (z < zLimit)
973 addToBinValue(myid, bins, inputColumns, layer, index, exp(-z / 2) * p * weight);
974 } while (advanceCounter(index, index1, index2, inputColumns));
975
976 free(index);
977 free(index1);
978 free(index2);
979 free(invSqrtBw);
980 free(invBw);
981}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
int32_t SDDS_SetColumnFlags(SDDS_DATASET *SDDS_dataset, int32_t column_flag_value)
Sets the acceptance flags for all columns in the current data table of a data set.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_SHORT
Identifier for the signed short integer data type.
Definition SDDStypes.h:73
#define SDDS_ANY_NUMERIC_TYPE
Special identifier used by SDDS_Check*() routines to accept any numeric type.
Definition SDDStypes.h:157
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
char * cp_str(char **s, char *t)
Copies a string, allocating memory for storage.
Definition cp_str.c:28
float drand(long dummy)
Generate a uniform random float in [0,1].
Definition drand.c:41
int update_min_max(double *min, double *max, double *list, int64_t n, int32_t reset)
Updates the minimum and maximum values based on a list of doubles.
Definition findMinMax.c:72
double ipow(const double x, const int64_t p)
Compute x raised to the power p (x^p).
Definition ipow.c:33
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
Definition moments.c:127
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
double delapsed_time(void)
Calculates the elapsed clock time since the last initialization as a numerical value.
Definition timer.c:92