49#include <gsl/gsl_statistics.h>
50#include <gsl/gsl_sort.h>
51#include <gsl/gsl_math.h>
53#if defined(linux) || (defined(_WIN32) && !defined(_MINGW))
73char *option[N_OPTIONS] = {
89char *normalizationOption[NORM_OPTIONS] = {
90 "none",
"range",
"rms"};
94 "Usage: sddslocaldensity [<inputfile>] [<outputfile>] [-pipe=[input][,output]] [-threads=<number>]\n"
95 " {-fraction=<value> | -spread=<value> | \n"
96 " -kde=bins=<number>[,gridoutput=<filename>][,nsigma=<value>][,explimit=<value>]"
97 "[,sample=<fraction>|use=<number>][,spanPages]}\n"
98 " -columns=<normalizationMode>,<name>[,...] [-output=<columnName>] "
99 "[-weight=<columnName>] [-verbose]\n\n"
101 " -pipe The standard SDDS Toolkit pipe option.\n"
102 " -threads The number of threads to use.\n"
103 " -columns Specifies the names of the columns to include. The names may include wildcards.\n"
104 " The normalization mode is one of \"none\", \"range\", or \"rms\".\n"
105 " Note that the normalization mode is irrelevant when fraction or spread options are used.\n"
106 " -weight Name of the column with which to weight the contributions of each point.\n"
107 " -fraction Fraction of the range to use to identify \"nearby\" points.\n"
108 " -spread Standard deviation of the weighting function as a fraction of the range.\n"
109 " -kde If specified, use n-dimensional Kernel Density Estimation instead of a point-based algorithm.\n"
110 " Highly recommended when the number of data points is large to avoid N² growth in runtime.\n"
111 " nsigma gives the number of standard deviations of the bandwidth over which to sum the\n"
112 " Gaussian factor; smaller numbers can considerably improve performance at the expense of\n"
113 " lower accuracy in the tails. Using the sample qualifier allows using a randomly-sampled\n"
114 " fraction of the data to create the density map. The use qualifier allows computing the\n"
115 " sample fraction so that approximately the indicated number of samples is used.\n"
116 " If spanPages is given, the KDE density map is created using data from all pages of the file;\n"
117 " however, the output retains the original page breakdown; useful when processing very large\n"
118 " quantities of data.\n"
119 " -output Name of the output column. Defaults to \"LocalDensity\".\n"
120 " -verbose Print progress information while running.\n\n"
121 "Program by Michael Borland. (" __DATE__
" " __TIME__
"\", SVN revision: " SVN_VERSION
")\n";
125 short normalizationMode;
131 char ***columnName,
short **normalizationMode, int32_t *nColumns);
132void normalizeData(
double *data, int64_t rows,
short mode,
double min,
double max,
int threads);
140double SilvermansBandwidth(
double data[],
long dimensions, int64_t M);
141void createBinTree(
long bins,
long inputColumns,
BIN_LAYER *layer);
142void fillIndexArray(
double *point,
long bins,
long inputColumns,
long *indexList,
double *min,
double *delta);
143void addToBinValue(
int myid,
long bins,
long inputColumns,
BIN_LAYER *layer,
long *indexList,
double quantity);
144void zeroBinValues(
long bins,
long inputColumns,
BIN_LAYER *layer);
145double retrieveBinValue(
long bins,
long inputColumns,
BIN_LAYER *layer,
long *indexList);
146double interpolateBinValue(
long bins,
long inputColumns,
BIN_LAYER *layer,
double *data,
double *min,
double *delta,
long *indexList);
147void interpolateBinValue0(
int myid);
148void addDensityToBinTree(
int myid,
BIN_LAYER *layer,
long bins,
long inputColumns,
149 double *min,
double *delta,
double *bw, uint64_t row, uint64_t rows,
double **data,
150 double weight,
long nSigmas,
double expLimit);
151void addDensityToBinTree1(
int myid,
BIN_LAYER *layer,
long bins,
long inputColumns,
152 double *min,
double *delta,
double *bw,
double *data,
double weight, uint64_t rows,
long nSigmas,
154int advanceCounter(
long *index,
long *index1,
long *index2,
long n);
156 char *densityColumn,
BIN_LAYER *layer,
double *min,
double *delta);
157void rescaleDensity(
long bins,
long inputColumns,
BIN_LAYER *layer,
double factor);
158void addDensityToBinTree0(
int myid);
160#define KDE_BINS_SEEN 0x001UL
161#define KDE_GRIDOUTPUT_SEEN 0x002UL
162#define KDE_NSIGMAS_SEEN 0x004UL
163#define KDE_EXPLIMIT_SEEN 0x008UL
164#define KDE_SAMPLE_SEEN 0x010UL
165#define KDE_USE_SEEN 0x020UL
166#define KDE_SPAN_PAGES 0x040UL
170long bins, nKdeSigmas;
171double *min, *max, *delta;
172int64_t iRow, rows, jRow, rowsSampled;
173int64_t *rowsSampledThread = NULL;
177double kdeSampleFraction;
180double *density = NULL;
181double *weightValue = NULL;
183int main(
int argc,
char **argv) {
186 int32_t nColumnLists;
187 char **inputColumn, *weightColumn;
188 short *normalizationMode;
189 double fraction, spreadFraction;
190 char *input, *output;
193 unsigned long pipeFlags;
194 SCANNED_ARG *scanned;
197 int32_t kdeNumberToUse;
199 unsigned long kdeFlags;
203 argc =
scanargs(&scanned, argc, argv);
204 if (argc < 3 || argc > (3 + N_OPTIONS))
207 output = input = NULL;
208 outputColumn = weightColumn = NULL;
212 fraction = spreadFraction = 0;
214 kdeGridOutput = NULL;
218 kdeSampleFraction = 1;
222 for (iArg = 1; iArg < argc; iArg++) {
223 if (scanned[iArg].arg_type == OPTION) {
225 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
227 if (scanned[iArg].n_items < 3)
229 columnList =
SDDS_Realloc(columnList,
sizeof(*columnList) * (nColumnLists + 1));
230 if ((columnList[nColumnLists].normalizationMode =
match_string(scanned[iArg].list[1], normalizationOption,
231 NORM_OPTIONS, 0)) == -1)
232 SDDS_Bomb(
"invalid normalization mode given");
233 columnList[nColumnLists].suppliedNames = scanned[iArg].n_items - 2;
234 columnList[nColumnLists].suppliedName =
tmalloc(
sizeof(*(columnList[nColumnLists].suppliedName)) * columnList[nColumnLists].suppliedNames);
235 for (i = 0; i < columnList[nColumnLists].suppliedNames; i++) {
236 columnList[nColumnLists].suppliedName[i] = scanned[iArg].list[i + 2];
237 scanned[iArg].list[i + 2] = NULL;
242 if (scanned[iArg].n_items != 2)
243 SDDS_Bomb(
"invalid -outputColumn syntax: give a name");
244 outputColumn = scanned[iArg].list[1];
245 scanned[iArg].list[1] = NULL;
248 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1],
"%lf", &fraction) != 1 || fraction <= 0)
249 SDDS_Bomb(
"invalid -fraction syntax: give a value greater than 0");
252 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1],
"%lf", &spreadFraction) != 1 || spreadFraction <= 0)
253 SDDS_Bomb(
"invalid -spread syntax: give a value greater than 0");
256 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1],
"%d", &threads) != 1 || threads <= 0)
257 SDDS_Bomb(
"invalid -threads syntax: give a value greater than 0");
260 if (scanned[iArg].n_items < 2)
261 SDDS_Bomb(
"invalid -kde syntax: give number of bins");
262 scanned[iArg].n_items -= 1;
264 if (!
scanItemList(&kdeFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
265 "bins",
SDDS_LONG, &bins, 1, KDE_BINS_SEEN,
266 "gridoutput",
SDDS_STRING, &kdeGridOutput, 1, KDE_GRIDOUTPUT_SEEN,
267 "nsigmas",
SDDS_LONG, &nKdeSigmas, 1, KDE_NSIGMAS_SEEN,
268 "explimit",
SDDS_DOUBLE, &kdeExpLimit, 1, KDE_EXPLIMIT_SEEN,
269 "sample",
SDDS_DOUBLE, &kdeSampleFraction, 1, KDE_SAMPLE_SEEN,
270 "use",
SDDS_LONG, &kdeNumberToUse, 1, KDE_USE_SEEN,
271 "spanpages", -1, NULL, 0, KDE_SPAN_PAGES,
275 SDDS_Bomb(
"Number of bins should be at least 3 for KDE");
277 SDDS_Bomb(
"Number of sigmas should be at least 1 for KDE");
278 if (kdeExpLimit <= 0 || kdeExpLimit > 1)
279 SDDS_Bomb(
"Exponential limit for KDE must be (0, 1].");
280 if (kdeSampleFraction <= 0 || kdeSampleFraction > 1)
281 SDDS_Bomb(
"Sample fraction for KDE must be (0, 1].");
282 if (kdeFlags & KDE_USE_SEEN) {
283 if (kdeNumberToUse <= 1)
284 SDDS_Bomb(
"Number to use for KDE must be at least 1.");
285 if (kdeFlags & KDE_SAMPLE_SEEN)
286 SDDS_Bomb(
"Give sample fraction or number to use for KDE, not both.");
290 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
297 if (scanned[iArg].n_items != 2)
298 SDDS_Bomb(
"invalid -weight syntax: give the name of a column");
299 weightColumn = scanned[iArg].list[1];
302 fprintf(stderr,
"error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
308 input = scanned[iArg].list[0];
310 output = scanned[iArg].list[0];
316 startTime = delapsed_time();
317 if (((fraction > 0 ? 1 : 0) + (spreadFraction > 0 ? 1 : 0) + (bins > 0)) > 1)
318 SDDS_Bomb(
"give only one of -fraction, -spread, or -kde");
322 if (kdeFlags & KDE_SPAN_PAGES && pipeFlags)
323 SDDS_Bomb(
"-kde=spanPages is incompatible with -pipe option");
324 if (!kdeFlags && weightColumn)
325 SDDS_Bomb(
"-weight is only supported for -kde at present");
328 SDDS_Bomb(
"supply the names of columns to include with the -columns option");
333 if (!resolveColumnNames(&SDDSin, columnList, nColumnLists, &inputColumn, &normalizationMode, &inputColumns))
342 cp_str(&outputColumn,
"LocalDensity");
355 for (i = 0; i < inputColumns; i++)
366 data =
tmalloc(
sizeof(*data) * inputColumns);
371 binTree.nextLowerLayer = NULL;
372 createBinTree(bins, inputColumns, &binTree);
375 min = calloc(inputColumns,
sizeof(*min));
376 max = calloc(inputColumns,
sizeof(*max));
377 delta = malloc(
sizeof(*delta) * inputColumns);
378#if !defined(NOTHREADS)
379 omp_set_num_threads(threads);
381 bw = malloc(
sizeof(*bw) * inputColumns);
384 while ((kdeFlags & KDE_SPAN_PAGES && pass < 4) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
386 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
392 fprintf(stderr,
"Processing page %ld (pass %ld) with %" PRId64
" rows\n", readCode, pass, rows);
396 for (i = 0; i < inputColumns; i++) {
400 update_min_max(&min[i], &max[i], data[i], rows, kdeFlags & KDE_SPAN_PAGES ? readCode == 1 : 1);
402 delta[i] = (max[i] - min[i]) / (bins - 1);
406 fprintf(stderr,
"%s:[%le, %le] delta=%le\n", inputColumn[i], min[i], max[i], delta[i]);
409 normalizeData(data[i], rows, normalizationMode[i], min[i], max[i], threads);
412 if ((!(kdeFlags & KDE_SPAN_PAGES) && pass == 1) ||
413 (kdeFlags & KDE_SPAN_PAGES && pass == 2 && readCode == 1)) {
414 for (i = 0; i < inputColumns; i++) {
415 bw[i] = SilvermansBandwidth(data[i], inputColumns, rows);
417 fprintf(stderr,
"Bandwidth for %s is %le\n", inputColumn[i], bw[i]);
421 if ((!(kdeFlags & KDE_SPAN_PAGES) && pass == 1) ||
422 (kdeFlags & KDE_SPAN_PAGES && pass == 2)) {
424 fprintf(stderr,
"Summing density over grid.\n");
426 if (kdeFlags & KDE_USE_SEEN) {
427 if ((kdeSampleFraction = (1.0 * kdeNumberToUse) / rows) > 1)
428 kdeSampleFraction = 1;
430 rowsSampledThread = calloc(threads,
sizeof(*rowsSampledThread));
434#if defined(NOTHREADS)
435 addDensityToBinTree0(0);
437 addDensityToBinTree0(omp_get_thread_num());
440 for (i = 0; i < threads; i++) {
441 rowsSampled += rowsSampledThread[i];
443 free(rowsSampledThread);
445 if (rowsSampled != rows) {
447 fprintf(stderr,
"%" PRId64
" of %" PRId64
" rows sampled\n", rowsSampled, rows);
448 rescaleDensity(bins, inputColumns, &binTree, (1.0 * rows / rowsSampled));
452 fprintf(stderr,
"Dumping KDE grid\n");
453 dumpBinValues(&SDDSkde, &SDDSin, bins, inputColumn, inputColumns, outputColumn, &binTree, min, delta);
457 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
459 density = malloc(
sizeof(*density) * rows);
461 fprintf(stderr,
"Interpolating density\n");
464#if defined(NOTHREADS)
465 interpolateBinValue0(0);
467 interpolateBinValue0(omp_get_thread_num());
474 if (!(kdeFlags & KDE_SPAN_PAGES)) {
476 fprintf(stderr,
"Setting density map to zero\n");
477 zeroBinValues(bins, inputColumns, &binTree);
480 for (i = 0; i < inputColumns; i++)
485 double *count, *epsilon, distance;
486 count =
tmalloc(rows *
sizeof(*count));
487 epsilon =
tmalloc(inputColumns *
sizeof(*epsilon));
488 for (i = 0; i < inputColumns; i++)
489 epsilon[i] = fraction * (max[i] - min[i]);
490 for (iRow = 0; iRow < rows; iRow++) {
492 for (jRow = 0; jRow < rows; jRow++) {
495 for (i = 0; i < inputColumns; i++) {
496 distance = fabs(data[i][iRow] - data[i][jRow]);
497 if (distance > epsilon[i]) {
506 count[iRow] = nInside;
512 }
else if (spreadFraction > 0) {
513 double *count, *spread;
514 count =
tmalloc(rows *
sizeof(*count));
515 spread =
tmalloc(
sizeof(*spread) * inputColumns);
516 for (i = 0; i < inputColumns; i++) {
517 spread[i] = spreadFraction * (max[i] - min[i]);
519 for (iRow = 0; iRow < rows; iRow++) {
521 for (jRow = 0; jRow < rows; jRow++) {
523 for (i = 0; i < inputColumns; i++)
524 term *= exp(-sqr((data[i][iRow] - data[i][jRow]) / spread[i]) / 2);
534 distance =
tmalloc(rows *
sizeof(*distance));
535 for (iRow = 0; iRow < rows; iRow++) {
538 for (jRow = 0; jRow < rows; jRow++) {
540 for (i = 0; i < inputColumns; i++)
541 sum += sqr(data[i][iRow] - data[i][jRow]);
542 distance[iRow] += sqrt(sum);
545 for (iRow = 0; iRow < rows; iRow++)
546 distance[iRow] = rows / distance[iRow];
552 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
554 "sddslocaldensityElapsedTime", delapsed_time() - startTime,
555 "sddslocaldensityThreads", (
short)threads,
562 if (kdeFlags & KDE_SPAN_PAGES && pass != 3) {
564 fprintf(stderr,
"Closing input file %s and reopening for second pass\n", input);
581 printf(
"Execution completed in %le s\n", delapsed_time() - startTime);
586 char ***columnName,
short **normalizationMode, int32_t *nColumns) {
588 char **newColumnName;
593 *normalizationMode = NULL;
596 for (i = 0; i < nColumnLists; i++) {
598 for (j = 0; j < columnList[i].suppliedNames; j++) {
603 SDDS_SetError(
"no columns found for one or more -column options");
606 for (j = 0; j < newColumns; j++) {
608 for (k = 0; k < *nColumns; k++) {
609 if (strcmp(newColumnName[j], (*columnName)[k]) == 0) {
611 (*normalizationMode)[k] = columnList[i].normalizationMode;
616 *columnName =
SDDS_Realloc(*columnName,
sizeof(**columnName) * (*nColumns + 1));
617 (*columnName)[*nColumns] = newColumnName[j];
618 *normalizationMode =
SDDS_Realloc(*normalizationMode,
sizeof(**normalizationMode) * (*nColumns + 1));
619 (*normalizationMode)[*nColumns] = columnList[i].normalizationMode;
635void normalizeData(
double *data, int64_t rows,
short mode,
double min_val,
double max_val,
int threads) {
637 double offset, rms, divisor;
641 if (min_val == max_val)
642 SDDS_Bomb(
"attempt to normalize data with zero range");
643 divisor = max_val - min_val;
644 offset = max_val - min_val;
649 SDDS_Bomb(
"attempt to normalize data with zero rms");
657 for (i = 0; i < rows; i++)
658 data[i] = (data[i] - offset) / divisor;
661double SilvermansBandwidth(
double data[],
long dimensions, int64_t M) {
663 return pow(4.0 / (dimensions + 2.0) / M, 2.0 / (dimensions + 4.0)) * ipow2(gsl_stats_sd(data, 1, M));
666void createBinTree(
long bins,
long inputColumns,
BIN_LAYER *layer) {
667 if (inputColumns == 1) {
668 layer->sum = calloc(bins,
sizeof(*(layer->sum)));
671 layer->nextLowerLayer = calloc(bins,
sizeof(
BIN_LAYER));
672 for (i = 0; i < bins; i++)
673 createBinTree(bins, inputColumns - 1, layer->nextLowerLayer + i);
677void fillIndexArray(
double *point,
long bins,
long inputColumns,
long *indexList,
double *min_val,
double *delta_val) {
681 for (i = 0; i < inputColumns; i++) {
682 j = (point[i] - min_val[i]) / delta_val[i];
685 else if (j > bins_m1)
691void addToBinValue(
int myid,
long bins,
long inputColumns,
BIN_LAYER *layer,
long *indexList,
double quantity) {
692 while (inputColumns--) {
693 if (inputColumns == 0) {
695 layer->sum[indexList[0]] += quantity;
697 layer = layer->nextLowerLayer + indexList[0];
703void rescaleDensity(
long bins,
long inputColumns,
BIN_LAYER *layer,
double factor) {
705 if (inputColumns == 1) {
706 for (i = 0; i < bins; i++)
707 layer->sum[i] *= factor;
709 for (i = 0; i < bins; i++)
710 rescaleDensity(bins, inputColumns - 1, layer->nextLowerLayer + i, factor);
714void zeroBinValues(
long bins,
long inputColumns,
BIN_LAYER *layer) {
716 if (inputColumns == 1) {
717 for (i = 0; i < bins; i++)
720 for (i = 0; i < bins; i++)
721 zeroBinValues(bins, inputColumns - 1, layer->nextLowerLayer + i);
726 long bins,
char **inputColumn,
long inputColumns,
727 char *densityColumn,
BIN_LAYER *layer,
double *min_val,
double *delta_val) {
729 long *index, *index1, *index2;
731 long *columnIndex, densityIndex;
737 columnIndex = calloc(inputColumns,
sizeof(*columnIndex));
738 for (i = 0; i < inputColumns; i++)
742 index1 = calloc(inputColumns,
sizeof(*index1));
743 index2 = calloc(inputColumns,
sizeof(*index2));
744 index = calloc(inputColumns,
sizeof(*index));
745 for (i = 0; i < inputColumns; i++)
746 index2[i] = bins - 1;
749 for (i = 0; i < inputColumns; i++)
751 row, columnIndex[i], index[i] * delta_val[i] + min_val[i], -1))
754 row, densityIndex, retrieveBinValue(bins, inputColumns, layer, index), -1))
757 }
while (advanceCounter(index, index1, index2, inputColumns));
768double retrieveBinValue(
long bins,
long inputColumns,
BIN_LAYER *layer,
long *indexList) {
769 while (--inputColumns) {
770 layer = layer->nextLowerLayer + indexList[0];
773 return layer->sum[indexList[0]];
776void interpolateBinValue0(
int myid) {
777 uint64_t iRow, iRow0, iRow1;
780 indexList = malloc(
sizeof(*indexList) * inputColumns);
781 point = malloc(
sizeof(*point) * inputColumns);
782 iRow0 = myid * (rows / threads);
783 if (myid == (threads - 1))
786 iRow1 = (myid + 1) * (rows / threads) - 1;
788 printf(
"Thread %d handling interpolation for rows %ld to %ld\n", myid, (
long)iRow0, (
long)iRow1);
791 for (iRow = iRow0; iRow <= iRow1; iRow++) {
792 for (j = 0; j < inputColumns; j++)
793 point[j] = data[j][iRow];
794 fillIndexArray(point, bins, inputColumns, indexList, min, delta);
795 density[iRow] = interpolateBinValue(bins, inputColumns, &binTree, point, min, delta, indexList);
800 printf(
"Finished interpolation on thread %d\n", myid);
806double interpolateBinValue(
long bins,
long inputColumns,
BIN_LAYER *layer,
double *data_point,
double *min_val,
double *delta_val,
long *indexList) {
807 double value1, value2, value;
808 if (indexList[0] != (bins - 1)) {
809 if (inputColumns > 1) {
810 value1 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0], data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
811 value2 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0] + 1, data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
813 value1 = layer->sum[indexList[0] + 0];
814 value2 = layer->sum[indexList[0] + 1];
816 value = value1 + (value2 - value1) / delta_val[0] * (data_point[0] - (min_val[0] + delta_val[0] * indexList[0]));
818 if (inputColumns > 1) {
819 value1 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0] - 1, data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
820 value2 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0], data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
822 value1 = layer->sum[indexList[0] - 1];
823 value2 = layer->sum[indexList[0] + 0];
825 value = value1 + (value2 - value1) / delta_val[0] * (data_point[0] - (min_val[0] + delta_val[0] * (indexList[0] - 1)));
827 if (isnan(value) || isinf(value) || value < 0)
832void addDensityToBinTree0(
int myid) {
833 uint64_t iRow, iRow0, iRow1;
834 uint64_t myRowsSampled;
837 iRow0 = myid * (rows / threads);
838 if (myid == (threads - 1))
841 iRow1 = (myid + 1) * (rows / threads) - 1;
843 printf(
"Thread %d handling density addition for rows %ld to %ld\n", myid, (
long)iRow0, (
long)iRow1);
848 time0 = delapsed_time();
849 for (iRow = iRow0; iRow <= iRow1; iRow++) {
850 if ((kdeSampleFraction != 1) && (kdeSampleFraction <
drand(-1)))
853 if (verbose && (time1 = delapsed_time()) > (time0 + 10)) {
855 fprintf(stderr,
"Addition of density %f %% complete after %.0lf s\n", (100.0 * (iRow - iRow0) / (iRow1 - iRow0 + 1.0)), time1);
857 addDensityToBinTree(myid, &binTree, bins, inputColumns, min, delta, bw, iRow, rows, data,
858 weightValue ? weightValue[iRow] : 1.0, nKdeSigmas, kdeExpLimit);
861 printf(
"Finished adding density on thread %d\n", myid);
864 rowsSampledThread[myid] = myRowsSampled;
868void addDensityToBinTree(
int myid,
BIN_LAYER *layer,
long bins,
long inputColumns,
869 double *min_val,
double *delta_val,
double *bw, uint64_t row, uint64_t rows,
double **data,
870 double weight,
long nSigmas,
double expLimit) {
874 samplePoint = malloc(
sizeof(*samplePoint) * inputColumns);
875 for (i = 0; i < inputColumns; i++)
876 samplePoint[i] = data[i][row];
878 addDensityToBinTree1(myid, layer, bins, inputColumns, min_val, delta_val, bw, samplePoint, weight, rows, nSigmas, expLimit);
883int advanceCounter(
long *index,
long *index1,
long *index2,
long n) {
884 long i, carry, rolledOver;
887 for (i = 0; i < n; i++) {
889 if (index[i] > index2[i]) {
891 index[i] = index1[i];
896 return rolledOver != n;
899void addDensityToBinTree1(
int myid,
BIN_LAYER *layer,
long bins,
long inputColumns,
900 double *min_val,
double *delta_val,
double *bw,
double *data_point,
double weight,
901 uint64_t rows,
long nSigmas,
double expLimit) {
904 long *index1 = NULL, *index2 = NULL;
907 double *invSqrtBw = NULL, *invBw = NULL;
911 index = malloc(
sizeof(*index) * inputColumns);
912 index1 = malloc(
sizeof(*index1) * inputColumns);
913 index2 = malloc(
sizeof(*index2) * inputColumns);
914 invSqrtBw = malloc(
sizeof(*invSqrtBw) * inputColumns);
915 invBw = malloc(
sizeof(*invBw) * inputColumns);
917 fillIndexArray(data_point, bins, inputColumns, index1, min_val, delta_val);
918 memcpy(index2, index1,
sizeof(*index2) * inputColumns);
919 for (i = 0; i < inputColumns; i++) {
920 invBw[i] = 1. / bw[i];
921 invSqrtBw[i] = sqrt(invBw[i]);
922 deltaIndex = nSigmas * sqrt(bw[i]) / delta_val[i];
923 if ((index1[i] -= deltaIndex) < 0)
925 if ((index2[i] += deltaIndex) > (bins - 1))
926 index2[i] = bins - 1;
929 memcpy(index, index1,
sizeof(*index) * inputColumns);
930 zLimit = -2 * log(expLimit);
931 f1 = 1 / (pow(PIx2, inputColumns / 2.0) * rows);
936 for (i = 0; i < inputColumns; i++) {
937 z += sqr(data_point[i] - (index[i] * delta_val[i] + min_val[i])) * invBw[i];
941 addToBinValue(myid, bins, inputColumns, layer, index, exp(-z / 2) * p * weight);
942 }
while (advanceCounter(index, index1, index2, inputColumns));
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_SHORT
Identifier for the signed short integer data type.
#define SDDS_ANY_NUMERIC_TYPE
Special identifier used by SDDS_Check*() routines to accept any numeric type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
char * cp_str(char **s, char *t)
Copies a string, allocating memory for storage.
float drand(long dummy)
Generate a uniform random float in [0,1].
int update_min_max(double *min, double *max, double *list, int64_t n, int32_t reset)
Updates the minimum and maximum values based on a list of doubles.
double ipow(const double x, const int64_t p)
Compute x raised to the power p (x^p).
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.