74#include <gsl/gsl_statistics.h>
75#include <gsl/gsl_sort.h>
76#include <gsl/gsl_math.h>
78#if defined(linux) || (defined(_WIN32) && !defined(_MINGW))
98char *option[N_OPTIONS] = {
113#define NORM_OPTIONS 3
114char *normalizationOption[NORM_OPTIONS] = {
115 "none",
"range",
"rms"};
119 "sddslocaldensity [<inputfile>] [<outputfile>]\n"
120 " [-pipe=[input][,output]]\n"
121 " -columns=<normalizationMode>,<name>[,...]\n"
123 " -fraction=<value> |\n"
124 " -spread=<value> |\n"
125 " -kde=bins=<number>[,gridoutput=<filename>][,nsigma=<value>][,explimit=<value>]\n"
126 " [,sample=<fraction>|use=<number>][,spanPages]\n"
128 " [-output=<columnName>]\n"
129 " [-weight=<columnName>]\n"
130 " [-threads=<number>]\n"
133 " -pipe The standard SDDS Toolkit pipe option.\n"
134 " -threads The number of threads to use.\n"
135 " -columns Specifies the names of the columns to include. The names may include wildcards.\n"
136 " The normalization mode is one of \"none\", \"range\", or \"rms\".\n"
137 " Note that the normalization mode is irrelevant when fraction or spread options are used.\n"
138 " -weight Name of the column with which to weight the contributions of each point.\n"
139 " -fraction Fraction of the range to use to identify \"nearby\" points.\n"
140 " -spread Standard deviation of the weighting function as a fraction of the range.\n"
141 " -kde If specified, use n-dimensional Kernel Density Estimation instead of a point-based algorithm.\n"
142 " Highly recommended when the number of data points is large to avoid N² growth in runtime.\n"
143 " nsigma gives the number of standard deviations of the bandwidth over which to sum the\n"
144 " Gaussian factor; smaller numbers can considerably improve performance at the expense of\n"
145 " lower accuracy in the tails. Using the sample qualifier allows using a randomly-sampled\n"
146 " fraction of the data to create the density map. The use qualifier allows computing the\n"
147 " sample fraction so that approximately the indicated number of samples is used.\n"
148 " If spanPages is given, the KDE density map is created using data from all pages of the file;\n"
149 " however, the output retains the original page breakdown; useful when processing very large\n"
150 " quantities of data.\n"
151 " -output Name of the output column. Defaults to \"LocalDensity\".\n"
152 " -verbose Print progress information while running.\n\n"
153 "Program by Michael Borland. (" __DATE__
" " __TIME__
"\", SVN revision: " SVN_VERSION
")\n";
157 short normalizationMode;
163 char ***columnName,
short **normalizationMode, int32_t *nColumns);
164void normalizeData(
double *data, int64_t rows,
short mode,
double min,
double max,
int threads);
172double SilvermansBandwidth(
double data[],
long dimensions, int64_t M);
173void createBinTree(
long bins,
long inputColumns,
BIN_LAYER *layer);
174void fillIndexArray(
double *point,
long bins,
long inputColumns,
long *indexList,
double *min,
double *delta);
175void addToBinValue(
int myid,
long bins,
long inputColumns,
BIN_LAYER *layer,
long *indexList,
double quantity);
176void zeroBinValues(
long bins,
long inputColumns,
BIN_LAYER *layer);
177double retrieveBinValue(
long bins,
long inputColumns,
BIN_LAYER *layer,
long *indexList);
178double interpolateBinValue(
long bins,
long inputColumns,
BIN_LAYER *layer,
double *data,
double *min,
double *delta,
long *indexList);
179void interpolateBinValue0(
int myid);
180void addDensityToBinTree(
int myid,
BIN_LAYER *layer,
long bins,
long inputColumns,
181 double *min,
double *delta,
double *bw, uint64_t row, uint64_t rows,
double **data,
182 double weight,
long nSigmas,
double expLimit);
183void addDensityToBinTree1(
int myid,
BIN_LAYER *layer,
long bins,
long inputColumns,
184 double *min,
double *delta,
double *bw,
double *data,
double weight, uint64_t rows,
long nSigmas,
186int advanceCounter(
long *index,
long *index1,
long *index2,
long n);
188 char *densityColumn,
BIN_LAYER *layer,
double *min,
double *delta);
189void rescaleDensity(
long bins,
long inputColumns,
BIN_LAYER *layer,
double factor);
190void addDensityToBinTree0(
int myid);
192#define KDE_BINS_SEEN 0x001UL
193#define KDE_GRIDOUTPUT_SEEN 0x002UL
194#define KDE_NSIGMAS_SEEN 0x004UL
195#define KDE_EXPLIMIT_SEEN 0x008UL
196#define KDE_SAMPLE_SEEN 0x010UL
197#define KDE_USE_SEEN 0x020UL
198#define KDE_SPAN_PAGES 0x040UL
202long bins, nKdeSigmas;
203double *min, *max, *delta;
204int64_t iRow, rows, jRow, rowsSampled;
205int64_t *rowsSampledThread = NULL;
209double kdeSampleFraction;
212double *density = NULL;
213double *weightValue = NULL;
215int main(
int argc,
char **argv) {
218 int32_t nColumnLists;
219 char **inputColumn, *weightColumn;
220 short *normalizationMode;
221 double fraction, spreadFraction;
222 char *input, *output;
225 unsigned long pipeFlags;
226 SCANNED_ARG *scanned;
229 int32_t kdeNumberToUse;
231 unsigned long kdeFlags;
235 argc =
scanargs(&scanned, argc, argv);
236 if (argc < 3 || argc > (3 + N_OPTIONS))
239 output = input = NULL;
240 outputColumn = weightColumn = NULL;
244 fraction = spreadFraction = 0;
246 kdeGridOutput = NULL;
250 kdeSampleFraction = 1;
254 for (iArg = 1; iArg < argc; iArg++) {
255 if (scanned[iArg].arg_type == OPTION) {
257 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
259 if (scanned[iArg].n_items < 3)
261 columnList =
SDDS_Realloc(columnList,
sizeof(*columnList) * (nColumnLists + 1));
262 if ((columnList[nColumnLists].normalizationMode =
match_string(scanned[iArg].list[1], normalizationOption,
263 NORM_OPTIONS, 0)) == -1)
264 SDDS_Bomb(
"invalid normalization mode given");
265 columnList[nColumnLists].suppliedNames = scanned[iArg].n_items - 2;
266 columnList[nColumnLists].suppliedName =
tmalloc(
sizeof(*(columnList[nColumnLists].suppliedName)) * columnList[nColumnLists].suppliedNames);
267 for (i = 0; i < columnList[nColumnLists].suppliedNames; i++) {
268 columnList[nColumnLists].suppliedName[i] = scanned[iArg].list[i + 2];
269 scanned[iArg].list[i + 2] = NULL;
274 if (scanned[iArg].n_items != 2)
275 SDDS_Bomb(
"invalid -outputColumn syntax: give a name");
276 outputColumn = scanned[iArg].list[1];
277 scanned[iArg].list[1] = NULL;
280 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1],
"%lf", &fraction) != 1 || fraction <= 0)
281 SDDS_Bomb(
"invalid -fraction syntax: give a value greater than 0");
284 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1],
"%lf", &spreadFraction) != 1 || spreadFraction <= 0)
285 SDDS_Bomb(
"invalid -spread syntax: give a value greater than 0");
288 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1],
"%d", &threads) != 1 || threads <= 0)
289 SDDS_Bomb(
"invalid -threads syntax: give a value greater than 0");
292 if (scanned[iArg].n_items < 2)
293 SDDS_Bomb(
"invalid -kde syntax: give number of bins");
294 scanned[iArg].n_items -= 1;
296 if (!
scanItemList(&kdeFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
297 "bins",
SDDS_LONG, &bins, 1, KDE_BINS_SEEN,
298 "gridoutput",
SDDS_STRING, &kdeGridOutput, 1, KDE_GRIDOUTPUT_SEEN,
299 "nsigmas",
SDDS_LONG, &nKdeSigmas, 1, KDE_NSIGMAS_SEEN,
300 "explimit",
SDDS_DOUBLE, &kdeExpLimit, 1, KDE_EXPLIMIT_SEEN,
301 "sample",
SDDS_DOUBLE, &kdeSampleFraction, 1, KDE_SAMPLE_SEEN,
302 "use",
SDDS_LONG, &kdeNumberToUse, 1, KDE_USE_SEEN,
303 "spanpages", -1, NULL, 0, KDE_SPAN_PAGES,
307 SDDS_Bomb(
"Number of bins should be at least 3 for KDE");
309 SDDS_Bomb(
"Number of sigmas should be at least 1 for KDE");
310 if (kdeExpLimit <= 0 || kdeExpLimit > 1)
311 SDDS_Bomb(
"Exponential limit for KDE must be (0, 1].");
312 if (kdeSampleFraction <= 0 || kdeSampleFraction > 1)
313 SDDS_Bomb(
"Sample fraction for KDE must be (0, 1].");
314 if (kdeFlags & KDE_USE_SEEN) {
315 if (kdeNumberToUse <= 1)
316 SDDS_Bomb(
"Number to use for KDE must be at least 1.");
317 if (kdeFlags & KDE_SAMPLE_SEEN)
318 SDDS_Bomb(
"Give sample fraction or number to use for KDE, not both.");
322 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
329 if (scanned[iArg].n_items != 2)
330 SDDS_Bomb(
"invalid -weight syntax: give the name of a column");
331 weightColumn = scanned[iArg].list[1];
334 fprintf(stderr,
"error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
340 input = scanned[iArg].list[0];
342 output = scanned[iArg].list[0];
349 if (((fraction > 0 ? 1 : 0) + (spreadFraction > 0 ? 1 : 0) + (bins > 0)) > 1)
350 SDDS_Bomb(
"give only one of -fraction, -spread, or -kde");
354 if (kdeFlags & KDE_SPAN_PAGES && pipeFlags)
355 SDDS_Bomb(
"-kde=spanPages is incompatible with -pipe option");
356 if (!kdeFlags && weightColumn)
357 SDDS_Bomb(
"-weight is only supported for -kde at present");
360 SDDS_Bomb(
"supply the names of columns to include with the -columns option");
365 if (!resolveColumnNames(&SDDSin, columnList, nColumnLists, &inputColumn, &normalizationMode, &inputColumns))
374 cp_str(&outputColumn,
"LocalDensity");
387 for (i = 0; i < inputColumns; i++)
398 data =
tmalloc(
sizeof(*data) * inputColumns);
403 binTree.nextLowerLayer = NULL;
404 createBinTree(bins, inputColumns, &binTree);
407 min = calloc(inputColumns,
sizeof(*min));
408 max = calloc(inputColumns,
sizeof(*max));
409 delta = malloc(
sizeof(*delta) * inputColumns);
410#if !defined(NOTHREADS)
411 omp_set_num_threads(threads);
413 bw = malloc(
sizeof(*bw) * inputColumns);
416 while ((kdeFlags & KDE_SPAN_PAGES && pass < 4) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
418 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
424 fprintf(stderr,
"Processing page %ld (pass %ld) with %" PRId64
" rows\n", readCode, pass, rows);
428 for (i = 0; i < inputColumns; i++) {
432 update_min_max(&min[i], &max[i], data[i], rows, kdeFlags & KDE_SPAN_PAGES ? readCode == 1 : 1);
434 delta[i] = (max[i] - min[i]) / (bins - 1);
438 fprintf(stderr,
"%s:[%le, %le] delta=%le\n", inputColumn[i], min[i], max[i], delta[i]);
441 normalizeData(data[i], rows, normalizationMode[i], min[i], max[i], threads);
444 if ((!(kdeFlags & KDE_SPAN_PAGES) && pass == 1) ||
445 (kdeFlags & KDE_SPAN_PAGES && pass == 2 && readCode == 1)) {
446 for (i = 0; i < inputColumns; i++) {
447 bw[i] = SilvermansBandwidth(data[i], inputColumns, rows);
449 fprintf(stderr,
"Bandwidth for %s is %le\n", inputColumn[i], bw[i]);
453 if ((!(kdeFlags & KDE_SPAN_PAGES) && pass == 1) ||
454 (kdeFlags & KDE_SPAN_PAGES && pass == 2)) {
456 fprintf(stderr,
"Summing density over grid.\n");
458 if (kdeFlags & KDE_USE_SEEN) {
459 if ((kdeSampleFraction = (1.0 * kdeNumberToUse) / rows) > 1)
460 kdeSampleFraction = 1;
462 rowsSampledThread = calloc(threads,
sizeof(*rowsSampledThread));
466#if defined(NOTHREADS)
467 addDensityToBinTree0(0);
469 addDensityToBinTree0(omp_get_thread_num());
472 for (i = 0; i < threads; i++) {
473 rowsSampled += rowsSampledThread[i];
475 free(rowsSampledThread);
477 if (rowsSampled != rows) {
479 fprintf(stderr,
"%" PRId64
" of %" PRId64
" rows sampled\n", rowsSampled, rows);
480 rescaleDensity(bins, inputColumns, &binTree, (1.0 * rows / rowsSampled));
484 fprintf(stderr,
"Dumping KDE grid\n");
485 dumpBinValues(&SDDSkde, &SDDSin, bins, inputColumn, inputColumns, outputColumn, &binTree, min, delta);
489 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
491 density = malloc(
sizeof(*density) * rows);
493 fprintf(stderr,
"Interpolating density\n");
496#if defined(NOTHREADS)
497 interpolateBinValue0(0);
499 interpolateBinValue0(omp_get_thread_num());
506 if (!(kdeFlags & KDE_SPAN_PAGES)) {
508 fprintf(stderr,
"Setting density map to zero\n");
509 zeroBinValues(bins, inputColumns, &binTree);
512 for (i = 0; i < inputColumns; i++)
517 double *count, *epsilon, distance;
518 count =
tmalloc(rows *
sizeof(*count));
519 epsilon =
tmalloc(inputColumns *
sizeof(*epsilon));
520 for (i = 0; i < inputColumns; i++)
521 epsilon[i] = fraction * (max[i] - min[i]);
522 for (iRow = 0; iRow < rows; iRow++) {
524 for (jRow = 0; jRow < rows; jRow++) {
527 for (i = 0; i < inputColumns; i++) {
528 distance = fabs(data[i][iRow] - data[i][jRow]);
529 if (distance > epsilon[i]) {
538 count[iRow] = nInside;
544 }
else if (spreadFraction > 0) {
545 double *count, *spread;
546 count =
tmalloc(rows *
sizeof(*count));
547 spread =
tmalloc(
sizeof(*spread) * inputColumns);
548 for (i = 0; i < inputColumns; i++) {
549 spread[i] = spreadFraction * (max[i] - min[i]);
551 for (iRow = 0; iRow < rows; iRow++) {
553 for (jRow = 0; jRow < rows; jRow++) {
555 for (i = 0; i < inputColumns; i++)
556 term *= exp(-sqr((data[i][iRow] - data[i][jRow]) / spread[i]) / 2);
566 distance =
tmalloc(rows *
sizeof(*distance));
567 for (iRow = 0; iRow < rows; iRow++) {
570 for (jRow = 0; jRow < rows; jRow++) {
572 for (i = 0; i < inputColumns; i++)
573 sum += sqr(data[i][iRow] - data[i][jRow]);
574 distance[iRow] += sqrt(sum);
577 for (iRow = 0; iRow < rows; iRow++)
578 distance[iRow] = rows / distance[iRow];
584 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
587 "sddslocaldensityThreads", (
short)threads,
594 if (kdeFlags & KDE_SPAN_PAGES && pass != 3) {
596 fprintf(stderr,
"Closing input file %s and reopening for second pass\n", input);
613 printf(
"Execution completed in %le s\n",
delapsed_time() - startTime);
618 char ***columnName,
short **normalizationMode, int32_t *nColumns) {
620 char **newColumnName;
625 *normalizationMode = NULL;
628 for (i = 0; i < nColumnLists; i++) {
630 for (j = 0; j < columnList[i].suppliedNames; j++) {
635 SDDS_SetError(
"no columns found for one or more -column options");
638 for (j = 0; j < newColumns; j++) {
640 for (k = 0; k < *nColumns; k++) {
641 if (strcmp(newColumnName[j], (*columnName)[k]) == 0) {
643 (*normalizationMode)[k] = columnList[i].normalizationMode;
648 *columnName =
SDDS_Realloc(*columnName,
sizeof(**columnName) * (*nColumns + 1));
649 (*columnName)[*nColumns] = newColumnName[j];
650 *normalizationMode =
SDDS_Realloc(*normalizationMode,
sizeof(**normalizationMode) * (*nColumns + 1));
651 (*normalizationMode)[*nColumns] = columnList[i].normalizationMode;
667void normalizeData(
double *data, int64_t rows,
short mode,
double min_val,
double max_val,
int threads) {
669 double offset, rms, divisor;
673 if (min_val == max_val)
674 SDDS_Bomb(
"attempt to normalize data with zero range");
675 divisor = max_val - min_val;
676 offset = max_val - min_val;
681 SDDS_Bomb(
"attempt to normalize data with zero rms");
689 for (i = 0; i < rows; i++)
690 data[i] = (data[i] - offset) / divisor;
693double SilvermansBandwidth(
double data[],
long dimensions, int64_t M) {
695 return pow(4.0 / (dimensions + 2.0) / M, 2.0 / (dimensions + 4.0)) * ipow2(gsl_stats_sd(data, 1, M));
698void createBinTree(
long bins,
long inputColumns,
BIN_LAYER *layer) {
699 if (inputColumns == 1) {
700 layer->sum = calloc(bins,
sizeof(*(layer->sum)));
703 layer->nextLowerLayer = calloc(bins,
sizeof(
BIN_LAYER));
704 for (i = 0; i < bins; i++)
705 createBinTree(bins, inputColumns - 1, layer->nextLowerLayer + i);
709void fillIndexArray(
double *point,
long bins,
long inputColumns,
long *indexList,
double *min_val,
double *delta_val) {
713 for (i = 0; i < inputColumns; i++) {
714 j = (point[i] - min_val[i]) / delta_val[i];
717 else if (j > bins_m1)
723void addToBinValue(
int myid,
long bins,
long inputColumns,
BIN_LAYER *layer,
long *indexList,
double quantity) {
724 while (inputColumns--) {
725 if (inputColumns == 0) {
727 layer->sum[indexList[0]] += quantity;
729 layer = layer->nextLowerLayer + indexList[0];
735void rescaleDensity(
long bins,
long inputColumns,
BIN_LAYER *layer,
double factor) {
737 if (inputColumns == 1) {
738 for (i = 0; i < bins; i++)
739 layer->sum[i] *= factor;
741 for (i = 0; i < bins; i++)
742 rescaleDensity(bins, inputColumns - 1, layer->nextLowerLayer + i, factor);
746void zeroBinValues(
long bins,
long inputColumns,
BIN_LAYER *layer) {
748 if (inputColumns == 1) {
749 for (i = 0; i < bins; i++)
752 for (i = 0; i < bins; i++)
753 zeroBinValues(bins, inputColumns - 1, layer->nextLowerLayer + i);
758 long bins,
char **inputColumn,
long inputColumns,
759 char *densityColumn,
BIN_LAYER *layer,
double *min_val,
double *delta_val) {
761 long *index, *index1, *index2;
763 long *columnIndex, densityIndex;
769 columnIndex = calloc(inputColumns,
sizeof(*columnIndex));
770 for (i = 0; i < inputColumns; i++)
774 index1 = calloc(inputColumns,
sizeof(*index1));
775 index2 = calloc(inputColumns,
sizeof(*index2));
776 index = calloc(inputColumns,
sizeof(*index));
777 for (i = 0; i < inputColumns; i++)
778 index2[i] = bins - 1;
781 for (i = 0; i < inputColumns; i++)
783 row, columnIndex[i], index[i] * delta_val[i] + min_val[i], -1))
786 row, densityIndex, retrieveBinValue(bins, inputColumns, layer, index), -1))
789 }
while (advanceCounter(index, index1, index2, inputColumns));
800double retrieveBinValue(
long bins,
long inputColumns,
BIN_LAYER *layer,
long *indexList) {
801 while (--inputColumns) {
802 layer = layer->nextLowerLayer + indexList[0];
805 return layer->sum[indexList[0]];
808void interpolateBinValue0(
int myid) {
809 uint64_t iRow, iRow0, iRow1;
812 indexList = malloc(
sizeof(*indexList) * inputColumns);
813 point = malloc(
sizeof(*point) * inputColumns);
814 iRow0 = myid * (rows / threads);
815 if (myid == (threads - 1))
818 iRow1 = (myid + 1) * (rows / threads) - 1;
820 printf(
"Thread %d handling interpolation for rows %ld to %ld\n", myid, (
long)iRow0, (
long)iRow1);
823 for (iRow = iRow0; iRow <= iRow1; iRow++) {
824 for (j = 0; j < inputColumns; j++)
825 point[j] = data[j][iRow];
826 fillIndexArray(point, bins, inputColumns, indexList, min, delta);
827 density[iRow] = interpolateBinValue(bins, inputColumns, &binTree, point, min, delta, indexList);
832 printf(
"Finished interpolation on thread %d\n", myid);
838double interpolateBinValue(
long bins,
long inputColumns,
BIN_LAYER *layer,
double *data_point,
double *min_val,
double *delta_val,
long *indexList) {
839 double value1, value2, value;
840 if (indexList[0] != (bins - 1)) {
841 if (inputColumns > 1) {
842 value1 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0], data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
843 value2 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0] + 1, data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
845 value1 = layer->sum[indexList[0] + 0];
846 value2 = layer->sum[indexList[0] + 1];
848 value = value1 + (value2 - value1) / delta_val[0] * (data_point[0] - (min_val[0] + delta_val[0] * indexList[0]));
850 if (inputColumns > 1) {
851 value1 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0] - 1, data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
852 value2 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0], data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
854 value1 = layer->sum[indexList[0] - 1];
855 value2 = layer->sum[indexList[0] + 0];
857 value = value1 + (value2 - value1) / delta_val[0] * (data_point[0] - (min_val[0] + delta_val[0] * (indexList[0] - 1)));
859 if (isnan(value) || isinf(value) || value < 0)
864void addDensityToBinTree0(
int myid) {
865 uint64_t iRow, iRow0, iRow1;
866 uint64_t myRowsSampled;
869 iRow0 = myid * (rows / threads);
870 if (myid == (threads - 1))
873 iRow1 = (myid + 1) * (rows / threads) - 1;
875 printf(
"Thread %d handling density addition for rows %ld to %ld\n", myid, (
long)iRow0, (
long)iRow1);
881 for (iRow = iRow0; iRow <= iRow1; iRow++) {
882 if ((kdeSampleFraction != 1) && (kdeSampleFraction <
drand(-1)))
887 fprintf(stderr,
"Addition of density %f %% complete after %.0lf s\n", (100.0 * (iRow - iRow0) / (iRow1 - iRow0 + 1.0)), time1);
889 addDensityToBinTree(myid, &binTree, bins, inputColumns, min, delta, bw, iRow, rows, data,
890 weightValue ? weightValue[iRow] : 1.0, nKdeSigmas, kdeExpLimit);
893 printf(
"Finished adding density on thread %d\n", myid);
896 rowsSampledThread[myid] = myRowsSampled;
900void addDensityToBinTree(
int myid,
BIN_LAYER *layer,
long bins,
long inputColumns,
901 double *min_val,
double *delta_val,
double *bw, uint64_t row, uint64_t rows,
double **data,
902 double weight,
long nSigmas,
double expLimit) {
906 samplePoint = malloc(
sizeof(*samplePoint) * inputColumns);
907 for (i = 0; i < inputColumns; i++)
908 samplePoint[i] = data[i][row];
910 addDensityToBinTree1(myid, layer, bins, inputColumns, min_val, delta_val, bw, samplePoint, weight, rows, nSigmas, expLimit);
915int advanceCounter(
long *index,
long *index1,
long *index2,
long n) {
916 long i, carry, rolledOver;
919 for (i = 0; i < n; i++) {
921 if (index[i] > index2[i]) {
923 index[i] = index1[i];
928 return rolledOver != n;
931void addDensityToBinTree1(
int myid,
BIN_LAYER *layer,
long bins,
long inputColumns,
932 double *min_val,
double *delta_val,
double *bw,
double *data_point,
double weight,
933 uint64_t rows,
long nSigmas,
double expLimit) {
936 long *index1 = NULL, *index2 = NULL;
939 double *invSqrtBw = NULL, *invBw = NULL;
943 index = malloc(
sizeof(*index) * inputColumns);
944 index1 = malloc(
sizeof(*index1) * inputColumns);
945 index2 = malloc(
sizeof(*index2) * inputColumns);
946 invSqrtBw = malloc(
sizeof(*invSqrtBw) * inputColumns);
947 invBw = malloc(
sizeof(*invBw) * inputColumns);
949 fillIndexArray(data_point, bins, inputColumns, index1, min_val, delta_val);
950 memcpy(index2, index1,
sizeof(*index2) * inputColumns);
951 for (i = 0; i < inputColumns; i++) {
952 invBw[i] = 1. / bw[i];
953 invSqrtBw[i] = sqrt(invBw[i]);
954 deltaIndex = nSigmas * sqrt(bw[i]) / delta_val[i];
955 if ((index1[i] -= deltaIndex) < 0)
957 if ((index2[i] += deltaIndex) > (bins - 1))
958 index2[i] = bins - 1;
961 memcpy(index, index1,
sizeof(*index) * inputColumns);
962 zLimit = -2 * log(expLimit);
963 f1 = 1 / (pow(PIx2, inputColumns / 2.0) * rows);
968 for (i = 0; i < inputColumns; i++) {
969 z += sqr(data_point[i] - (index[i] * delta_val[i] + min_val[i])) * invBw[i];
973 addToBinValue(myid, bins, inputColumns, layer, index, exp(-z / 2) * p * weight);
974 }
while (advanceCounter(index, index1, index2, inputColumns));
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_SHORT
Identifier for the signed short integer data type.
#define SDDS_ANY_NUMERIC_TYPE
Special identifier used by SDDS_Check*() routines to accept any numeric type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
char * cp_str(char **s, char *t)
Copies a string, allocating memory for storage.
float drand(long dummy)
Generate a uniform random float in [0,1].
int update_min_max(double *min, double *max, double *list, int64_t n, int32_t reset)
Updates the minimum and maximum values based on a list of doubles.
double ipow(const double x, const int64_t p)
Compute x raised to the power p (x^p).
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
double delapsed_time(void)
Calculates the elapsed clock time since the last initialization as a numerical value.