SDDSlib
Loading...
Searching...
No Matches
sddslocaldensity.c File Reference

Computes the local density of data points using various methods such as fraction, spread, or Kernel Density Estimation (KDE). More...

#include "mdb.h"
#include "SDDS.h"
#include "scan.h"
#include <ctype.h>
#include <gsl/gsl_statistics.h>
#include <gsl/gsl_sort.h>
#include <gsl/gsl_math.h>

Go to the source code of this file.

Classes

struct  COLUMN_LIST
 
struct  bin_layer
 

Macros

#define NOTHREADS   1
 
#define NORM_NONE   0
 
#define NORM_RANGE   1
 
#define NORM_RMS   2
 
#define NORM_OPTIONS   3
 
#define KDE_BINS_SEEN   0x001UL
 
#define KDE_GRIDOUTPUT_SEEN   0x002UL
 
#define KDE_NSIGMAS_SEEN   0x004UL
 
#define KDE_EXPLIMIT_SEEN   0x008UL
 
#define KDE_SAMPLE_SEEN   0x010UL
 
#define KDE_USE_SEEN   0x020UL
 
#define KDE_SPAN_PAGES   0x040UL
 

Typedefs

typedef struct bin_layer BIN_LAYER
 

Enumerations

enum  option_type {
  CLO_COLUMNS , CLO_PIPE , CLO_OUTPUT , CLO_FRACTION ,
  CLO_SPREAD , CLO_KDE , CLO_VERBOSE , CLO_THREADS ,
  CLO_WEIGHT , N_OPTIONS
}
 

Functions

long resolveColumnNames (SDDS_DATASET *SDDSin, COLUMN_LIST *columnList, int32_t nColumnLists, char ***columnName, short **normalizationMode, int32_t *nColumns)
 
void normalizeData (double *data, int64_t rows, short mode, double min, double max, int threads)
 
double SilvermansBandwidth (double data[], long dimensions, int64_t M)
 
void createBinTree (long bins, long inputColumns, BIN_LAYER *layer)
 
void fillIndexArray (double *point, long bins, long inputColumns, long *indexList, double *min, double *delta)
 
void addToBinValue (int myid, long bins, long inputColumns, BIN_LAYER *layer, long *indexList, double quantity)
 
void zeroBinValues (long bins, long inputColumns, BIN_LAYER *layer)
 
double retrieveBinValue (long bins, long inputColumns, BIN_LAYER *layer, long *indexList)
 
double interpolateBinValue (long bins, long inputColumns, BIN_LAYER *layer, double *data, double *min, double *delta, long *indexList)
 
void interpolateBinValue0 (int myid)
 
void addDensityToBinTree (int myid, BIN_LAYER *layer, long bins, long inputColumns, double *min, double *delta, double *bw, uint64_t row, uint64_t rows, double **data, double weight, long nSigmas, double expLimit)
 
void addDensityToBinTree1 (int myid, BIN_LAYER *layer, long bins, long inputColumns, double *min, double *delta, double *bw, double *data, double weight, uint64_t rows, long nSigmas, double expLimit)
 
int advanceCounter (long *index, long *index1, long *index2, long n)
 
void dumpBinValues (SDDS_DATASET *SDDSkde, SDDS_DATASET *SDDSin, long bins, char **inputColumn, long inputColumns, char *densityColumn, BIN_LAYER *layer, double *min, double *delta)
 
void rescaleDensity (long bins, long inputColumns, BIN_LAYER *layer, double factor)
 
void addDensityToBinTree0 (int myid)
 
int main (int argc, char **argv)
 

Variables

char * option [N_OPTIONS]
 
char * normalizationOption [NORM_OPTIONS]
 
static char * USAGE
 
int threads = 1
 
BIN_LAYER binTree
 
long bins
 
long nKdeSigmas
 
double * min
 
double * max
 
double * delta
 
int64_t iRow
 
int64_t rows
 
int64_t jRow
 
int64_t rowsSampled
 
int64_t * rowsSampledThread = NULL
 
double ** data
 
double kdeExpLimit
 
int verbose
 
double kdeSampleFraction
 
int32_t inputColumns
 
double * bw = NULL
 
double * density = NULL
 
double * weightValue = NULL
 

Detailed Description

Computes the local density of data points using various methods such as fraction, spread, or Kernel Density Estimation (KDE).

Usage

sddslocaldensity [<inputfile>] [<outputfile>] [-pipe=[input][,output]] [-threads=<number>]
{-fraction=<value> | -spread=<value> |
-kde=bins=<number>[,gridoutput=<filename>][,nsigma=<value>][,explimit=<value>]
[,sample=<fraction>|use=<number>][,spanPages]}}
-columns=<normalizationMode>,<name>[,...] [-output=<columnName>]
[-weight=<columnName>] [-verbose]

Options

-pipe -threads <number> -columns <normalizationMode>,<name>[,...] The normalization mode is one of "none", "range", or "rms". Note that the normalization mode is irrelevant when fraction or spread options are used. -weight <columnName> -fraction

-spread

-kde bins=<number> gridoutput=<filename> nsigma=

explimit=

sample=<fraction> or use=<number> spanPages -output <columnName> -verbose

License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Author
M. Borland, R. Soliday, N. Kuklev

Definition in file sddslocaldensity.c.

Macro Definition Documentation

◆ KDE_BINS_SEEN

#define KDE_BINS_SEEN   0x001UL

Definition at line 160 of file sddslocaldensity.c.

◆ KDE_EXPLIMIT_SEEN

#define KDE_EXPLIMIT_SEEN   0x008UL

Definition at line 163 of file sddslocaldensity.c.

◆ KDE_GRIDOUTPUT_SEEN

#define KDE_GRIDOUTPUT_SEEN   0x002UL

Definition at line 161 of file sddslocaldensity.c.

◆ KDE_NSIGMAS_SEEN

#define KDE_NSIGMAS_SEEN   0x004UL

Definition at line 162 of file sddslocaldensity.c.

◆ KDE_SAMPLE_SEEN

#define KDE_SAMPLE_SEEN   0x010UL

Definition at line 164 of file sddslocaldensity.c.

◆ KDE_SPAN_PAGES

#define KDE_SPAN_PAGES   0x040UL

Definition at line 166 of file sddslocaldensity.c.

◆ KDE_USE_SEEN

#define KDE_USE_SEEN   0x020UL

Definition at line 165 of file sddslocaldensity.c.

◆ NORM_NONE

#define NORM_NONE   0

Definition at line 85 of file sddslocaldensity.c.

◆ NORM_OPTIONS

#define NORM_OPTIONS   3

Definition at line 88 of file sddslocaldensity.c.

◆ NORM_RANGE

#define NORM_RANGE   1

Definition at line 86 of file sddslocaldensity.c.

◆ NORM_RMS

#define NORM_RMS   2

Definition at line 87 of file sddslocaldensity.c.

◆ NOTHREADS

#define NOTHREADS   1

Definition at line 56 of file sddslocaldensity.c.

Enumeration Type Documentation

◆ option_type

enum option_type

Definition at line 60 of file sddslocaldensity.c.

60 {
61 CLO_COLUMNS,
62 CLO_PIPE,
63 CLO_OUTPUT,
64 CLO_FRACTION,
65 CLO_SPREAD,
66 CLO_KDE,
67 CLO_VERBOSE,
68 CLO_THREADS,
69 CLO_WEIGHT,
70 N_OPTIONS
71};

Function Documentation

◆ addDensityToBinTree()

void addDensityToBinTree ( int myid,
BIN_LAYER * layer,
long bins,
long inputColumns,
double * min,
double * delta,
double * bw,
uint64_t row,
uint64_t rows,
double ** data,
double weight,
long nSigmas,
double expLimit )

Definition at line 868 of file sddslocaldensity.c.

870 {
871 double *samplePoint;
872 long i;
873
874 samplePoint = malloc(sizeof(*samplePoint) * inputColumns);
875 for (i = 0; i < inputColumns; i++)
876 samplePoint[i] = data[i][row];
877
878 addDensityToBinTree1(myid, layer, bins, inputColumns, min_val, delta_val, bw, samplePoint, weight, rows, nSigmas, expLimit);
879
880 free(samplePoint);
881}

◆ addDensityToBinTree0()

void addDensityToBinTree0 ( int myid)

Definition at line 832 of file sddslocaldensity.c.

832 {
833 uint64_t iRow, iRow0, iRow1;
834 uint64_t myRowsSampled;
835 double time0, time1;
836
837 iRow0 = myid * (rows / threads);
838 if (myid == (threads - 1))
839 iRow1 = rows - 1;
840 else
841 iRow1 = (myid + 1) * (rows / threads) - 1;
842 if (verbose) {
843 printf("Thread %d handling density addition for rows %ld to %ld\n", myid, (long)iRow0, (long)iRow1);
844 fflush(stdout);
845 }
846
847 myRowsSampled = 0;
848 time0 = delapsed_time();
849 for (iRow = iRow0; iRow <= iRow1; iRow++) {
850 if ((kdeSampleFraction != 1) && (kdeSampleFraction < drand(-1)))
851 continue;
852 myRowsSampled++;
853 if (verbose && (time1 = delapsed_time()) > (time0 + 10)) {
854 time0 = time1;
855 fprintf(stderr, "Addition of density %f %% complete after %.0lf s\n", (100.0 * (iRow - iRow0) / (iRow1 - iRow0 + 1.0)), time1);
856 }
857 addDensityToBinTree(myid, &binTree, bins, inputColumns, min, delta, bw, iRow, rows, data,
858 weightValue ? weightValue[iRow] : 1.0, nKdeSigmas, kdeExpLimit);
859 }
860 if (verbose) {
861 printf("Finished adding density on thread %d\n", myid);
862 fflush(stdout);
863 }
864 rowsSampledThread[myid] = myRowsSampled;
865 return;
866}
float drand(long dummy)
Generate a uniform random float in [0,1].
Definition drand.c:41

◆ addDensityToBinTree1()

void addDensityToBinTree1 ( int myid,
BIN_LAYER * layer,
long bins,
long inputColumns,
double * min,
double * delta,
double * bw,
double * data,
double weight,
uint64_t rows,
long nSigmas,
double expLimit )

Definition at line 899 of file sddslocaldensity.c.

901 {
902 long i;
903 /* index limits in each dimension */
904 long *index1 = NULL, *index2 = NULL;
905 long *index = NULL;
906 /* allows multiplying in the inner loop instead of dividing (for each thread) */
907 double *invSqrtBw = NULL, *invBw = NULL;
908 long deltaIndex;
909 double zLimit, f1;
910
911 index = malloc(sizeof(*index) * inputColumns);
912 index1 = malloc(sizeof(*index1) * inputColumns);
913 index2 = malloc(sizeof(*index2) * inputColumns);
914 invSqrtBw = malloc(sizeof(*invSqrtBw) * inputColumns);
915 invBw = malloc(sizeof(*invBw) * inputColumns);
916
917 fillIndexArray(data_point, bins, inputColumns, index1, min_val, delta_val);
918 memcpy(index2, index1, sizeof(*index2) * inputColumns);
919 for (i = 0; i < inputColumns; i++) {
920 invBw[i] = 1. / bw[i];
921 invSqrtBw[i] = sqrt(invBw[i]);
922 deltaIndex = nSigmas * sqrt(bw[i]) / delta_val[i]; /* sum over +/-nSigmas*sigma */
923 if ((index1[i] -= deltaIndex) < 0)
924 index1[i] = 0;
925 if ((index2[i] += deltaIndex) > (bins - 1))
926 index2[i] = bins - 1;
927 }
928
929 memcpy(index, index1, sizeof(*index) * inputColumns);
930 zLimit = -2 * log(expLimit);
931 f1 = 1 / (pow(PIx2, inputColumns / 2.0) * rows);
932 do {
933 double z, p;
934 z = 0;
935 p = f1;
936 for (i = 0; i < inputColumns; i++) {
937 z += sqr(data_point[i] - (index[i] * delta_val[i] + min_val[i])) * invBw[i];
938 p *= invSqrtBw[i];
939 }
940 if (z < zLimit)
941 addToBinValue(myid, bins, inputColumns, layer, index, exp(-z / 2) * p * weight);
942 } while (advanceCounter(index, index1, index2, inputColumns));
943
944 free(index);
945 free(index1);
946 free(index2);
947 free(invSqrtBw);
948 free(invBw);
949}

◆ addToBinValue()

void addToBinValue ( int myid,
long bins,
long inputColumns,
BIN_LAYER * layer,
long * indexList,
double quantity )

Definition at line 691 of file sddslocaldensity.c.

691 {
692 while (inputColumns--) {
693 if (inputColumns == 0) {
694#pragma omp atomic
695 layer->sum[indexList[0]] += quantity;
696 } else {
697 layer = layer->nextLowerLayer + indexList[0];
698 indexList++;
699 }
700 }
701}

◆ advanceCounter()

int advanceCounter ( long * index,
long * index1,
long * index2,
long n )

Definition at line 883 of file sddslocaldensity.c.

883 {
884 long i, carry, rolledOver;
885 carry = 1;
886 rolledOver = 0;
887 for (i = 0; i < n; i++) {
888 index[i] += carry;
889 if (index[i] > index2[i]) {
890 carry = 1;
891 index[i] = index1[i];
892 rolledOver += 1;
893 } else
894 carry = 0;
895 }
896 return rolledOver != n;
897}

◆ createBinTree()

void createBinTree ( long bins,
long inputColumns,
BIN_LAYER * layer )

Definition at line 666 of file sddslocaldensity.c.

666 {
667 if (inputColumns == 1) {
668 layer->sum = calloc(bins, sizeof(*(layer->sum)));
669 } else {
670 long i;
671 layer->nextLowerLayer = calloc(bins, sizeof(BIN_LAYER));
672 for (i = 0; i < bins; i++)
673 createBinTree(bins, inputColumns - 1, layer->nextLowerLayer + i);
674 }
675}

◆ dumpBinValues()

void dumpBinValues ( SDDS_DATASET * SDDSkde,
SDDS_DATASET * SDDSin,
long bins,
char ** inputColumn,
long inputColumns,
char * densityColumn,
BIN_LAYER * layer,
double * min,
double * delta )

Definition at line 725 of file sddslocaldensity.c.

727 {
728 long i;
729 long *index, *index1, *index2;
730 int64_t row;
731 long *columnIndex, densityIndex;
732
733 if (!SDDS_StartPage(SDDSkde, (int64_t)ipow(bins, inputColumns)) ||
734 !SDDS_CopyParameters(SDDSkde, SDDSin))
735 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
736
737 columnIndex = calloc(inputColumns, sizeof(*columnIndex));
738 for (i = 0; i < inputColumns; i++)
739 columnIndex[i] = SDDS_GetColumnIndex(SDDSkde, inputColumn[i]);
740 densityIndex = SDDS_GetColumnIndex(SDDSkde, densityColumn);
741
742 index1 = calloc(inputColumns, sizeof(*index1));
743 index2 = calloc(inputColumns, sizeof(*index2));
744 index = calloc(inputColumns, sizeof(*index));
745 for (i = 0; i < inputColumns; i++)
746 index2[i] = bins - 1;
747 row = 0;
748 do {
749 for (i = 0; i < inputColumns; i++)
750 if (!SDDS_SetRowValues(SDDSkde, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE,
751 row, columnIndex[i], index[i] * delta_val[i] + min_val[i], -1))
752 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
753 if (!SDDS_SetRowValues(SDDSkde, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE,
754 row, densityIndex, retrieveBinValue(bins, inputColumns, layer, index), -1))
755 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
756 row++;
757 } while (advanceCounter(index, index1, index2, inputColumns));
758
759 if (!SDDS_WritePage(SDDSkde))
760 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
761
762 free(columnIndex);
763 free(index1);
764 free(index2);
765 free(index);
766}
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_GetColumnIndex(SDDS_DATASET *SDDS_dataset, char *name)
Retrieves the index of a named column in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
double ipow(const double x, const int64_t p)
Compute x raised to the power p (x^p).
Definition ipow.c:33

◆ fillIndexArray()

void fillIndexArray ( double * point,
long bins,
long inputColumns,
long * indexList,
double * min,
double * delta )

Definition at line 677 of file sddslocaldensity.c.

677 {
678 long i, j, bins_m1;
679
680 bins_m1 = bins - 1;
681 for (i = 0; i < inputColumns; i++) {
682 j = (point[i] - min_val[i]) / delta_val[i];
683 if (j < 0)
684 j = 0;
685 else if (j > bins_m1)
686 j = bins_m1;
687 indexList[i] = j;
688 }
689}

◆ interpolateBinValue()

double interpolateBinValue ( long bins,
long inputColumns,
BIN_LAYER * layer,
double * data,
double * min,
double * delta,
long * indexList )

Definition at line 806 of file sddslocaldensity.c.

806 {
807 double value1, value2, value;
808 if (indexList[0] != (bins - 1)) {
809 if (inputColumns > 1) {
810 value1 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0], data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
811 value2 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0] + 1, data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
812 } else {
813 value1 = layer->sum[indexList[0] + 0];
814 value2 = layer->sum[indexList[0] + 1];
815 }
816 value = value1 + (value2 - value1) / delta_val[0] * (data_point[0] - (min_val[0] + delta_val[0] * indexList[0]));
817 } else {
818 if (inputColumns > 1) {
819 value1 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0] - 1, data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
820 value2 = interpolateBinValue(bins, inputColumns - 1, layer->nextLowerLayer + indexList[0], data_point + 1, min_val + 1, delta_val + 1, indexList + 1);
821 } else {
822 value1 = layer->sum[indexList[0] - 1];
823 value2 = layer->sum[indexList[0] + 0];
824 }
825 value = value1 + (value2 - value1) / delta_val[0] * (data_point[0] - (min_val[0] + delta_val[0] * (indexList[0] - 1)));
826 }
827 if (isnan(value) || isinf(value) || value < 0)
828 value = 0;
829 return value;
830}

◆ interpolateBinValue0()

void interpolateBinValue0 ( int myid)

Definition at line 776 of file sddslocaldensity.c.

776 {
777 uint64_t iRow, iRow0, iRow1;
778 long *indexList, j;
779 double *point;
780 indexList = malloc(sizeof(*indexList) * inputColumns);
781 point = malloc(sizeof(*point) * inputColumns);
782 iRow0 = myid * (rows / threads);
783 if (myid == (threads - 1))
784 iRow1 = rows - 1;
785 else
786 iRow1 = (myid + 1) * (rows / threads) - 1;
787 if (verbose) {
788 printf("Thread %d handling interpolation for rows %ld to %ld\n", myid, (long)iRow0, (long)iRow1);
789 fflush(stdout);
790 }
791 for (iRow = iRow0; iRow <= iRow1; iRow++) {
792 for (j = 0; j < inputColumns; j++)
793 point[j] = data[j][iRow];
794 fillIndexArray(point, bins, inputColumns, indexList, min, delta);
795 density[iRow] = interpolateBinValue(bins, inputColumns, &binTree, point, min, delta, indexList);
796 }
797 free(point);
798 free(indexList);
799 if (verbose) {
800 printf("Finished interpolation on thread %d\n", myid);
801 fflush(stdout);
802 }
803 return;
804}

◆ main()

int main ( int argc,
char ** argv )

Definition at line 183 of file sddslocaldensity.c.

183 {
184 int iArg;
185 COLUMN_LIST *columnList;
186 int32_t nColumnLists;
187 char **inputColumn, *weightColumn;
188 short *normalizationMode;
189 double fraction, spreadFraction;
190 char *input, *output;
191 char *outputColumn;
192 long i, readCode;
193 unsigned long pipeFlags;
194 SCANNED_ARG *scanned;
195 SDDS_DATASET SDDSin, SDDSout, SDDSkde;
196 char *kdeGridOutput;
197 int32_t kdeNumberToUse;
198 long pass;
199 unsigned long kdeFlags;
200 double startTime;
201
203 argc = scanargs(&scanned, argc, argv);
204 if (argc < 3 || argc > (3 + N_OPTIONS))
205 bomb(NULL, USAGE);
206
207 output = input = NULL;
208 outputColumn = weightColumn = NULL;
209 columnList = NULL;
210 nColumnLists = 0;
211 pipeFlags = 0;
212 fraction = spreadFraction = 0;
213 bins = 0;
214 kdeGridOutput = NULL;
215 verbose = 0;
216 nKdeSigmas = 5;
217 kdeExpLimit = 1e-16;
218 kdeSampleFraction = 1;
219 kdeNumberToUse = -1;
220 kdeFlags = 0;
221
222 for (iArg = 1; iArg < argc; iArg++) {
223 if (scanned[iArg].arg_type == OPTION) {
224 /* process options here */
225 switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
226 case CLO_COLUMNS:
227 if (scanned[iArg].n_items < 3)
228 SDDS_Bomb("invalid -columns syntax");
229 columnList = SDDS_Realloc(columnList, sizeof(*columnList) * (nColumnLists + 1));
230 if ((columnList[nColumnLists].normalizationMode = match_string(scanned[iArg].list[1], normalizationOption,
231 NORM_OPTIONS, 0)) == -1)
232 SDDS_Bomb("invalid normalization mode given");
233 columnList[nColumnLists].suppliedNames = scanned[iArg].n_items - 2;
234 columnList[nColumnLists].suppliedName = tmalloc(sizeof(*(columnList[nColumnLists].suppliedName)) * columnList[nColumnLists].suppliedNames);
235 for (i = 0; i < columnList[nColumnLists].suppliedNames; i++) {
236 columnList[nColumnLists].suppliedName[i] = scanned[iArg].list[i + 2];
237 scanned[iArg].list[i + 2] = NULL;
238 }
239 nColumnLists++;
240 break;
241 case CLO_OUTPUT:
242 if (scanned[iArg].n_items != 2)
243 SDDS_Bomb("invalid -outputColumn syntax: give a name");
244 outputColumn = scanned[iArg].list[1];
245 scanned[iArg].list[1] = NULL;
246 break;
247 case CLO_FRACTION:
248 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%lf", &fraction) != 1 || fraction <= 0)
249 SDDS_Bomb("invalid -fraction syntax: give a value greater than 0");
250 break;
251 case CLO_SPREAD:
252 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%lf", &spreadFraction) != 1 || spreadFraction <= 0)
253 SDDS_Bomb("invalid -spread syntax: give a value greater than 0");
254 break;
255 case CLO_THREADS:
256 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%d", &threads) != 1 || threads <= 0)
257 SDDS_Bomb("invalid -threads syntax: give a value greater than 0");
258 break;
259 case CLO_KDE:
260 if (scanned[iArg].n_items < 2)
261 SDDS_Bomb("invalid -kde syntax: give number of bins");
262 scanned[iArg].n_items -= 1;
263 kdeFlags = 0;
264 if (!scanItemList(&kdeFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
265 "bins", SDDS_LONG, &bins, 1, KDE_BINS_SEEN,
266 "gridoutput", SDDS_STRING, &kdeGridOutput, 1, KDE_GRIDOUTPUT_SEEN,
267 "nsigmas", SDDS_LONG, &nKdeSigmas, 1, KDE_NSIGMAS_SEEN,
268 "explimit", SDDS_DOUBLE, &kdeExpLimit, 1, KDE_EXPLIMIT_SEEN,
269 "sample", SDDS_DOUBLE, &kdeSampleFraction, 1, KDE_SAMPLE_SEEN,
270 "use", SDDS_LONG, &kdeNumberToUse, 1, KDE_USE_SEEN,
271 "spanpages", -1, NULL, 0, KDE_SPAN_PAGES,
272 NULL))
273 SDDS_Bomb("invalid -kde syntax");
274 if (bins < 3)
275 SDDS_Bomb("Number of bins should be at least 3 for KDE");
276 if (nKdeSigmas < 1)
277 SDDS_Bomb("Number of sigmas should be at least 1 for KDE");
278 if (kdeExpLimit <= 0 || kdeExpLimit > 1)
279 SDDS_Bomb("Exponential limit for KDE must be (0, 1].");
280 if (kdeSampleFraction <= 0 || kdeSampleFraction > 1)
281 SDDS_Bomb("Sample fraction for KDE must be (0, 1].");
282 if (kdeFlags & KDE_USE_SEEN) {
283 if (kdeNumberToUse <= 1)
284 SDDS_Bomb("Number to use for KDE must be at least 1.");
285 if (kdeFlags & KDE_SAMPLE_SEEN)
286 SDDS_Bomb("Give sample fraction or number to use for KDE, not both.");
287 }
288 break;
289 case CLO_PIPE:
290 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
291 SDDS_Bomb("invalid -pipe syntax");
292 break;
293 case CLO_VERBOSE:
294 verbose = 1;
295 break;
296 case CLO_WEIGHT:
297 if (scanned[iArg].n_items != 2)
298 SDDS_Bomb("invalid -weight syntax: give the name of a column");
299 weightColumn = scanned[iArg].list[1];
300 break;
301 default:
302 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
303 exit(EXIT_FAILURE);
304 break;
305 }
306 } else {
307 if (!input)
308 input = scanned[iArg].list[0];
309 else if (!output)
310 output = scanned[iArg].list[0];
311 else
312 SDDS_Bomb("too many filenames seen");
313 }
314 }
315
316 startTime = delapsed_time();
317 if (((fraction > 0 ? 1 : 0) + (spreadFraction > 0 ? 1 : 0) + (bins > 0)) > 1)
318 SDDS_Bomb("give only one of -fraction, -spread, or -kde");
319
320 processFilenames("sddslocaldensity", &input, &output, pipeFlags, 0, NULL);
321
322 if (kdeFlags & KDE_SPAN_PAGES && pipeFlags)
323 SDDS_Bomb("-kde=spanPages is incompatible with -pipe option");
324 if (!kdeFlags && weightColumn)
325 SDDS_Bomb("-weight is only supported for -kde at present");
326
327 if (!nColumnLists)
328 SDDS_Bomb("supply the names of columns to include with the -columns option");
329
330 if (!SDDS_InitializeInput(&SDDSin, input))
331 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
332
333 if (!resolveColumnNames(&SDDSin, columnList, nColumnLists, &inputColumn, &normalizationMode, &inputColumns))
334 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
335 if (weightColumn && !(SDDS_CheckColumn(&SDDSin, weightColumn, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK))
336 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
337
338 if (!SDDS_InitializeCopy(&SDDSout, &SDDSin, output, "w"))
339 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
340
341 if (!outputColumn)
342 cp_str(&outputColumn, "LocalDensity");
343
344 if (!SDDS_DefineSimpleColumn(&SDDSout, outputColumn, NULL, SDDS_DOUBLE) ||
345 !SDDS_DefineSimpleParameter(&SDDSout, "sddslocaldensityElapsedTime", "s", SDDS_DOUBLE) ||
346 !SDDS_DefineSimpleParameter(&SDDSout, "sddslocaldensityThreads", NULL, SDDS_SHORT))
347 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
348
349 if (!SDDS_WriteLayout(&SDDSout))
350 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
351
352 if (kdeGridOutput) {
353 if (!SDDS_InitializeOutput(&SDDSkde, SDDS_BINARY, 1, NULL, NULL, kdeGridOutput))
354 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
355 for (i = 0; i < inputColumns; i++)
356 if (!SDDS_TransferColumnDefinition(&SDDSkde, &SDDSin, inputColumn[i], NULL))
357 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
358 if (!SDDS_TransferAllParameterDefinitions(&SDDSkde, &SDDSin, SDDS_TRANSFER_OVERWRITE))
359 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
360 if (!SDDS_DefineSimpleColumn(&SDDSkde, outputColumn, NULL, SDDS_DOUBLE))
361 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
362 if (!SDDS_WriteLayout(&SDDSkde))
363 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
364 }
365
366 data = tmalloc(sizeof(*data) * inputColumns);
367
368 if (bins) {
369 binTree.sum = NULL;
370 binTree.bins = 0;
371 binTree.nextLowerLayer = NULL;
372 createBinTree(bins, inputColumns, &binTree);
373 }
374
375 min = calloc(inputColumns, sizeof(*min));
376 max = calloc(inputColumns, sizeof(*max));
377 delta = malloc(sizeof(*delta) * inputColumns);
378#if !defined(NOTHREADS)
379 omp_set_num_threads(threads);
380#endif
381 bw = malloc(sizeof(*bw) * inputColumns);
382
383 pass = 1;
384 while ((kdeFlags & KDE_SPAN_PAGES && pass < 4) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
385 while ((readCode = SDDS_ReadPage(&SDDSin)) > 0) {
386 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
387 if (!SDDS_CopyPage(&SDDSout, &SDDSin))
388 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
389 }
390 if ((rows = SDDS_CountRowsOfInterest(&SDDSin))) {
391 if (verbose)
392 fprintf(stderr, "Processing page %ld (pass %ld) with %" PRId64 " rows\n", readCode, pass, rows);
393 if (weightColumn &&
394 !(weightValue = SDDS_GetColumnInDoubles(&SDDSin, weightColumn)))
395 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
396 for (i = 0; i < inputColumns; i++) {
397 if (!(data[i] = SDDS_GetColumnInDoubles(&SDDSin, inputColumn[i])))
398 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
399 if (pass == 1) {
400 update_min_max(&min[i], &max[i], data[i], rows, kdeFlags & KDE_SPAN_PAGES ? readCode == 1 : 1);
401 if (bins)
402 delta[i] = (max[i] - min[i]) / (bins - 1);
403 else
404 delta[i] = 0;
405 if (verbose)
406 fprintf(stderr, "%s:[%le, %le] delta=%le\n", inputColumn[i], min[i], max[i], delta[i]);
407 }
408 if (pass != 1)
409 normalizeData(data[i], rows, normalizationMode[i], min[i], max[i], threads);
410 }
411 if (bins) {
412 if ((!(kdeFlags & KDE_SPAN_PAGES) && pass == 1) ||
413 (kdeFlags & KDE_SPAN_PAGES && pass == 2 && readCode == 1)) {
414 for (i = 0; i < inputColumns; i++) {
415 bw[i] = SilvermansBandwidth(data[i], inputColumns, rows);
416 if (verbose)
417 fprintf(stderr, "Bandwidth for %s is %le\n", inputColumn[i], bw[i]);
418 }
419 }
420
421 if ((!(kdeFlags & KDE_SPAN_PAGES) && pass == 1) ||
422 (kdeFlags & KDE_SPAN_PAGES && pass == 2)) {
423 if (verbose)
424 fprintf(stderr, "Summing density over grid.\n");
425 rowsSampled = 0;
426 if (kdeFlags & KDE_USE_SEEN) {
427 if ((kdeSampleFraction = (1.0 * kdeNumberToUse) / rows) > 1)
428 kdeSampleFraction = 1;
429 }
430 rowsSampledThread = calloc(threads, sizeof(*rowsSampledThread));
431 rowsSampled = 0;
432#pragma omp parallel
433 {
434#if defined(NOTHREADS)
435 addDensityToBinTree0(0);
436#else
437 addDensityToBinTree0(omp_get_thread_num());
438#endif
439 }
440 for (i = 0; i < threads; i++) {
441 rowsSampled += rowsSampledThread[i];
442 }
443 free(rowsSampledThread);
444
445 if (rowsSampled != rows) {
446 if (verbose)
447 fprintf(stderr, "%" PRId64 " of %" PRId64 " rows sampled\n", rowsSampled, rows);
448 rescaleDensity(bins, inputColumns, &binTree, (1.0 * rows / rowsSampled));
449 }
450 if (kdeGridOutput) {
451 if (verbose)
452 fprintf(stderr, "Dumping KDE grid\n");
453 dumpBinValues(&SDDSkde, &SDDSin, bins, inputColumn, inputColumns, outputColumn, &binTree, min, delta);
454 }
455 }
456
457 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
458
459 density = malloc(sizeof(*density) * rows);
460 if (verbose)
461 fprintf(stderr, "Interpolating density\n");
462#pragma omp parallel
463 {
464#if defined(NOTHREADS)
465 interpolateBinValue0(0);
466#else
467 interpolateBinValue0(omp_get_thread_num());
468#endif
469 }
470
471 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, density, rows, outputColumn))
472 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
473 free(density);
474 if (!(kdeFlags & KDE_SPAN_PAGES)) {
475 if (verbose)
476 fprintf(stderr, "Setting density map to zero\n");
477 zeroBinValues(bins, inputColumns, &binTree);
478 }
479 }
480 for (i = 0; i < inputColumns; i++)
481 free(data[i]);
482 } /* KDE mode */
483 else {
484 if (fraction > 0) {
485 double *count, *epsilon, distance;
486 count = tmalloc(rows * sizeof(*count));
487 epsilon = tmalloc(inputColumns * sizeof(*epsilon));
488 for (i = 0; i < inputColumns; i++)
489 epsilon[i] = fraction * (max[i] - min[i]);
490 for (iRow = 0; iRow < rows; iRow++) {
491 int64_t nInside = 0;
492 for (jRow = 0; jRow < rows; jRow++) {
493 short inside = 1;
494 if (iRow != jRow) {
495 for (i = 0; i < inputColumns; i++) {
496 distance = fabs(data[i][iRow] - data[i][jRow]);
497 if (distance > epsilon[i]) {
498 inside = 0;
499 break;
500 }
501 }
502 }
503 if (inside)
504 nInside++;
505 }
506 count[iRow] = nInside;
507 }
508 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, count, rows, outputColumn))
509 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
510 free(count);
511 free(epsilon);
512 } else if (spreadFraction > 0) {
513 double *count, *spread;
514 count = tmalloc(rows * sizeof(*count));
515 spread = tmalloc(sizeof(*spread) * inputColumns);
516 for (i = 0; i < inputColumns; i++) {
517 spread[i] = spreadFraction * (max[i] - min[i]);
518 }
519 for (iRow = 0; iRow < rows; iRow++) {
520 count[iRow] = 0;
521 for (jRow = 0; jRow < rows; jRow++) {
522 double term = 1;
523 for (i = 0; i < inputColumns; i++)
524 term *= exp(-sqr((data[i][iRow] - data[i][jRow]) / spread[i]) / 2);
525 count[iRow] += term;
526 }
527 }
528 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, count, rows, outputColumn))
529 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
530 free(count);
531 free(spread);
532 } else {
533 double *distance;
534 distance = tmalloc(rows * sizeof(*distance));
535 for (iRow = 0; iRow < rows; iRow++) {
536 double sum;
537 distance[iRow] = 0;
538 for (jRow = 0; jRow < rows; jRow++) {
539 sum = 0;
540 for (i = 0; i < inputColumns; i++)
541 sum += sqr(data[i][iRow] - data[i][jRow]);
542 distance[iRow] += sqrt(sum);
543 }
544 }
545 for (iRow = 0; iRow < rows; iRow++)
546 distance[iRow] = rows / distance[iRow];
547 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, distance, rows, outputColumn))
548 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
549 free(distance);
550 }
551 }
552 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
553 if (!SDDS_SetParameters(&SDDSout, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE,
554 "sddslocaldensityElapsedTime", delapsed_time() - startTime,
555 "sddslocaldensityThreads", (short)threads,
556 NULL) ||
557 !SDDS_WritePage(&SDDSout))
558 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
559 }
560 }
561 }
562 if (kdeFlags & KDE_SPAN_PAGES && pass != 3) {
563 if (verbose)
564 fprintf(stderr, "Closing input file %s and reopening for second pass\n", input);
565 if (!SDDS_Terminate(&SDDSin) || !SDDS_InitializeInput(&SDDSin, input)) {
566 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
567 exit(EXIT_FAILURE);
568 }
569 }
570 pass++;
571 }
572 if (!SDDS_Terminate(&SDDSin) || !SDDS_Terminate(&SDDSout)) {
573 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
574 exit(EXIT_FAILURE);
575 }
576 if (kdeGridOutput && !SDDS_Terminate(&SDDSkde)) {
577 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
578 exit(EXIT_FAILURE);
579 }
580 if (verbose)
581 printf("Execution completed in %le s\n", delapsed_time() - startTime);
582 return EXIT_SUCCESS;
583}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_SHORT
Identifier for the signed short integer data type.
Definition SDDStypes.h:73
#define SDDS_ANY_NUMERIC_TYPE
Special identifier used by SDDS_Check*() routines to accept any numeric type.
Definition SDDStypes.h:157
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
char * cp_str(char **s, char *t)
Copies a string, allocating memory for storage.
Definition cp_str.c:28
int update_min_max(double *min, double *max, double *list, int64_t n, int32_t reset)
Updates the minimum and maximum values based on a list of doubles.
Definition findMinMax.c:72
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.

◆ normalizeData()

void normalizeData ( double * data,
int64_t rows,
short mode,
double min,
double max,
int threads )

Definition at line 635 of file sddslocaldensity.c.

635 {
636 int64_t i;
637 double offset, rms, divisor;
638
639 switch (mode) {
640 case NORM_RANGE:
641 if (min_val == max_val)
642 SDDS_Bomb("attempt to normalize data with zero range");
643 divisor = max_val - min_val;
644 offset = max_val - min_val;
645 break;
646 case NORM_RMS:
647 computeMomentsThreaded(&offset, NULL, &rms, NULL, data, rows, threads);
648 if (rms == 0)
649 SDDS_Bomb("attempt to normalize data with zero rms");
650 divisor = rms;
651 break;
652 default:
653 divisor = 1;
654 offset = 0;
655 break;
656 }
657 for (i = 0; i < rows; i++)
658 data[i] = (data[i] - offset) / divisor;
659}
long computeMomentsThreaded(double *mean, double *rms, double *standDev, double *meanAbsoluteDev, double *x, long n, long numThreads)
Computes the mean, RMS, standard deviation, and mean absolute deviation of an array using multiple th...
Definition moments.c:127

◆ rescaleDensity()

void rescaleDensity ( long bins,
long inputColumns,
BIN_LAYER * layer,
double factor )

Definition at line 703 of file sddslocaldensity.c.

703 {
704 long i;
705 if (inputColumns == 1) {
706 for (i = 0; i < bins; i++)
707 layer->sum[i] *= factor;
708 } else {
709 for (i = 0; i < bins; i++)
710 rescaleDensity(bins, inputColumns - 1, layer->nextLowerLayer + i, factor);
711 }
712}

◆ resolveColumnNames()

long resolveColumnNames ( SDDS_DATASET * SDDSin,
COLUMN_LIST * columnList,
int32_t nColumnLists,
char *** columnName,
short ** normalizationMode,
int32_t * nColumns )

Definition at line 585 of file sddslocaldensity.c.

586 {
587 int32_t i, j, k;
588 char **newColumnName;
589 int32_t newColumns;
590 short duplicate;
591
592 *columnName = NULL;
593 *normalizationMode = NULL;
594 *nColumns = 0;
595
596 for (i = 0; i < nColumnLists; i++) {
597 SDDS_SetColumnFlags(SDDSin, 0);
598 for (j = 0; j < columnList[i].suppliedNames; j++) {
599 if (!SDDS_SetColumnsOfInterest(SDDSin, SDDS_MATCH_STRING, columnList[i].suppliedName[j], SDDS_OR))
600 return 0;
601 }
602 if (!(newColumnName = SDDS_GetColumnNames(SDDSin, &newColumns)) || newColumns == 0) {
603 SDDS_SetError("no columns found for one or more -column options");
604 return 0;
605 }
606 for (j = 0; j < newColumns; j++) {
607 duplicate = 0;
608 for (k = 0; k < *nColumns; k++) {
609 if (strcmp(newColumnName[j], (*columnName)[k]) == 0) {
610 /* overwrite the previous normalization mode */
611 (*normalizationMode)[k] = columnList[i].normalizationMode;
612 duplicate = 1;
613 }
614 }
615 if (!duplicate) {
616 *columnName = SDDS_Realloc(*columnName, sizeof(**columnName) * (*nColumns + 1));
617 (*columnName)[*nColumns] = newColumnName[j];
618 *normalizationMode = SDDS_Realloc(*normalizationMode, sizeof(**normalizationMode) * (*nColumns + 1));
619 (*normalizationMode)[*nColumns] = columnList[i].normalizationMode;
620 *nColumns += 1;
621 }
622 }
623 }
624 /*
625 printf("Computing density using %d columns\n", *nColumns);
626 for (i=0; i<*nColumns; i++) {
627 printf("%s --- %s normalization\n",
628 (*columnName)[i], normalizationOption[(*normalizationMode)[i]]);
629 fflush(stdout);
630 }
631 */
632 return 1;
633}
int32_t SDDS_SetColumnsOfInterest(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
Sets the acceptance flags for columns based on specified naming criteria.
int32_t SDDS_SetColumnFlags(SDDS_DATASET *SDDS_dataset, int32_t column_flag_value)
Sets the acceptance flags for all columns in the current data table of a data set.
void SDDS_SetError(char *error_text)
Records an error message in the SDDS error stack.
Definition SDDS_utils.c:379
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.

◆ retrieveBinValue()

double retrieveBinValue ( long bins,
long inputColumns,
BIN_LAYER * layer,
long * indexList )

Definition at line 768 of file sddslocaldensity.c.

768 {
769 while (--inputColumns) {
770 layer = layer->nextLowerLayer + indexList[0];
771 indexList++;
772 }
773 return layer->sum[indexList[0]];
774}

◆ SilvermansBandwidth()

double SilvermansBandwidth ( double data[],
long dimensions,
int64_t M )

Definition at line 661 of file sddslocaldensity.c.

661 {
662 /* See https://en.wikipedia.org/wiki/Multivariate_kernel_density_estimation#Optimal_bandwidth_matrix_selection */
663 return pow(4.0 / (dimensions + 2.0) / M, 2.0 / (dimensions + 4.0)) * ipow2(gsl_stats_sd(data, 1, M));
664}

◆ zeroBinValues()

void zeroBinValues ( long bins,
long inputColumns,
BIN_LAYER * layer )

Definition at line 714 of file sddslocaldensity.c.

714 {
715 long i;
716 if (inputColumns == 1) {
717 for (i = 0; i < bins; i++)
718 layer->sum[i] = 0;
719 } else {
720 for (i = 0; i < bins; i++)
721 zeroBinValues(bins, inputColumns - 1, layer->nextLowerLayer + i);
722 }
723}

Variable Documentation

◆ bins

long bins

Definition at line 170 of file sddslocaldensity.c.

◆ binTree

BIN_LAYER binTree

Definition at line 169 of file sddslocaldensity.c.

◆ bw

double* bw = NULL

Definition at line 179 of file sddslocaldensity.c.

◆ data

double** data

Definition at line 174 of file sddslocaldensity.c.

◆ delta

double * delta

Definition at line 171 of file sddslocaldensity.c.

◆ density

double* density = NULL

Definition at line 180 of file sddslocaldensity.c.

◆ inputColumns

int32_t inputColumns

Definition at line 178 of file sddslocaldensity.c.

◆ iRow

int64_t iRow

Definition at line 172 of file sddslocaldensity.c.

◆ jRow

int64_t jRow

Definition at line 172 of file sddslocaldensity.c.

◆ kdeExpLimit

double kdeExpLimit

Definition at line 175 of file sddslocaldensity.c.

◆ kdeSampleFraction

double kdeSampleFraction

Definition at line 177 of file sddslocaldensity.c.

◆ max

double * max

Definition at line 171 of file sddslocaldensity.c.

◆ min

double* min

Definition at line 171 of file sddslocaldensity.c.

◆ nKdeSigmas

long nKdeSigmas

Definition at line 170 of file sddslocaldensity.c.

◆ normalizationOption

char* normalizationOption[NORM_OPTIONS]
Initial value:
= {
"none", "range", "rms"}

Definition at line 89 of file sddslocaldensity.c.

89 {
90 "none", "range", "rms"};

◆ option

char* option[N_OPTIONS]
Initial value:
= {
"columns",
"pipe",
"output",
"fraction",
"spread",
"kde",
"verbose",
"threads",
"weight",
}

Definition at line 73 of file sddslocaldensity.c.

73 {
74 "columns",
75 "pipe",
76 "output",
77 "fraction",
78 "spread",
79 "kde",
80 "verbose",
81 "threads",
82 "weight",
83};

◆ rows

int64_t rows

Definition at line 172 of file sddslocaldensity.c.

◆ rowsSampled

int64_t rowsSampled

Definition at line 172 of file sddslocaldensity.c.

◆ rowsSampledThread

int64_t* rowsSampledThread = NULL

Definition at line 173 of file sddslocaldensity.c.

◆ threads

int threads = 1

Definition at line 168 of file sddslocaldensity.c.

◆ USAGE

char* USAGE
static
Initial value:
=
"Usage: sddslocaldensity [<inputfile>] [<outputfile>] [-pipe=[input][,output]] [-threads=<number>]\n"
" {-fraction=<value> | -spread=<value> | \n"
" -kde=bins=<number>[,gridoutput=<filename>][,nsigma=<value>][,explimit=<value>]"
"[,sample=<fraction>|use=<number>][,spanPages]}\n"
" -columns=<normalizationMode>,<name>[,...] [-output=<columnName>] "
"[-weight=<columnName>] [-verbose]\n\n"
"Options:\n"
" -pipe The standard SDDS Toolkit pipe option.\n"
" -threads The number of threads to use.\n"
" -columns Specifies the names of the columns to include. The names may include wildcards.\n"
" The normalization mode is one of \"none\", \"range\", or \"rms\".\n"
" Note that the normalization mode is irrelevant when fraction or spread options are used.\n"
" -weight Name of the column with which to weight the contributions of each point.\n"
" -fraction Fraction of the range to use to identify \"nearby\" points.\n"
" -spread Standard deviation of the weighting function as a fraction of the range.\n"
" -kde If specified, use n-dimensional Kernel Density Estimation instead of a point-based algorithm.\n"
" Highly recommended when the number of data points is large to avoid N² growth in runtime.\n"
" nsigma gives the number of standard deviations of the bandwidth over which to sum the\n"
" Gaussian factor; smaller numbers can considerably improve performance at the expense of\n"
" lower accuracy in the tails. Using the sample qualifier allows using a randomly-sampled\n"
" fraction of the data to create the density map. The use qualifier allows computing the\n"
" sample fraction so that approximately the indicated number of samples is used.\n"
" If spanPages is given, the KDE density map is created using data from all pages of the file;\n"
" however, the output retains the original page breakdown; useful when processing very large\n"
" quantities of data.\n"
" -output Name of the output column. Defaults to \"LocalDensity\".\n"
" -verbose Print progress information while running.\n\n"
"Program by Michael Borland. (" __DATE__ " " __TIME__ "\", SVN revision: " SVN_VERSION ")\n"

Definition at line 93 of file sddslocaldensity.c.

◆ verbose

int verbose

Definition at line 176 of file sddslocaldensity.c.

◆ weightValue

double* weightValue = NULL

Definition at line 181 of file sddslocaldensity.c.