215 {
216 int iArg;
218 int32_t nColumnLists;
219 char **inputColumn, *weightColumn;
220 short *normalizationMode;
221 double fraction, spreadFraction;
222 char *input, *output;
223 char *outputColumn;
224 long i, readCode;
225 unsigned long pipeFlags;
226 SCANNED_ARG *scanned;
228 char *kdeGridOutput;
229 int32_t kdeNumberToUse;
230 long pass;
231 unsigned long kdeFlags;
232 double startTime;
233
235 argc =
scanargs(&scanned, argc, argv);
236 if (argc < 3 || argc > (3 + N_OPTIONS))
238
239 output = input = NULL;
240 outputColumn = weightColumn = NULL;
241 columnList = NULL;
242 nColumnLists = 0;
243 pipeFlags = 0;
244 fraction = spreadFraction = 0;
245 bins = 0;
246 kdeGridOutput = NULL;
247 verbose = 0;
248 nKdeSigmas = 5;
249 kdeExpLimit = 1e-16;
250 kdeSampleFraction = 1;
251 kdeNumberToUse = -1;
252 kdeFlags = 0;
253
254 for (iArg = 1; iArg < argc; iArg++) {
255 if (scanned[iArg].arg_type == OPTION) {
256
257 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
258 case CLO_COLUMNS:
259 if (scanned[iArg].n_items < 3)
261 columnList =
SDDS_Realloc(columnList,
sizeof(*columnList) * (nColumnLists + 1));
262 if ((columnList[nColumnLists].normalizationMode =
match_string(scanned[iArg].list[1], normalizationOption,
263 NORM_OPTIONS, 0)) == -1)
264 SDDS_Bomb(
"invalid normalization mode given");
265 columnList[nColumnLists].suppliedNames = scanned[iArg].n_items - 2;
266 columnList[nColumnLists].suppliedName =
tmalloc(
sizeof(*(columnList[nColumnLists].suppliedName)) * columnList[nColumnLists].suppliedNames);
267 for (i = 0; i < columnList[nColumnLists].suppliedNames; i++) {
268 columnList[nColumnLists].suppliedName[i] = scanned[iArg].list[i + 2];
269 scanned[iArg].list[i + 2] = NULL;
270 }
271 nColumnLists++;
272 break;
273 case CLO_OUTPUT:
274 if (scanned[iArg].n_items != 2)
275 SDDS_Bomb(
"invalid -outputColumn syntax: give a name");
276 outputColumn = scanned[iArg].list[1];
277 scanned[iArg].list[1] = NULL;
278 break;
279 case CLO_FRACTION:
280 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%lf", &fraction) != 1 || fraction <= 0)
281 SDDS_Bomb(
"invalid -fraction syntax: give a value greater than 0");
282 break;
283 case CLO_SPREAD:
284 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%lf", &spreadFraction) != 1 || spreadFraction <= 0)
285 SDDS_Bomb(
"invalid -spread syntax: give a value greater than 0");
286 break;
287 case CLO_THREADS:
288 if (scanned[iArg].n_items != 2 || sscanf(scanned[iArg].list[1], "%d", &threads) != 1 || threads <= 0)
289 SDDS_Bomb(
"invalid -threads syntax: give a value greater than 0");
290 break;
291 case CLO_KDE:
292 if (scanned[iArg].n_items < 2)
293 SDDS_Bomb(
"invalid -kde syntax: give number of bins");
294 scanned[iArg].n_items -= 1;
295 kdeFlags = 0;
296 if (!
scanItemList(&kdeFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
297 "bins",
SDDS_LONG, &bins, 1, KDE_BINS_SEEN,
298 "gridoutput",
SDDS_STRING, &kdeGridOutput, 1, KDE_GRIDOUTPUT_SEEN,
299 "nsigmas",
SDDS_LONG, &nKdeSigmas, 1, KDE_NSIGMAS_SEEN,
300 "explimit",
SDDS_DOUBLE, &kdeExpLimit, 1, KDE_EXPLIMIT_SEEN,
301 "sample",
SDDS_DOUBLE, &kdeSampleFraction, 1, KDE_SAMPLE_SEEN,
302 "use",
SDDS_LONG, &kdeNumberToUse, 1, KDE_USE_SEEN,
303 "spanpages", -1, NULL, 0, KDE_SPAN_PAGES,
304 NULL))
306 if (bins < 3)
307 SDDS_Bomb(
"Number of bins should be at least 3 for KDE");
308 if (nKdeSigmas < 1)
309 SDDS_Bomb(
"Number of sigmas should be at least 1 for KDE");
310 if (kdeExpLimit <= 0 || kdeExpLimit > 1)
311 SDDS_Bomb(
"Exponential limit for KDE must be (0, 1].");
312 if (kdeSampleFraction <= 0 || kdeSampleFraction > 1)
313 SDDS_Bomb(
"Sample fraction for KDE must be (0, 1].");
314 if (kdeFlags & KDE_USE_SEEN) {
315 if (kdeNumberToUse <= 1)
316 SDDS_Bomb(
"Number to use for KDE must be at least 1.");
317 if (kdeFlags & KDE_SAMPLE_SEEN)
318 SDDS_Bomb(
"Give sample fraction or number to use for KDE, not both.");
319 }
320 break;
321 case CLO_PIPE:
322 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
324 break;
325 case CLO_VERBOSE:
326 verbose = 1;
327 break;
328 case CLO_WEIGHT:
329 if (scanned[iArg].n_items != 2)
330 SDDS_Bomb(
"invalid -weight syntax: give the name of a column");
331 weightColumn = scanned[iArg].list[1];
332 break;
333 default:
334 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
335 exit(EXIT_FAILURE);
336 break;
337 }
338 } else {
339 if (!input)
340 input = scanned[iArg].list[0];
341 else if (!output)
342 output = scanned[iArg].list[0];
343 else
345 }
346 }
347
349 if (((fraction > 0 ? 1 : 0) + (spreadFraction > 0 ? 1 : 0) + (bins > 0)) > 1)
350 SDDS_Bomb(
"give only one of -fraction, -spread, or -kde");
351
353
354 if (kdeFlags & KDE_SPAN_PAGES && pipeFlags)
355 SDDS_Bomb(
"-kde=spanPages is incompatible with -pipe option");
356 if (!kdeFlags && weightColumn)
357 SDDS_Bomb(
"-weight is only supported for -kde at present");
358
359 if (!nColumnLists)
360 SDDS_Bomb(
"supply the names of columns to include with the -columns option");
361
364
365 if (!resolveColumnNames(&SDDSin, columnList, nColumnLists, &inputColumn, &normalizationMode, &inputColumns))
369
372
373 if (!outputColumn)
374 cp_str(&outputColumn,
"LocalDensity");
375
380
383
384 if (kdeGridOutput) {
387 for (i = 0; i < inputColumns; i++)
396 }
397
398 data =
tmalloc(
sizeof(*data) * inputColumns);
399
400 if (bins) {
401 binTree.sum = NULL;
402 binTree.bins = 0;
403 binTree.nextLowerLayer = NULL;
404 createBinTree(bins, inputColumns, &binTree);
405 }
406
407 min = calloc(inputColumns, sizeof(*min));
408 max = calloc(inputColumns, sizeof(*max));
409 delta = malloc(sizeof(*delta) * inputColumns);
410#if !defined(NOTHREADS)
411 omp_set_num_threads(threads);
412#endif
413 bw = malloc(sizeof(*bw) * inputColumns);
414
415 pass = 1;
416 while ((kdeFlags & KDE_SPAN_PAGES && pass < 4) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
418 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
421 }
423 if (verbose)
424 fprintf(stderr, "Processing page %ld (pass %ld) with %" PRId64 " rows\n", readCode, pass, rows);
425 if (weightColumn &&
428 for (i = 0; i < inputColumns; i++) {
431 if (pass == 1) {
432 update_min_max(&min[i], &max[i], data[i], rows, kdeFlags & KDE_SPAN_PAGES ? readCode == 1 : 1);
433 if (bins)
434 delta[i] = (max[i] - min[i]) / (bins - 1);
435 else
436 delta[i] = 0;
437 if (verbose)
438 fprintf(stderr, "%s:[%le, %le] delta=%le\n", inputColumn[i], min[i], max[i], delta[i]);
439 }
440 if (pass != 1)
441 normalizeData(data[i], rows, normalizationMode[i], min[i], max[i], threads);
442 }
443 if (bins) {
444 if ((!(kdeFlags & KDE_SPAN_PAGES) && pass == 1) ||
445 (kdeFlags & KDE_SPAN_PAGES && pass == 2 && readCode == 1)) {
446 for (i = 0; i < inputColumns; i++) {
447 bw[i] = SilvermansBandwidth(data[i], inputColumns, rows);
448 if (verbose)
449 fprintf(stderr, "Bandwidth for %s is %le\n", inputColumn[i], bw[i]);
450 }
451 }
452
453 if ((!(kdeFlags & KDE_SPAN_PAGES) && pass == 1) ||
454 (kdeFlags & KDE_SPAN_PAGES && pass == 2)) {
455 if (verbose)
456 fprintf(stderr, "Summing density over grid.\n");
457 rowsSampled = 0;
458 if (kdeFlags & KDE_USE_SEEN) {
459 if ((kdeSampleFraction = (1.0 * kdeNumberToUse) / rows) > 1)
460 kdeSampleFraction = 1;
461 }
462 rowsSampledThread = calloc(threads, sizeof(*rowsSampledThread));
463 rowsSampled = 0;
464#pragma omp parallel
465 {
466#if defined(NOTHREADS)
467 addDensityToBinTree0(0);
468#else
469 addDensityToBinTree0(omp_get_thread_num());
470#endif
471 }
472 for (i = 0; i < threads; i++) {
473 rowsSampled += rowsSampledThread[i];
474 }
475 free(rowsSampledThread);
476
477 if (rowsSampled != rows) {
478 if (verbose)
479 fprintf(stderr, "%" PRId64 " of %" PRId64 " rows sampled\n", rowsSampled, rows);
480 rescaleDensity(bins, inputColumns, &binTree, (1.0 * rows / rowsSampled));
481 }
482 if (kdeGridOutput) {
483 if (verbose)
484 fprintf(stderr, "Dumping KDE grid\n");
485 dumpBinValues(&SDDSkde, &SDDSin, bins, inputColumn, inputColumns, outputColumn, &binTree, min, delta);
486 }
487 }
488
489 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
490
491 density = malloc(sizeof(*density) * rows);
492 if (verbose)
493 fprintf(stderr, "Interpolating density\n");
494#pragma omp parallel
495 {
496#if defined(NOTHREADS)
497 interpolateBinValue0(0);
498#else
499 interpolateBinValue0(omp_get_thread_num());
500#endif
501 }
502
505 free(density);
506 if (!(kdeFlags & KDE_SPAN_PAGES)) {
507 if (verbose)
508 fprintf(stderr, "Setting density map to zero\n");
509 zeroBinValues(bins, inputColumns, &binTree);
510 }
511 }
512 for (i = 0; i < inputColumns; i++)
513 free(data[i]);
514 }
515 else {
516 if (fraction > 0) {
517 double *count, *epsilon, distance;
518 count =
tmalloc(rows *
sizeof(*count));
519 epsilon =
tmalloc(inputColumns *
sizeof(*epsilon));
520 for (i = 0; i < inputColumns; i++)
521 epsilon[i] = fraction * (max[i] - min[i]);
522 for (iRow = 0; iRow < rows; iRow++) {
523 int64_t nInside = 0;
524 for (jRow = 0; jRow < rows; jRow++) {
525 short inside = 1;
526 if (iRow != jRow) {
527 for (i = 0; i < inputColumns; i++) {
528 distance = fabs(data[i][iRow] - data[i][jRow]);
529 if (distance > epsilon[i]) {
530 inside = 0;
531 break;
532 }
533 }
534 }
535 if (inside)
536 nInside++;
537 }
538 count[iRow] = nInside;
539 }
542 free(count);
543 free(epsilon);
544 } else if (spreadFraction > 0) {
545 double *count, *spread;
546 count =
tmalloc(rows *
sizeof(*count));
547 spread =
tmalloc(
sizeof(*spread) * inputColumns);
548 for (i = 0; i < inputColumns; i++) {
549 spread[i] = spreadFraction * (max[i] - min[i]);
550 }
551 for (iRow = 0; iRow < rows; iRow++) {
552 count[iRow] = 0;
553 for (jRow = 0; jRow < rows; jRow++) {
554 double term = 1;
555 for (i = 0; i < inputColumns; i++)
556 term *= exp(-sqr((data[i][iRow] - data[i][jRow]) / spread[i]) / 2);
557 count[iRow] += term;
558 }
559 }
562 free(count);
563 free(spread);
564 } else {
565 double *distance;
566 distance =
tmalloc(rows *
sizeof(*distance));
567 for (iRow = 0; iRow < rows; iRow++) {
568 double sum;
569 distance[iRow] = 0;
570 for (jRow = 0; jRow < rows; jRow++) {
571 sum = 0;
572 for (i = 0; i < inputColumns; i++)
573 sum += sqr(data[i][iRow] - data[i][jRow]);
574 distance[iRow] += sqrt(sum);
575 }
576 }
577 for (iRow = 0; iRow < rows; iRow++)
578 distance[iRow] = rows / distance[iRow];
581 free(distance);
582 }
583 }
584 if ((kdeFlags & KDE_SPAN_PAGES && pass == 3) || (!(kdeFlags & KDE_SPAN_PAGES) && pass == 1)) {
587 "sddslocaldensityThreads", (short)threads,
588 NULL) ||
591 }
592 }
593 }
594 if (kdeFlags & KDE_SPAN_PAGES && pass != 3) {
595 if (verbose)
596 fprintf(stderr, "Closing input file %s and reopening for second pass\n", input);
599 exit(EXIT_FAILURE);
600 }
601 }
602 pass++;
603 }
606 exit(EXIT_FAILURE);
607 }
610 exit(EXIT_FAILURE);
611 }
612 if (verbose)
613 printf(
"Execution completed in %le s\n",
delapsed_time() - startTime);
614 return EXIT_SUCCESS;
615}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_SHORT
Identifier for the signed short integer data type.
#define SDDS_ANY_NUMERIC_TYPE
Special identifier used by SDDS_Check*() routines to accept any numeric type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
char * cp_str(char **s, char *t)
Copies a string, allocating memory for storage.
int update_min_max(double *min, double *max, double *list, int64_t n, int32_t reset)
Updates the minimum and maximum values based on a list of doubles.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.