58 int32_t maximumBins, binFactor;
59 double deltaGuess, adjustFactor;
62char *option[N_OPTIONS] = {
/*
 * Bit flags stored in a column's `flags` word. Each one is passed to
 * scanItemList() next to the matching -column keyword, so a set bit marks
 * which qualifier applies to that column.
 */
/* "maximumbins": SnapDataToGrid starts from column->maximumBins bins. */
69#define COLUMN_MAXIMUM_BINS 0x01UL
/* "binfactor": the initial value of `flags` for a new column; SnapDataToGrid
 * starts from rows * column->binFactor bins. */
70#define COLUMN_BIN_FACTOR 0x02UL
/* "deltaguess": SnapDataToGrid derives the bin count from a bin width of
 * deltaGuess / 10; main rejects combining it with the bin-count qualifiers. */
71#define COLUMN_DELTA_GUESS 0x04UL
/* "adjustfactor": when set, main checks that adjustFactor lies in (0,1). */
72#define COLUMN_ADJUST_FACTOR 0x08UL
75 "sddssnap2grid [<inputfile>] [<outputfile>] [-pipe=[input][,output]]\n"
76 " -column=<name>,[{maximumBins=<value> | binFactor=<value>} | deltaGuess=<value>][,adjustFactor=<value>]\n"
78 " [-makeParameters] [-verbose]\n"
81 " -pipe Standard SDDS Toolkit pipe option.\n"
82 " -column Specify the name of a column to modify for equispaced values.\n"
83 " The default mode uses binFactor = 10, meaning the maximum number\n"
84 " of bins is 10 times the number of data points. The algorithm works as follows:\n"
85 " 1. Bin the data with the maximum number of bins.\n"
86 " 2. If no two adjacent bins are populated, use this grouping to compute\n"
87 " centroids for each subset, providing delta values.\n"
88 " 3. If two adjacent bins are populated, multiply the number of bins by\n"
89 " adjustFactor (default: 0.9) and repeat the process.\n"
90 " Alternatively, you can provide a guess for the grid spacing;\n"
91 " the algorithm will use 1/10 of this as the initial bin size.\n"
93 " Store grid parameters in the output file as parameters named\n"
94 " <name>Minimum, <name>Maximum, <name>Interval, and <name>Dimension.\n"
95 " -verbose Report the computed deltas and the number of grid points.\n"
97 "Program by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")\n";
/*
 * Forward declaration of SnapDataToGrid (defined later in this file).
 *
 * Replaces each of the `rows` values in `data`, in place, with the nearest
 * point of an evenly spaced grid: start + k * delta, where start is the first
 * centroid found and delta is the mean spacing between the first and last
 * centroids.
 *
 *   data, rows - values to snap and their count
 *   column     - per-column settings; its flags select how the initial bin
 *                count is chosen, and adjustFactor scales the bin count on
 *                later attempts
 *   verbose    - NOTE(review): presumably gates the progress printf calls;
 *                the guarding lines are not visible here - confirm
 *   min        - receives the first grid value (first centroid)
 *   max        - receives first + (points - 1) * delta
 *   points     - receives the number of centroids (grid points) found
 */
99void SnapDataToGrid(
double *data, int64_t rows,
COLUMN_TO_SNAP *column,
int verbose,
double *min,
double *max, int64_t *points);
/*
 * Forward declaration of StoreGridParameters (defined later in this file).
 *
 * Builds the parameter names <column>Minimum, <column>Maximum,
 * <column>Interval and <column>Dimension. The interval value is
 * (max - min) / (points - 1) when points > 1, and -1 otherwise.
 * NOTE(review): the values are presumably written to SDDSout via
 * SDDS_SetParameters; that call is not visible in this view - confirm.
 *
 *   SDDSout - output dataset
 *   column  - column name used as the prefix of the four parameter names
 *   min,max - first and last grid values for the column
 *   points  - number of grid points
 */
101void StoreGridParameters(
SDDS_DATASET *SDDSout,
char *column,
double min,
double max, int64_t points);
103int main(
int argc,
char **argv) {
104 int iArg, iColumn, verbose;
105 char *input, *output;
106 unsigned long pipeFlags;
107 SCANNED_ARG *scanned;
113 short makeParameters;
118 argc =
scanargs(&scanned, argc, argv);
124 input = output = NULL;
129 for (iArg = 1; iArg < argc; iArg++) {
130 if (scanned[iArg].arg_type == OPTION) {
132 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
134 column =
SDDS_Realloc(column,
sizeof(*column) * (nColumns + 1));
135 column[nColumns].name = scanned[iArg].list[1];
136 column[nColumns].maximumBins = -1;
137 column[nColumns].binFactor = 10;
138 column[nColumns].deltaGuess = -1;
139 column[nColumns].flags = COLUMN_BIN_FACTOR;
140 column[nColumns].adjustFactor = 0.9;
141 scanned[iArg].n_items -= 2;
142 if (scanned[iArg].n_items) {
143 if (!
scanItemList(&(column[nColumns].flags), scanned[iArg].list + 2, &scanned[iArg].n_items, 0,
144 "maximumbins",
SDDS_LONG, &(column[nColumns].maximumBins), 1, COLUMN_MAXIMUM_BINS,
145 "binfactor",
SDDS_LONG, &(column[nColumns].binFactor), 1, COLUMN_BIN_FACTOR,
146 "deltaguess",
SDDS_DOUBLE, &(column[nColumns].deltaGuess), 1, COLUMN_DELTA_GUESS,
147 "adjustfactor",
SDDS_DOUBLE, &(column[nColumns].adjustFactor), 1, COLUMN_ADJUST_FACTOR,
151 if (column[nColumns].flags & COLUMN_ADJUST_FACTOR) {
152 if (column[nColumns].adjustFactor <= 0 || column[nColumns].adjustFactor >= 1)
153 SDDS_Bomb(
"invalid -column syntax. adjustFactor must be (0,1)");
155 if (column[nColumns].flags & COLUMN_DELTA_GUESS) {
156 if (column[nColumns].flags & ~(COLUMN_DELTA_GUESS | COLUMN_ADJUST_FACTOR))
157 SDDS_Bomb(
"invalid -column syntax. Can't combine deltaGuess with other options.");
158 if (column[nColumns].deltaGuess <= 0)
159 SDDS_Bomb(
"invalid -column syntax. deltaGuess<=0.");
161 if (column[nColumns].flags & COLUMN_BIN_FACTOR && column[nColumns].flags & COLUMN_MAXIMUM_BINS)
162 SDDS_Bomb(
"invalid -column syntax. Can't give minimumBins and maximumBins with binFactor");
163 if (!(column[nColumns].flags & COLUMN_BIN_FACTOR) && !(column[nColumns].flags & COLUMN_MAXIMUM_BINS))
164 SDDS_Bomb(
"invalid -column syntax. Give maximumBins or binFactor");
165 if (column[nColumns].flags & COLUMN_BIN_FACTOR && column[nColumns].binFactor < 1)
166 SDDS_Bomb(
"invalid -column syntax. binFactor<1");
172 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
178 case CLO_MAKE_PARAMETERS:
182 fprintf(stderr,
"error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
188 input = scanned[iArg].list[0];
190 output = scanned[iArg].list[0];
204 AddParameterDefinitions(&SDDSout, &SDDSin, column, nColumns);
212 for (iColumn = 0; iColumn < nColumns; iColumn++) {
215 SnapDataToGrid(data, rows, column + iColumn, verbose, &min, &max, &points);
219 StoreGridParameters(&SDDSout, column[iColumn].name, min, max, points);
238void SnapDataToGrid(
double *data, int64_t rows,
COLUMN_TO_SNAP *column,
int verbose,
239 double *minReturn,
double *maxReturn, int64_t *pointsReturn) {
240 int64_t i, bins, centroids;
241 double min, max, hmin, hmax, span, middle;
243 double *histogram, *whistogram, *centroid, delta;
246 if ((span = max - min) <= 0)
250 span *= 1 + 2.0 / rows;
251 middle = (max + min) / 2;
252 hmin = middle - span / 2;
253 hmax = middle + span / 2;
256 if (column->flags & COLUMN_DELTA_GUESS)
257 bins = (hmax - hmin) / (column->deltaGuess / 10);
258 else if (column->flags & COLUMN_MAXIMUM_BINS)
259 bins = column->maximumBins;
260 else if (column->flags & COLUMN_BIN_FACTOR)
261 bins = rows * column->binFactor;
263 SDDS_Bomb(
"logic error. Missing flags for determination of maximum number of bins.");
266 printf(
"Working on %s with %" PRId64
" bins, span=%le, hmin=%le, hmax=%le\n",
267 column->name, bins, span, hmin, hmax);
272 histogram = calloc(bins,
sizeof(*histogram));
273 whistogram = calloc(bins,
sizeof(*whistogram));
275 printf(
"Histogramming %s with %" PRId64
" bins\n", column->name, bins);
279 for (i = 1; i < bins; i++) {
280 if (histogram[i] && histogram[i - 1])
286 bins *= column->adjustFactor;
290 snprintf(s, 16384,
"Unable to snap data for %s to grid\n", column->name);
296 centroid =
tmalloc(
sizeof(*centroid) * bins);
298 for (i = 0; i < bins; i++) {
304 centroid[centroids++] = whistogram[i] / histogram[i];
309 delta = (centroid[centroids - 1] - centroid[0]) / (centroids - 1);
311 for (i = 0; i < rows; i++)
312 data[i] = ((
long)((data[i] - centroid[0]) / delta + 0.5)) * delta + centroid[0];
314 printf(
"Completed work for %s: delta = %le, start = %le, locations = %" PRId64
"\n",
315 column->name, delta, centroid[0], centroids);
318 *minReturn = centroid[0];
319 *pointsReturn = centroids;
320 *maxReturn = centroid[0] + (centroids - 1) * delta;
329 char name1[BUFLEN], name2[BUFLEN], name3[BUFLEN], name4[BUFLEN];
331 for (icol = 0; icol < nColumns; icol++) {
332 snprintf(name1, BUFLEN,
"%sMinimum", column[icol].name);
333 snprintf(name2, BUFLEN,
"%sMaximum", column[icol].name);
334 snprintf(name3, BUFLEN,
"%sInterval", column[icol].name);
335 snprintf(name4, BUFLEN,
"%sDimension", column[icol].name);
344void StoreGridParameters(
SDDS_DATASET *SDDSout,
char *column,
double min,
double max, int64_t points) {
345 char name1[BUFLEN], name2[BUFLEN], name3[BUFLEN], name4[BUFLEN];
347 snprintf(name1, BUFLEN,
"%sMinimum", column);
348 snprintf(name2, BUFLEN,
"%sMaximum", column);
349 snprintf(name3, BUFLEN,
"%sInterval", column);
350 snprintf(name4, BUFLEN,
"%sDimension", column);
354 name3, points > 1 ? (max - min) / (points - 1) : -1,
SDDS (Self Describing Data Set) Data Type Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameterLikeColumn(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a parameter in the target dataset based on a column definition from the source dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
#define SDDS_LONG64
Identifier for the signed 64-bit integer data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
int find_min_max(double *min, double *max, double *list, int64_t n)
Finds the minimum and maximum values in a list of doubles.
long make_histogram_weighted(double *hist, long n_bins, double lo, double hi, double *data, long n_pts, long new_start, double *weight)
Compiles a weighted histogram from data points.
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.