72 int32_t maximumBins, binFactor;
73 double deltaGuess, adjustFactor;
76char *option[N_OPTIONS] = {
/* Bit flags kept in the flags member of a COLUMN_TO_SNAP entry. Each bit
 * corresponds to one qualifier of the -column option; the option parser in
 * main() pairs them with the keywords "maximumbins", "binfactor",
 * "deltaguess" and "adjustfactor". main() initializes every new column
 * entry with COLUMN_BIN_FACTOR before scanning the qualifiers. */
83#define COLUMN_MAXIMUM_BINS 0x01UL
84#define COLUMN_BIN_FACTOR 0x02UL
85#define COLUMN_DELTA_GUESS 0x04UL
86#define COLUMN_ADJUST_FACTOR 0x08UL
89 "sddssnap2grid [<inputfile>] [<outputfile>] [-pipe=[input][,output]]\n"
90 " -column=<name>,[{maximumBins=<value>|binFactor=<value>|deltaGuess=<value>}][,adjustFactor=<value>]\n"
92 " [-makeParameters] [-verbose]\n"
95 " -pipe Standard SDDS Toolkit pipe option.\n"
96 " -column Specify the name of a column to modify for equispaced values.\n"
97 " The default mode uses binFactor = 10, meaning the maximum number\n"
98 " of bins is 10 times the number of data points. The algorithm works as follows:\n"
99 " 1. Bin the data with the maximum number of bins.\n"
100 " 2. If no two adjacent bins are populated, use this grouping to compute\n"
101 " centroids for each subset, providing delta values.\n"
102 " 3. If two adjacent bins are populated, multiply the number of bins by\n"
103 " adjustFactor (default: 0.9) and repeat the process.\n"
104 " Alternatively, you can provide a guess for the grid spacing;\n"
105 " the algorithm will use 1/10 of this as the initial bin size.\n"
107 " Store grid parameters in the output file as parameters named\n"
108 " <name>Minimum, <name>Maximum, <name>Interval, and <name>Dimension.\n"
109 " -verbose Report the computed deltas and the number of grid points.\n"
111 "Program by Michael Borland. (" __DATE__
" " __TIME__
", SVN revision: " SVN_VERSION
")\n";
/* Forward declaration of SnapDataToGrid.
 *
 * data, rows : values of one column; the definition overwrites each
 *              data[i] with the nearest multiple of the computed grid
 *              spacing, measured from the first bin centroid.
 * column     : the -column settings for this column (name, flags,
 *              maximumBins, binFactor, deltaGuess, adjustFactor).
 * verbose    : presumably enables the progress printf output -- the
 *              guarding condition is not visible here, confirm.
 * min        : receives the first centroid (grid start).
 * max        : receives start + (points - 1) * spacing.
 * points     : receives the number of centroids found. */
113void SnapDataToGrid(
double *data, int64_t rows,
COLUMN_TO_SNAP *column,
int verbose,
double *min,
double *max, int64_t *points);
/* Forward declaration of StoreGridParameters.
 *
 * Takes the output dataset, a column name and that column's grid
 * description (min, max, points). The definition builds the names
 * <column>Minimum, <column>Maximum, <column>Interval and
 * <column>Dimension; the interval value is (max - min) / (points - 1),
 * or -1 when points <= 1.
 * NOTE(review): presumably these are written as parameters of SDDSout;
 * the storing call is only partly visible, confirm. */
115void StoreGridParameters(
SDDS_DATASET *SDDSout,
char *column,
double min,
double max, int64_t points);
117int main(
int argc,
char **argv) {
118 int iArg, iColumn, verbose;
119 char *input, *output;
120 unsigned long pipeFlags;
121 SCANNED_ARG *scanned;
127 short makeParameters;
132 argc =
scanargs(&scanned, argc, argv);
138 input = output = NULL;
143 for (iArg = 1; iArg < argc; iArg++) {
144 if (scanned[iArg].arg_type == OPTION) {
146 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
148 column =
SDDS_Realloc(column,
sizeof(*column) * (nColumns + 1));
149 column[nColumns].name = scanned[iArg].list[1];
150 column[nColumns].maximumBins = -1;
151 column[nColumns].binFactor = 10;
152 column[nColumns].deltaGuess = -1;
153 column[nColumns].flags = COLUMN_BIN_FACTOR;
154 column[nColumns].adjustFactor = 0.9;
155 scanned[iArg].n_items -= 2;
156 if (scanned[iArg].n_items) {
157 if (!
scanItemList(&(column[nColumns].flags), scanned[iArg].list + 2, &scanned[iArg].n_items, 0,
158 "maximumbins",
SDDS_LONG, &(column[nColumns].maximumBins), 1, COLUMN_MAXIMUM_BINS,
159 "binfactor",
SDDS_LONG, &(column[nColumns].binFactor), 1, COLUMN_BIN_FACTOR,
160 "deltaguess",
SDDS_DOUBLE, &(column[nColumns].deltaGuess), 1, COLUMN_DELTA_GUESS,
161 "adjustfactor",
SDDS_DOUBLE, &(column[nColumns].adjustFactor), 1, COLUMN_ADJUST_FACTOR,
165 if (column[nColumns].flags & COLUMN_ADJUST_FACTOR) {
166 if (column[nColumns].adjustFactor <= 0 || column[nColumns].adjustFactor >= 1)
167 SDDS_Bomb(
"invalid -column syntax. adjustFactor must be (0,1)");
169 if (column[nColumns].flags & COLUMN_DELTA_GUESS) {
170 if (column[nColumns].flags & ~(COLUMN_DELTA_GUESS | COLUMN_ADJUST_FACTOR))
171 SDDS_Bomb(
"invalid -column syntax. Can't combine deltaGuess with other options.");
172 if (column[nColumns].deltaGuess <= 0)
173 SDDS_Bomb(
"invalid -column syntax. deltaGuess<=0.");
175 if (column[nColumns].flags & COLUMN_BIN_FACTOR && column[nColumns].flags & COLUMN_MAXIMUM_BINS)
176 SDDS_Bomb(
"invalid -column syntax. Can't give minimumBins and maximumBins with binFactor");
177 if (!(column[nColumns].flags & COLUMN_BIN_FACTOR) && !(column[nColumns].flags & COLUMN_MAXIMUM_BINS))
178 SDDS_Bomb(
"invalid -column syntax. Give maximumBins or binFactor");
179 if (column[nColumns].flags & COLUMN_BIN_FACTOR && column[nColumns].binFactor < 1)
180 SDDS_Bomb(
"invalid -column syntax. binFactor<1");
186 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
192 case CLO_MAKE_PARAMETERS:
196 fprintf(stderr,
"error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
202 input = scanned[iArg].list[0];
204 output = scanned[iArg].list[0];
218 AddParameterDefinitions(&SDDSout, &SDDSin, column, nColumns);
226 for (iColumn = 0; iColumn < nColumns; iColumn++) {
229 SnapDataToGrid(data, rows, column + iColumn, verbose, &min, &max, &points);
233 StoreGridParameters(&SDDSout, column[iColumn].name, min, max, points);
252void SnapDataToGrid(
double *data, int64_t rows,
COLUMN_TO_SNAP *column,
int verbose,
253 double *minReturn,
double *maxReturn, int64_t *pointsReturn) {
254 int64_t i, bins, centroids;
255 double min, max, hmin, hmax, span, middle;
257 double *histogram, *whistogram, *centroid, delta;
260 if ((span = max - min) <= 0)
264 span *= 1 + 2.0 / rows;
265 middle = (max + min) / 2;
266 hmin = middle - span / 2;
267 hmax = middle + span / 2;
270 if (column->flags & COLUMN_DELTA_GUESS)
271 bins = (hmax - hmin) / (column->deltaGuess / 10);
272 else if (column->flags & COLUMN_MAXIMUM_BINS)
273 bins = column->maximumBins;
274 else if (column->flags & COLUMN_BIN_FACTOR)
275 bins = rows * column->binFactor;
277 SDDS_Bomb(
"logic error. Missing flags for determination of maximum number of bins.");
280 printf(
"Working on %s with %" PRId64
" bins, span=%le, hmin=%le, hmax=%le\n",
281 column->name, bins, span, hmin, hmax);
286 histogram = calloc(bins,
sizeof(*histogram));
287 whistogram = calloc(bins,
sizeof(*whistogram));
289 printf(
"Histogramming %s with %" PRId64
" bins\n", column->name, bins);
293 for (i = 1; i < bins; i++) {
294 if (histogram[i] && histogram[i - 1])
300 bins *= column->adjustFactor;
304 snprintf(s, 16384,
"Unable to snap data for %s to grid\n", column->name);
310 centroid =
tmalloc(
sizeof(*centroid) * bins);
312 for (i = 0; i < bins; i++) {
318 centroid[centroids++] = whistogram[i] / histogram[i];
323 delta = (centroid[centroids - 1] - centroid[0]) / (centroids - 1);
325 for (i = 0; i < rows; i++)
326 data[i] = ((
long)((data[i] - centroid[0]) / delta + 0.5)) * delta + centroid[0];
328 printf(
"Completed work for %s: delta = %le, start = %le, locations = %" PRId64
"\n",
329 column->name, delta, centroid[0], centroids);
332 *minReturn = centroid[0];
333 *pointsReturn = centroids;
334 *maxReturn = centroid[0] + (centroids - 1) * delta;
343 char name1[BUFLEN], name2[BUFLEN], name3[BUFLEN], name4[BUFLEN];
345 for (icol = 0; icol < nColumns; icol++) {
346 snprintf(name1, BUFLEN,
"%sMinimum", column[icol].name);
347 snprintf(name2, BUFLEN,
"%sMaximum", column[icol].name);
348 snprintf(name3, BUFLEN,
"%sInterval", column[icol].name);
349 snprintf(name4, BUFLEN,
"%sDimension", column[icol].name);
358void StoreGridParameters(
SDDS_DATASET *SDDSout,
char *column,
double min,
double max, int64_t points) {
359 char name1[BUFLEN], name2[BUFLEN], name3[BUFLEN], name4[BUFLEN];
361 snprintf(name1, BUFLEN,
"%sMinimum", column);
362 snprintf(name2, BUFLEN,
"%sMaximum", column);
363 snprintf(name3, BUFLEN,
"%sInterval", column);
364 snprintf(name4, BUFLEN,
"%sDimension", column);
368 name3, points > 1 ? (max - min) / (points - 1) : -1,
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameterLikeColumn(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a parameter in the target dataset based on a column definition from the source dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
#define SDDS_LONG64
Identifier for the signed 64-bit integer data type.
Utility functions for SDDS dataset manipulation and string array operations.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
int find_min_max(double *min, double *max, double *list, int64_t n)
Finds the minimum and maximum values in a list of doubles.
long make_histogram_weighted(double *hist, long n_bins, double lo, double hi, double *data, long n_pts, long new_start, double *weight)
Compiles a weighted histogram from data points.
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.