SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddssnap2grid.c File Reference

Detailed Description

Snap data columns to a regular grid in SDDS files.

This program processes SDDS files to adjust specified data columns so that their values align to a regular grid. The program supports options for controlling the bin size, adjustment factor, and initial grid spacing guess. It also optionally generates parameters describing the grid such as minimum, maximum, interval, and dimension.

Usage

sddssnap2grid <inputfile> <outputfile>
[-pipe=[input][,output]]
-column=<name>,[{maximumBins=<value>|binFactor=<value>|deltaGuess=<value>}][,adjustFactor=<value>]
[-makeParameters]
[-verbose]

Options

Required Description
-column Specify the column name and bin settings (e.g., binFactor, maximumBins, deltaGuess).
Optional Description
-pipe Standard SDDS Toolkit pipe option for input/output.
-makeParameters Store grid information as parameters in the output file.
-verbose Display detailed information during processing.

Incompatibilities

  • -column:
    • Only one of maximumBins or binFactor or deltaGuess may be specified.

Requirements

  • For -column:
    • adjustFactor must be in the range (0, 1).
    • deltaGuess must be positive.
    • If binFactor is provided, it must be greater than or equal to 1.
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Authors
M. Borland, R. Soliday

Definition in file sddssnap2grid.c.

#include "mdb.h"
#include "SDDS.h"
#include "scan.h"
#include "SDDSutils.h"
#include <ctype.h>

Go to the source code of this file.

Functions

void SnapDataToGrid (double *data, int64_t rows, COLUMN_TO_SNAP *column, int verbose, double *min, double *max, int64_t *points)
 
void AddParameterDefinitions (SDDS_DATASET *SDDSout, SDDS_DATASET *SDDSin, COLUMN_TO_SNAP *column, long nColumns)
 
void StoreGridParameters (SDDS_DATASET *SDDSout, char *column, double min, double max, int64_t points)
 
int main (int argc, char **argv)
 

Function Documentation

◆ AddParameterDefinitions()

void AddParameterDefinitions ( SDDS_DATASET * SDDSout,
SDDS_DATASET * SDDSin,
COLUMN_TO_SNAP * column,
long nColumns )

Definition at line 341 of file sddssnap2grid.c.

341 {
342 long icol;
343 char name1[BUFLEN], name2[BUFLEN], name3[BUFLEN], name4[BUFLEN];
344
345 for (icol = 0; icol < nColumns; icol++) {
346 snprintf(name1, BUFLEN, "%sMinimum", column[icol].name);
347 snprintf(name2, BUFLEN, "%sMaximum", column[icol].name);
348 snprintf(name3, BUFLEN, "%sInterval", column[icol].name);
349 snprintf(name4, BUFLEN, "%sDimension", column[icol].name);
350 if (!SDDS_DefineParameterLikeColumn(SDDSout, SDDSin, column[icol].name, name1) ||
351 !SDDS_DefineParameterLikeColumn(SDDSout, SDDSin, column[icol].name, name2) ||
352 !SDDS_DefineParameterLikeColumn(SDDSout, SDDSin, column[icol].name, name3) ||
353 !SDDS_DefineSimpleParameter(SDDSout, name4, NULL, SDDS_LONG64))
354 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
355 }
356}
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_DefineParameterLikeColumn(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a parameter in the target dataset based on a column definition from the source dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
#define SDDS_LONG64
Identifier for the signed 64-bit integer data type.
Definition SDDStypes.h:49

◆ main()

int main ( int argc,
char ** argv )

Definition at line 117 of file sddssnap2grid.c.

117 {
118 int iArg, iColumn, verbose;
119 char *input, *output;
120 unsigned long pipeFlags;
121 SCANNED_ARG *scanned;
122 SDDS_DATASET SDDSin, SDDSout;
123 COLUMN_TO_SNAP *column;
124 long nColumns;
125 double *data;
126 int64_t rows;
127 short makeParameters;
128 double min, max;
129 int64_t points;
130
132 argc = scanargs(&scanned, argc, argv);
133 if (argc < 3)
134 bomb(NULL, USAGE);
135
136 nColumns = 0;
137 column = NULL;
138 input = output = NULL;
139 pipeFlags = 0;
140 verbose = 0;
141 makeParameters = 0;
142
143 for (iArg = 1; iArg < argc; iArg++) {
144 if (scanned[iArg].arg_type == OPTION) {
145 /* process options here */
146 switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
147 case CLO_COLUMN:
148 column = SDDS_Realloc(column, sizeof(*column) * (nColumns + 1));
149 column[nColumns].name = scanned[iArg].list[1];
150 column[nColumns].maximumBins = -1;
151 column[nColumns].binFactor = 10;
152 column[nColumns].deltaGuess = -1;
153 column[nColumns].flags = COLUMN_BIN_FACTOR;
154 column[nColumns].adjustFactor = 0.9;
155 scanned[iArg].n_items -= 2;
156 if (scanned[iArg].n_items) {
157 if (!scanItemList(&(column[nColumns].flags), scanned[iArg].list + 2, &scanned[iArg].n_items, 0,
158 "maximumbins", SDDS_LONG, &(column[nColumns].maximumBins), 1, COLUMN_MAXIMUM_BINS,
159 "binfactor", SDDS_LONG, &(column[nColumns].binFactor), 1, COLUMN_BIN_FACTOR,
160 "deltaguess", SDDS_DOUBLE, &(column[nColumns].deltaGuess), 1, COLUMN_DELTA_GUESS,
161 "adjustfactor", SDDS_DOUBLE, &(column[nColumns].adjustFactor), 1, COLUMN_ADJUST_FACTOR,
162 NULL)) {
163 SDDS_Bomb("invalid -column syntax");
164 }
165 if (column[nColumns].flags & COLUMN_ADJUST_FACTOR) {
166 if (column[nColumns].adjustFactor <= 0 || column[nColumns].adjustFactor >= 1)
167 SDDS_Bomb("invalid -column syntax. adjustFactor must be (0,1)");
168 }
169 if (column[nColumns].flags & COLUMN_DELTA_GUESS) {
170 if (column[nColumns].flags & ~(COLUMN_DELTA_GUESS | COLUMN_ADJUST_FACTOR))
171 SDDS_Bomb("invalid -column syntax. Can't combine deltaGuess with other options.");
172 if (column[nColumns].deltaGuess <= 0)
173 SDDS_Bomb("invalid -column syntax. deltaGuess<=0.");
174 } else {
175 if (column[nColumns].flags & COLUMN_BIN_FACTOR && column[nColumns].flags & COLUMN_MAXIMUM_BINS)
176 SDDS_Bomb("invalid -column syntax. Can't give minimumBins and maximumBins with binFactor");
177 if (!(column[nColumns].flags & COLUMN_BIN_FACTOR) && !(column[nColumns].flags & COLUMN_MAXIMUM_BINS))
178 SDDS_Bomb("invalid -column syntax. Give maximumBins or binFactor");
179 if (column[nColumns].flags & COLUMN_BIN_FACTOR && column[nColumns].binFactor < 1)
180 SDDS_Bomb("invalid -column syntax. binFactor<1");
181 }
182 }
183 nColumns++;
184 break;
185 case CLO_PIPE:
186 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
187 SDDS_Bomb("invalid -pipe syntax");
188 break;
189 case CLO_VERBOSE:
190 verbose = 1;
191 break;
192 case CLO_MAKE_PARAMETERS:
193 makeParameters = 1;
194 break;
195 default:
196 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
197 exit(EXIT_FAILURE);
198 break;
199 }
200 } else {
201 if (!input)
202 input = scanned[iArg].list[0];
203 else if (!output)
204 output = scanned[iArg].list[0];
205 else
206 SDDS_Bomb("too many filenames seen");
207 }
208 }
209
210 processFilenames("sddssnap2grid", &input, &output, pipeFlags, 0, NULL);
211
212 if (!SDDS_InitializeInput(&SDDSin, input))
213 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
214
215 if (!SDDS_InitializeCopy(&SDDSout, &SDDSin, output, "w"))
216 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
217 if (makeParameters)
218 AddParameterDefinitions(&SDDSout, &SDDSin, column, nColumns);
219 if (!SDDS_WriteLayout(&SDDSout))
220 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
221
222 while (SDDS_ReadPage(&SDDSin) > 0) {
223 rows = SDDS_CountRowsOfInterest(&SDDSin);
224 if (!SDDS_CopyPage(&SDDSout, &SDDSin))
225 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
226 for (iColumn = 0; iColumn < nColumns; iColumn++) {
227 if (!(data = SDDS_GetColumnInDoubles(&SDDSout, column[iColumn].name)))
228 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
229 SnapDataToGrid(data, rows, column + iColumn, verbose, &min, &max, &points);
230 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, data, rows, column[iColumn].name))
231 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
232 if (makeParameters)
233 StoreGridParameters(&SDDSout, column[iColumn].name, min, max, points);
234 free(data);
235 }
236 if (!SDDS_WritePage(&SDDSout))
237 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
238 }
239
240 if (!SDDS_Terminate(&SDDSin)) {
241 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
242 exit(EXIT_FAILURE);
243 }
244 if (!SDDS_Terminate(&SDDSout)) {
245 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
246 exit(EXIT_FAILURE);
247 }
248
249 return EXIT_SUCCESS;
250}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.

◆ SnapDataToGrid()

void SnapDataToGrid ( double * data,
int64_t rows,
COLUMN_TO_SNAP * column,
int verbose,
double * min,
double * max,
int64_t * points )

Definition at line 252 of file sddssnap2grid.c.

253 {
254 int64_t i, bins, centroids;
255 double min, max, hmin, hmax, span, middle;
256 char s[16384];
257 double *histogram, *whistogram, *centroid, delta;
258
259 find_min_max(&min, &max, data, rows);
260 if ((span = max - min) <= 0)
261 return;
262
263 /* Add some buffer space at the ends for histogramming */
264 span *= 1 + 2.0 / rows;
265 middle = (max + min) / 2;
266 hmin = middle - span / 2;
267 hmax = middle + span / 2;
268
269 bins = 0; /* prevent compiler warning */
270 if (column->flags & COLUMN_DELTA_GUESS)
271 bins = (hmax - hmin) / (column->deltaGuess / 10);
272 else if (column->flags & COLUMN_MAXIMUM_BINS)
273 bins = column->maximumBins;
274 else if (column->flags & COLUMN_BIN_FACTOR)
275 bins = rows * column->binFactor;
276 else
277 SDDS_Bomb("logic error. Missing flags for determination of maximum number of bins.");
278
279 if (verbose) {
280 printf("Working on %s with %" PRId64 " bins, span=%le, hmin=%le, hmax=%le\n",
281 column->name, bins, span, hmin, hmax);
282 fflush(stdout);
283 }
284
285 while (bins >= 2) {
286 histogram = calloc(bins, sizeof(*histogram));
287 whistogram = calloc(bins, sizeof(*whistogram));
288 if (verbose) {
289 printf("Histogramming %s with %" PRId64 " bins\n", column->name, bins);
290 fflush(stdout);
291 }
292 make_histogram(histogram, bins, hmin, hmax, data, rows, 1);
293 for (i = 1; i < bins; i++) {
294 if (histogram[i] && histogram[i - 1])
295 break;
296 }
297 if (i != bins) {
298 /* indicates that two adjacent bins are occupied, so we need more coarse grouping */
299 free(histogram);
300 bins *= column->adjustFactor;
301 continue;
302 }
303 if (bins <= 1) {
304 snprintf(s, 16384, "Unable to snap data for %s to grid\n", column->name);
305 SDDS_Bomb(s);
306 }
307
308 /* self-weighted histogram gives centroid*number in each bin */
309 make_histogram_weighted(whistogram, bins, hmin, hmax, data, rows, 1, data);
310 centroid = tmalloc(sizeof(*centroid) * bins);
311 centroids = 0;
312 for (i = 0; i < bins; i++) {
313 if (histogram[i]) {
314 /*
315 printf("centroid[%ld] = %le (%le/%le)\n", centroids, whistogram[i]/histogram[i],
316 whistogram[i], histogram[i]);
317 */
318 centroid[centroids++] = whistogram[i] / histogram[i];
319 }
320 }
321 free(histogram);
322 free(whistogram);
323 delta = (centroid[centroids - 1] - centroid[0]) / (centroids - 1);
324 /* assign new value using the computed delta and the first centroid as the first value */
325 for (i = 0; i < rows; i++)
326 data[i] = ((long)((data[i] - centroid[0]) / delta + 0.5)) * delta + centroid[0];
327 if (verbose) {
328 printf("Completed work for %s: delta = %le, start = %le, locations = %" PRId64 "\n",
329 column->name, delta, centroid[0], centroids);
330 fflush(stdout);
331 }
332 *minReturn = centroid[0];
333 *pointsReturn = centroids;
334 *maxReturn = centroid[0] + (centroids - 1) * delta;
335 free(centroid);
336 return;
337 }
338}
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
int find_min_max(double *min, double *max, double *list, int64_t n)
Finds the minimum and maximum values in a list of doubles.
Definition findMinMax.c:33
long make_histogram_weighted(double *hist, long n_bins, double lo, double hi, double *data, long n_pts, long new_start, double *weight)
Compiles a weighted histogram from data points.
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.

◆ StoreGridParameters()

void StoreGridParameters ( SDDS_DATASET * SDDSout,
char * column,
double min,
double max,
int64_t points )

Definition at line 358 of file sddssnap2grid.c.

358 {
359 char name1[BUFLEN], name2[BUFLEN], name3[BUFLEN], name4[BUFLEN];
360
361 snprintf(name1, BUFLEN, "%sMinimum", column);
362 snprintf(name2, BUFLEN, "%sMaximum", column);
363 snprintf(name3, BUFLEN, "%sInterval", column);
364 snprintf(name4, BUFLEN, "%sDimension", column);
365 if (!SDDS_SetParameters(SDDSout, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE,
366 name1, min,
367 name2, max,
368 name3, points > 1 ? (max - min) / (points - 1) : -1,
369 name4, points,
370 NULL))
371 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
372}
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)