SDDSlib
Loading...
Searching...
No Matches
sddssnap2grid.c File Reference

Snap data columns to a regular grid in SDDS files. More...

#include "mdb.h"
#include "SDDS.h"
#include "scan.h"
#include "SDDSutils.h"
#include <ctype.h>

Go to the source code of this file.

Classes

struct  COLUMN_TO_SNAP
 

Macros

#define COLUMN_MAXIMUM_BINS   0x01UL
 
#define COLUMN_BIN_FACTOR   0x02UL
 
#define COLUMN_DELTA_GUESS   0x04UL
 
#define COLUMN_ADJUST_FACTOR   0x08UL
 
#define BUFLEN   16834
 

Enumerations

enum  option_type {
  CLO_PIPE , CLO_COLUMN , CLO_VERBOSE , CLO_MAKE_PARAMETERS ,
  N_OPTIONS
}
 

Functions

void SnapDataToGrid (double *data, int64_t rows, COLUMN_TO_SNAP *column, int verbose, double *min, double *max, int64_t *points)
 
void AddParameterDefinitions (SDDS_DATASET *SDDSout, SDDS_DATASET *SDDSin, COLUMN_TO_SNAP *column, long nColumns)
 
void StoreGridParameters (SDDS_DATASET *SDDSout, char *column, double min, double max, int64_t points)
 
int main (int argc, char **argv)
 

Variables

char * option [N_OPTIONS]
 
static char * USAGE
 

Detailed Description

Snap data columns to a regular grid in SDDS files.

This program processes SDDS files to adjust specified data columns so that their values align to a regular grid. The program supports options for controlling the bin size, adjustment factor, and initial grid spacing guess. It also optionally generates parameters describing the grid such as minimum, maximum, interval, and dimension.

Key Features

  • Aligns data columns to a grid using adaptive binning.
  • Supports custom maximum bins, bin factors, and delta guess values.
  • Reports grid parameters (minimum, maximum, interval, and points).
  • Verbose mode for detailed processing output.

Options

  • -pipe: Standard SDDS toolkit pipe option for input/output.
  • -column: Specify a column to process with options for bin settings.
  • -makeParameters: Store grid information as parameters in the output file.
  • -verbose: Display detailed information during processing.

Example Usage

sddssnap2grid input.sdds output.sdds -column=Position,binFactor=20
sddssnap2grid -pipe=input,output -column=Time,deltaGuess=0.01
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Author
M. Borland, R. Soliday

Definition in file sddssnap2grid.c.

Macro Definition Documentation

◆ BUFLEN

#define BUFLEN   16834

Definition at line 326 of file sddssnap2grid.c.

◆ COLUMN_ADJUST_FACTOR

#define COLUMN_ADJUST_FACTOR   0x08UL

Definition at line 72 of file sddssnap2grid.c.

◆ COLUMN_BIN_FACTOR

#define COLUMN_BIN_FACTOR   0x02UL

Definition at line 70 of file sddssnap2grid.c.

◆ COLUMN_DELTA_GUESS

#define COLUMN_DELTA_GUESS   0x04UL

Definition at line 71 of file sddssnap2grid.c.

◆ COLUMN_MAXIMUM_BINS

#define COLUMN_MAXIMUM_BINS   0x01UL

Definition at line 69 of file sddssnap2grid.c.

Enumeration Type Documentation

◆ option_type

enum option_type

Definition at line 47 of file sddssnap2grid.c.

47 {
48 CLO_PIPE,
49 CLO_COLUMN,
50 CLO_VERBOSE,
51 CLO_MAKE_PARAMETERS,
52 N_OPTIONS
53};

Function Documentation

◆ AddParameterDefinitions()

void AddParameterDefinitions ( SDDS_DATASET * SDDSout,
SDDS_DATASET * SDDSin,
COLUMN_TO_SNAP * column,
long nColumns )

Definition at line 327 of file sddssnap2grid.c.

327 {
328 long icol;
329 char name1[BUFLEN], name2[BUFLEN], name3[BUFLEN], name4[BUFLEN];
330
331 for (icol = 0; icol < nColumns; icol++) {
332 snprintf(name1, BUFLEN, "%sMinimum", column[icol].name);
333 snprintf(name2, BUFLEN, "%sMaximum", column[icol].name);
334 snprintf(name3, BUFLEN, "%sInterval", column[icol].name);
335 snprintf(name4, BUFLEN, "%sDimension", column[icol].name);
336 if (!SDDS_DefineParameterLikeColumn(SDDSout, SDDSin, column[icol].name, name1) ||
337 !SDDS_DefineParameterLikeColumn(SDDSout, SDDSin, column[icol].name, name2) ||
338 !SDDS_DefineParameterLikeColumn(SDDSout, SDDSin, column[icol].name, name3) ||
339 !SDDS_DefineSimpleParameter(SDDSout, name4, NULL, SDDS_LONG64))
340 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
341 }
342}
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_DefineParameterLikeColumn(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Defines a parameter in the target dataset based on a column definition from the source dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
#define SDDS_LONG64
Identifier for the signed 64-bit integer data type.
Definition SDDStypes.h:49

◆ main()

int main ( int argc,
char ** argv )

Definition at line 103 of file sddssnap2grid.c.

103 {
104 int iArg, iColumn, verbose;
105 char *input, *output;
106 unsigned long pipeFlags;
107 SCANNED_ARG *scanned;
108 SDDS_DATASET SDDSin, SDDSout;
109 COLUMN_TO_SNAP *column;
110 long nColumns;
111 double *data;
112 int64_t rows;
113 short makeParameters;
114 double min, max;
115 int64_t points;
116
118 argc = scanargs(&scanned, argc, argv);
119 if (argc < 3)
120 bomb(NULL, USAGE);
121
122 nColumns = 0;
123 column = NULL;
124 input = output = NULL;
125 pipeFlags = 0;
126 verbose = 0;
127 makeParameters = 0;
128
129 for (iArg = 1; iArg < argc; iArg++) {
130 if (scanned[iArg].arg_type == OPTION) {
131 /* process options here */
132 switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
133 case CLO_COLUMN:
134 column = SDDS_Realloc(column, sizeof(*column) * (nColumns + 1));
135 column[nColumns].name = scanned[iArg].list[1];
136 column[nColumns].maximumBins = -1;
137 column[nColumns].binFactor = 10;
138 column[nColumns].deltaGuess = -1;
139 column[nColumns].flags = COLUMN_BIN_FACTOR;
140 column[nColumns].adjustFactor = 0.9;
141 scanned[iArg].n_items -= 2;
142 if (scanned[iArg].n_items) {
143 if (!scanItemList(&(column[nColumns].flags), scanned[iArg].list + 2, &scanned[iArg].n_items, 0,
144 "maximumbins", SDDS_LONG, &(column[nColumns].maximumBins), 1, COLUMN_MAXIMUM_BINS,
145 "binfactor", SDDS_LONG, &(column[nColumns].binFactor), 1, COLUMN_BIN_FACTOR,
146 "deltaguess", SDDS_DOUBLE, &(column[nColumns].deltaGuess), 1, COLUMN_DELTA_GUESS,
147 "adjustfactor", SDDS_DOUBLE, &(column[nColumns].adjustFactor), 1, COLUMN_ADJUST_FACTOR,
148 NULL)) {
149 SDDS_Bomb("invalid -column syntax");
150 }
151 if (column[nColumns].flags & COLUMN_ADJUST_FACTOR) {
152 if (column[nColumns].adjustFactor <= 0 || column[nColumns].adjustFactor >= 1)
153 SDDS_Bomb("invalid -column syntax. adjustFactor must be (0,1)");
154 }
155 if (column[nColumns].flags & COLUMN_DELTA_GUESS) {
156 if (column[nColumns].flags & ~(COLUMN_DELTA_GUESS | COLUMN_ADJUST_FACTOR))
157 SDDS_Bomb("invalid -column syntax. Can't combine deltaGuess with other options.");
158 if (column[nColumns].deltaGuess <= 0)
159 SDDS_Bomb("invalid -column syntax. deltaGuess<=0.");
160 } else {
161 if (column[nColumns].flags & COLUMN_BIN_FACTOR && column[nColumns].flags & COLUMN_MAXIMUM_BINS)
162 SDDS_Bomb("invalid -column syntax. Can't give minimumBins and maximumBins with binFactor");
163 if (!(column[nColumns].flags & COLUMN_BIN_FACTOR) && !(column[nColumns].flags & COLUMN_MAXIMUM_BINS))
164 SDDS_Bomb("invalid -column syntax. Give maximumBins or binFactor");
165 if (column[nColumns].flags & COLUMN_BIN_FACTOR && column[nColumns].binFactor < 1)
166 SDDS_Bomb("invalid -column syntax. binFactor<1");
167 }
168 }
169 nColumns++;
170 break;
171 case CLO_PIPE:
172 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
173 SDDS_Bomb("invalid -pipe syntax");
174 break;
175 case CLO_VERBOSE:
176 verbose = 1;
177 break;
178 case CLO_MAKE_PARAMETERS:
179 makeParameters = 1;
180 break;
181 default:
182 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
183 exit(EXIT_FAILURE);
184 break;
185 }
186 } else {
187 if (!input)
188 input = scanned[iArg].list[0];
189 else if (!output)
190 output = scanned[iArg].list[0];
191 else
192 SDDS_Bomb("too many filenames seen");
193 }
194 }
195
196 processFilenames("sddssnap2grid", &input, &output, pipeFlags, 0, NULL);
197
198 if (!SDDS_InitializeInput(&SDDSin, input))
199 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
200
201 if (!SDDS_InitializeCopy(&SDDSout, &SDDSin, output, "w"))
202 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
203 if (makeParameters)
204 AddParameterDefinitions(&SDDSout, &SDDSin, column, nColumns);
205 if (!SDDS_WriteLayout(&SDDSout))
206 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
207
208 while (SDDS_ReadPage(&SDDSin) > 0) {
209 rows = SDDS_CountRowsOfInterest(&SDDSin);
210 if (!SDDS_CopyPage(&SDDSout, &SDDSin))
211 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
212 for (iColumn = 0; iColumn < nColumns; iColumn++) {
213 if (!(data = SDDS_GetColumnInDoubles(&SDDSout, column[iColumn].name)))
214 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
215 SnapDataToGrid(data, rows, column + iColumn, verbose, &min, &max, &points);
216 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, data, rows, column[iColumn].name))
217 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
218 if (makeParameters)
219 StoreGridParameters(&SDDSout, column[iColumn].name, min, max, points);
220 free(data);
221 }
222 if (!SDDS_WritePage(&SDDSout))
223 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
224 }
225
226 if (!SDDS_Terminate(&SDDSin)) {
227 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
228 exit(EXIT_FAILURE);
229 }
230 if (!SDDS_Terminate(&SDDSout)) {
231 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
232 exit(EXIT_FAILURE);
233 }
234
235 return EXIT_SUCCESS;
236}
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.

◆ SnapDataToGrid()

void SnapDataToGrid ( double * data,
int64_t rows,
COLUMN_TO_SNAP * column,
int verbose,
double * min,
double * max,
int64_t * points )

Definition at line 238 of file sddssnap2grid.c.

239 {
240 int64_t i, bins, centroids;
241 double min, max, hmin, hmax, span, middle;
242 char s[16384];
243 double *histogram, *whistogram, *centroid, delta;
244
245 find_min_max(&min, &max, data, rows);
246 if ((span = max - min) <= 0)
247 return;
248
249 /* Add some buffer space at the ends for histogramming */
250 span *= 1 + 2.0 / rows;
251 middle = (max + min) / 2;
252 hmin = middle - span / 2;
253 hmax = middle + span / 2;
254
255 bins = 0; /* prevent compiler warning */
256 if (column->flags & COLUMN_DELTA_GUESS)
257 bins = (hmax - hmin) / (column->deltaGuess / 10);
258 else if (column->flags & COLUMN_MAXIMUM_BINS)
259 bins = column->maximumBins;
260 else if (column->flags & COLUMN_BIN_FACTOR)
261 bins = rows * column->binFactor;
262 else
263 SDDS_Bomb("logic error. Missing flags for determination of maximum number of bins.");
264
265 if (verbose) {
266 printf("Working on %s with %" PRId64 " bins, span=%le, hmin=%le, hmax=%le\n",
267 column->name, bins, span, hmin, hmax);
268 fflush(stdout);
269 }
270
271 while (bins >= 2) {
272 histogram = calloc(bins, sizeof(*histogram));
273 whistogram = calloc(bins, sizeof(*whistogram));
274 if (verbose) {
275 printf("Histogramming %s with %" PRId64 " bins\n", column->name, bins);
276 fflush(stdout);
277 }
278 make_histogram(histogram, bins, hmin, hmax, data, rows, 1);
279 for (i = 1; i < bins; i++) {
280 if (histogram[i] && histogram[i - 1])
281 break;
282 }
283 if (i != bins) {
284 /* indicates that two adjacent bins are occupied, so we need more coarse grouping */
285 free(histogram);
286 bins *= column->adjustFactor;
287 continue;
288 }
289 if (bins <= 1) {
290 snprintf(s, 16384, "Unable to snap data for %s to grid\n", column->name);
291 SDDS_Bomb(s);
292 }
293
294 /* self-weighted histogram gives centroid*number in each bin */
295 make_histogram_weighted(whistogram, bins, hmin, hmax, data, rows, 1, data);
296 centroid = tmalloc(sizeof(*centroid) * bins);
297 centroids = 0;
298 for (i = 0; i < bins; i++) {
299 if (histogram[i]) {
300 /*
301 printf("centroid[%ld] = %le (%le/%le)\n", centroids, whistogram[i]/histogram[i],
302 whistogram[i], histogram[i]);
303 */
304 centroid[centroids++] = whistogram[i] / histogram[i];
305 }
306 }
307 free(histogram);
308 free(whistogram);
309 delta = (centroid[centroids - 1] - centroid[0]) / (centroids - 1);
310 /* assign new value using the computed delta and the first centroid as the first value */
311 for (i = 0; i < rows; i++)
312 data[i] = ((long)((data[i] - centroid[0]) / delta + 0.5)) * delta + centroid[0];
313 if (verbose) {
314 printf("Completed work for %s: delta = %le, start = %le, locations = %" PRId64 "\n",
315 column->name, delta, centroid[0], centroids);
316 fflush(stdout);
317 }
318 *minReturn = centroid[0];
319 *pointsReturn = centroids;
320 *maxReturn = centroid[0] + (centroids - 1) * delta;
321 free(centroid);
322 return;
323 }
324}
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
int find_min_max(double *min, double *max, double *list, int64_t n)
Finds the minimum and maximum values in a list of doubles.
Definition findMinMax.c:33
long make_histogram_weighted(double *hist, long n_bins, double lo, double hi, double *data, long n_pts, long new_start, double *weight)
Compiles a weighted histogram from data points.
long make_histogram(double *hist, long n_bins, double lo, double hi, double *data, int64_t n_pts, long new_start)
Compiles a histogram from data points.

◆ StoreGridParameters()

void StoreGridParameters ( SDDS_DATASET * SDDSout,
char * column,
double min,
double max,
int64_t points )

Definition at line 344 of file sddssnap2grid.c.

344 {
345 char name1[BUFLEN], name2[BUFLEN], name3[BUFLEN], name4[BUFLEN];
346
347 snprintf(name1, BUFLEN, "%sMinimum", column);
348 snprintf(name2, BUFLEN, "%sMaximum", column);
349 snprintf(name3, BUFLEN, "%sInterval", column);
350 snprintf(name4, BUFLEN, "%sDimension", column);
351 if (!SDDS_SetParameters(SDDSout, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE,
352 name1, min,
353 name2, max,
354 name3, points > 1 ? (max - min) / (points - 1) : -1,
355 name4, points,
356 NULL))
357 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
358}
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)

Variable Documentation

◆ option

char* option[N_OPTIONS]
Initial value:
= {
"pipe",
"column",
"verbose",
"makeparameters"
}

Definition at line 62 of file sddssnap2grid.c.

62 {
63 "pipe",
64 "column",
65 "verbose",
66 "makeparameters"
67};

◆ USAGE

char* USAGE
static
Initial value:
=
"sddssnap2grid [<inputfile>] [<outputfile>] [-pipe=[input][,output]]\n"
" -column=<name>,[{maximumBins=<value> | binFactor=<value>} | deltaGuess=<value>][,adjustFactor=<value>]\n"
" [-column=...]\n"
" [-makeParameters] [-verbose]\n"
"\n"
"Options:\n"
" -pipe Standard SDDS Toolkit pipe option.\n"
" -column Specify the name of a column to modify for equispaced values.\n"
" The default mode uses binFactor = 10, meaning the maximum number\n"
" of bins is 10 times the number of data points. The algorithm works as follows:\n"
" 1. Bin the data with the maximum number of bins.\n"
" 2. If no two adjacent bins are populated, use this grouping to compute\n"
" centroids for each subset, providing delta values.\n"
" 3. If two adjacent bins are populated, multiply the number of bins by\n"
" adjustFactor (default: 0.9) and repeat the process.\n"
" Alternatively, you can provide a guess for the grid spacing;\n"
" the algorithm will use 1/10 of this as the initial bin size.\n"
" -makeParameters\n"
" Store grid parameters in the output file as parameters named\n"
" <name>Minimum, <name>Maximum, <name>Interval, and <name>Dimension.\n"
" -verbose Report the computed deltas and the number of grid points.\n"
"\n"
"Program by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n"

Definition at line 74 of file sddssnap2grid.c.