SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsinsideboundaries.c
Go to the documentation of this file.
1/**
2 * @file sddsinsideboundaries.c
3 * @brief Analyzes data points relative to user-defined geometric boundaries.
4 *
5 * @details
6 * This program reads an SDDS (Self Describing Data Set) file and evaluates whether each data point lies inside
7 * or outside user-defined geometric boundaries specified in a separate file. It supports multithreading for
8 * improved performance and offers options to filter and save points based on their inclusion or exclusion from
9 * the specified boundaries. The input boundaries can span multiple pages.
10 *
11 * @section Usage
12 * ```
13 * sddsinsideboundaries [<inputfile>] [<outputfile>]
14 * [-pipe=[input][,output]]
15 * -columns=<x-name>,<y-name>
16 * -boundary=<filename>,<x-name>,<y-name>
17 * [-insideColumn=<column_name>]
18 * [-keep={inside|outside}]
19 * [-threads=<number>]
20 * ```
21 *
22 * @section Options
23 * | Required | Description |
24 * |---------------------------------------|---------------------------------------------------------------------------------------|
25 * | `-columns` | Specify the names of the (x, y) columns in the input file. |
26 * | `-boundary` | Provide a file containing boundary data, including x and y columns. |
27 *
28 * | Optional | Description |
29 * |---------------------------------------|---------------------------------------------------------------------------------------|
30 * | `-pipe` | Specify input and/or output via pipe. |
31 * | `-insideColumn` | Define the name of the output column indicating boundary inclusion. |
32 * | `-keep` | Filter points based on inclusion (inside) or exclusion (outside) from boundaries. |
33 * | `-threads` | Set the number of threads for computation (default: 1). |
34 *
35 * @copyright
36 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
37 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
38 *
39 * @license
40 * This file is distributed under the terms of the Software License Agreement
41 * found in the file LICENSE included with this distribution.
42 *
43 * @authors
44 * M. Borland, R. Soliday
45 */
46
47#include "mdb.h"
48#include "SDDS.h"
49#include "scan.h"
50#include "SDDSutils.h"
51#include <ctype.h>
52#if defined(linux) || (defined(_WIN32) && !defined(_MINGW))
53# include <omp.h>
54#else
55void omp_set_num_threads(int a) {}
56#endif
57
58typedef enum {
59 SET_COLUMNS,
60 SET_BOUNDARY,
61 SET_INSIDE_COLUMN,
62 SET_KEEP,
63 SET_PIPE,
64 SET_THREADS,
65 N_OPTIONS
66} option_type;
67
68char *option[N_OPTIONS] = {
69 "columns",
70 "boundary",
71 "insideColumn",
72 "keep",
73 "pipe",
74 "threads",
75};
76
77static char *USAGE = "\n" \
78 " sddsinsideboundaries [<inputfile>] [<outputfile>]\n" \
79 " [-pipe=[input][,output]]\n" \
80 " -columns=<x-name>,<y-name>\n" \
81 " -boundary=<filename>,<x-name>,<y-name>\n" \
82 " [-insideColumn=<column_name>]\n" \
83 " [-keep={inside|outside}]\n" \
84 " [-threads=<number>]\n" \
85 "Options:\n" \
86 " -columns=<x-name>,<y-name>\n" \
87 " Specify the names of the (x, y) columns in the input file.\n" \
88 " -boundary=<filename>,<x-name>,<y-name>\n" \
89 " Provide a file with boundary data, including x and y columns.\n" \
90 " The file can have multiple pages.\n" \
91 " -insideColumn=<column_name>\n" \
92 " Specify the name of the output column for the count of boundaries\n" \
93 " containing each point (default: InsideSum).\n" \
94 " -keep={inside|outside}\n" \
95 " Filter points:\n" \
96 " inside - Keep only points inside any boundary.\n" \
97 " outside - Keep only points outside all boundaries.\n" \
98 " By default, all points are kept.\n" \
99 " -threads=<number>\n" \
100 " Set the number of threads for computation (default: 1).\n\n" \
101 "Program by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
102
103typedef enum {
104 KEEP_ALL,
105 KEEP_INSIDE,
106 KEEP_OUTSIDE,
107 N_KEEP_OPTIONS
108} keep_option_types;
109
110char *keepOption[N_KEEP_OPTIONS] = {
111 "all",
112 "inside",
113 "outside"
114};
115
116long compute_inside_sum(double x, double y, double **xBoundary, double **yBoundary, int64_t *nValues, long nBoundaries);
117long read_boundary_data(char *boundaryInput, char *bxColumn, char *byColumn, double ***xBoundary, double ***yBoundary, int64_t **nValues);
118
119double *xData, *yData;
120double **xBoundary = NULL, **yBoundary = NULL;
121int64_t *nValues = NULL, rows;
122long nBoundaries = 0;
123int threads = 1;
124int32_t *insideSumData;
125
126int main(int argc, char **argv) {
127 int iArg;
128 SCANNED_ARG *scanned = NULL;
129 char *input = NULL, *output = NULL, *boundaryInput = NULL;
130 char *xColumn = NULL, *yColumn = NULL, *bxColumn = NULL, *byColumn = NULL;
131 char *insideColumn = "InsideSum";
132 SDDS_DATASET SDDSin, SDDSout;
133 int32_t *keep;
134 long keepCode = KEEP_ALL;
135 long readCode, keepSeen = 0;
136 int64_t i;
137 unsigned long pipeFlags = 0;
138
140 argc = scanargs(&scanned, argc, argv);
141 if (argc < 3)
142 bomb(NULL, USAGE);
143
144 for (iArg = 1; iArg < argc; iArg++) {
145 if (scanned[iArg].arg_type == OPTION) {
146 /* process options here */
147 switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
148 case SET_COLUMNS:
149 if (xColumn || yColumn)
150 SDDS_Bomb("only one -columns option may be given");
151 if (scanned[iArg].n_items != 3)
152 SDDS_Bomb("invalid -columns syntax");
153 xColumn = scanned[iArg].list[1];
154 yColumn = scanned[iArg].list[2];
155 break;
156 case SET_BOUNDARY:
157 if (boundaryInput)
158 SDDS_Bomb("only one -boundary option may be given");
159 if (scanned[iArg].n_items != 4)
160 SDDS_Bomb("invalid -boundary syntax");
161 boundaryInput = scanned[iArg].list[1];
162 bxColumn = scanned[iArg].list[2];
163 byColumn = scanned[iArg].list[3];
164 break;
165 case SET_INSIDE_COLUMN:
166 if (scanned[iArg].n_items != 2)
167 SDDS_Bomb("invalid -insideColumn syntax");
168 insideColumn = scanned[iArg].list[1];
169 break;
170 case SET_KEEP:
171 if (keepSeen)
172 SDDS_Bomb("only one -keep option may be given");
173 if (scanned[iArg].n_items != 2)
174 SDDS_Bomb("invalid -keep syntax");
175 if ((keepCode = match_string(scanned[iArg].list[1], keepOption, N_KEEP_OPTIONS, 0)) < 0)
176 SDDS_Bomb("invalid -keep value. Supply 'all', 'inside', or 'outside' or a unique abbreviation");
177 break;
178 case SET_PIPE:
179 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
180 SDDS_Bomb("invalid -pipe syntax");
181 break;
182 case SET_THREADS:
183 if (scanned[iArg].n_items != 2 ||
184 sscanf(scanned[iArg].list[1], "%d", &threads) != 1 || threads < 1)
185 SDDS_Bomb("invalid -threads syntax");
186 break;
187 default:
188 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
189 exit(1);
190 break;
191 }
192 } else {
193 if (!input)
194 input = scanned[iArg].list[0];
195 else if (!output)
196 output = scanned[iArg].list[0];
197 else
198 SDDS_Bomb("too many filenames seen");
199 }
200 }
201
202 processFilenames("sddsinsideboundaries", &input, &output, pipeFlags, 0, NULL);
203 if (!pipeFlags && strcmp(input, output) == 0)
204 SDDS_Bomb("can't use same file for input and output");
205
206 if (!boundaryInput || !bxColumn || !byColumn)
207 SDDS_Bomb("-boundaries option must be given");
208 if ((nBoundaries = read_boundary_data(boundaryInput, bxColumn, byColumn, &xBoundary, &yBoundary, &nValues)) <= 0)
209 SDDS_Bomb("No valid data in boundary data file");
210
211 if (!SDDS_InitializeInput(&SDDSin, input) ||
212 !SDDS_InitializeCopy(&SDDSout, &SDDSin, output, "w"))
213 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
214
215 if (SDDS_CheckColumn(&SDDSin, xColumn, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK)
216 SDDS_Bomb("-xColumn is not present or not numeric");
217 if (SDDS_CheckColumn(&SDDSin, yColumn, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK)
218 SDDS_Bomb("-yColumn is not present or not numeric");
219
220 if (!SDDS_DefineColumn(&SDDSout, insideColumn, NULL, NULL, "Number of boundaries containing this point", NULL, SDDS_LONG, 0))
221 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
222
223 if (!SDDS_WriteLayout(&SDDSout))
224 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
225
226 insideSumData = NULL;
227 keep = NULL;
228 xData = yData = NULL;
229 omp_set_num_threads(threads);
230 //fprintf(stderr, "threads=%d\n", threads);
231 while ((readCode = SDDS_ReadPage(&SDDSin)) > 0) {
232 if (!SDDS_CopyPage(&SDDSout, &SDDSin))
233 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
234 SDDS_SetRowFlags(&SDDSout, 1);
235 rows = SDDS_CountRowsOfInterest(&SDDSout);
236 if (!(xData = SDDS_GetColumnInDoubles(&SDDSin, xColumn)) ||
237 !(yData = SDDS_GetColumnInDoubles(&SDDSin, yColumn))) {
238 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
239 }
240 insideSumData = calloc(rows, sizeof(*insideSumData));
241 keep = calloc(rows, sizeof(*keep));
242
243#pragma omp parallel for
244 for (i = 0; i < rows; i++) {
245 insideSumData[i] = compute_inside_sum(xData[i], yData[i], xBoundary, yBoundary, nValues, nBoundaries);
246 switch (keepCode) {
247 case KEEP_INSIDE:
248 keep[i] = insideSumData[i];
249 break;
250 case KEEP_OUTSIDE:
251 keep[i] = insideSumData[i] == 0;
252 break;
253 default:
254 keep[i] = 1;
255 break;
256 }
257 }
258 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_NAME, insideSumData, rows, insideColumn) ||
259 !SDDS_AssertRowFlags(&SDDSout, SDDS_FLAG_ARRAY, keep, rows) ||
260 !SDDS_WritePage(&SDDSout))
261 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
262 free(keep);
263 free(insideSumData);
264 free(xData);
265 free(yData);
266 }
267 if (!SDDS_Terminate(&SDDSin) || !SDDS_Terminate(&SDDSout))
268 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
269 return 0;
270}
271
272long read_boundary_data(char *boundaryInput, char *bxColumn, char *byColumn, double ***xBoundary, double ***yBoundary, int64_t **nValues) {
273 SDDS_DATASET SDDSin;
274 long nSets = 0;
275
276 if (!SDDS_InitializeInput(&SDDSin, boundaryInput) ||
277 SDDS_CheckColumn(&SDDSin, bxColumn, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK ||
278 SDDS_CheckColumn(&SDDSin, byColumn, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK)
279 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
280
281 while (SDDS_ReadPage(&SDDSin) > 0) {
282 if (SDDS_RowCount(&SDDSin) == 0)
283 continue;
284 *xBoundary = SDDS_Realloc(*xBoundary, sizeof(**xBoundary) * (nSets + 1));
285 *yBoundary = SDDS_Realloc(*yBoundary, sizeof(**yBoundary) * (nSets + 1));
286 *nValues = SDDS_Realloc(*nValues, sizeof(**nValues) * (nSets + 1));
287 (*nValues)[nSets] = SDDS_RowCount(&SDDSin);
288 if (!((*xBoundary)[nSets] = SDDS_GetColumnInDoubles(&SDDSin, bxColumn)) ||
289 !((*yBoundary)[nSets] = SDDS_GetColumnInDoubles(&SDDSin, byColumn)))
290 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
291 nSets++;
292 }
293 return nSets;
294}
295
296long compute_inside_sum(double x, double y, double **xBoundary, double **yBoundary, int64_t *nValues, long nBoundaries) {
297 int64_t ib;
298 long insideSum = 0;
299 for (ib = 0; ib < nBoundaries; ib++)
300 insideSum += pointIsInsideContour(x, y, xBoundary[ib], yBoundary[ib], nValues[ib], NULL, 0.0);
301 return insideSum;
302}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int32_t SDDS_AssertRowFlags(SDDS_DATASET *SDDS_dataset, uint32_t mode,...)
Sets acceptance flags for rows based on specified criteria.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_SetRowFlags(SDDS_DATASET *SDDS_dataset, int32_t row_flag_value)
Sets the acceptance flags for all rows in the current data table of a data set.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_ANY_NUMERIC_TYPE
Special identifier used by SDDS_Check*() routines to accept any numeric type.
Definition SDDStypes.h:157
Utility functions for SDDS dataset manipulation and string array operations.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int pointIsInsideContour(double x0, double y0, double *x, double *y, int64_t n, double *center, double theta)
Determine if a given point (x0, y0) is inside a specified polygonal contour.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390