SDDSlib
Loading...
Searching...
No Matches
sddsinsideboundaries.c
Go to the documentation of this file.
1/**
2 * @file sddsinsideboundaries.c
3 * @brief Program for analyzing data points relative to geometric boundaries.
4 *
5 * This program reads a data file and checks whether each data point falls inside
6 * or outside user-defined boundaries specified in a separate file. The program supports
7 * multithreading for enhanced performance and allows filtering of points based on
8 * their inclusion or exclusion from the boundaries.
9 *
10 * ### Features:
11 * - Identify points inside or outside boundaries.
12 * - Filter points based on inclusion or exclusion.
13 * - Handle multiple pages of boundary data.
14 * - Multi-threaded processing.
15 *
16 * ### Options:
17 * - `-columns`: Specify the x and y columns in the input file.
18 * - `-boundary`: Provide a file containing boundary data.
19 * - `-insideColumn`: Define the name of the output column indicating boundary inclusion.
20 * - `-keep`: Filter points inside or outside boundaries.
21 * - `-threads`: Set the number of threads for computation.
22 * - `-pipe`: Enable piping for input/output.
23 *
24 * ### Dependencies:
25 * - The SDDS library for file handling.
26 * - The OpenMP library for multithreading.
27 *
28 * ### Usage Example:
29 * ```
30 * sddsinsideboundaries input.sdds output.sdds -columns=x,y -boundary=boundary.sdds,x_boundary,y_boundary -keep=inside
31 * ```
32 *
33 * @copyright
34 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
35 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
36 *
37 * @license
38 * This file is distributed under the terms of the Software License Agreement
39 * found in the file LICENSE included with this distribution.
40 *
41 * @author M. Borland, R. Soliday
42 */
43
44#include "mdb.h"
45#include "SDDS.h"
46#include "scan.h"
47#include "SDDSutils.h"
48#include <ctype.h>
49#if defined(linux) || (defined(_WIN32) && !defined(_MINGW))
50# include <omp.h>
51#else
/* No-op stand-in used when <omp.h> is not included (see the surrounding #if):
 * it lets main() call omp_set_num_threads() unconditionally. */
void omp_set_num_threads(int a) {
  (void)a; /* thread count is ignored by this stub */
}
53#endif
54
/* Identifiers for the command-line options. Each value doubles as the index
 * of the option's name in option[], which is what match_string() is given
 * in main(). N_OPTIONS is the number of options, not an option itself. */
typedef enum {
  SET_COLUMNS,       /* -columns */
  SET_BOUNDARY,      /* -boundary */
  SET_INSIDE_COLUMN, /* -insideColumn */
  SET_KEEP,          /* -keep */
  SET_PIPE,          /* -pipe */
  SET_THREADS,       /* -threads */
  N_OPTIONS
} option_type;

/* Option names, indexed by option_type. */
char *option[N_OPTIONS] = {
  [SET_COLUMNS] = "columns",
  [SET_BOUNDARY] = "boundary",
  [SET_INSIDE_COLUMN] = "insideColumn",
  [SET_KEEP] = "keep",
  [SET_PIPE] = "pipe",
  [SET_THREADS] = "threads",
};
73
74static char *USAGE = "\nUsage:\n" \
75 " sddsinsideboundaries [<inputfile>] [<outputfile>] [-pipe=[input][,output]]\n" \
76 " -columns=<x-name>,<y-name>\n" \
77 " Specify the names of the (x, y) columns in the input file.\n" \
78 " -boundary=<filename>,<x-name>,<y-name>\n" \
79 " Provide a file with boundary data, including x and y columns.\n" \
80 " The file can have multiple pages.\n" \
81 " -insideColumn=<column_name>\n" \
82 " Specify the name of the output column for the count of boundaries\n" \
83 " containing each point (default: InsideSum).\n" \
84 " -keep={inside|outside}\n" \
85 " Filter points:\n" \
86 " inside - Keep only points inside any boundary.\n" \
87 " outside - Keep only points outside all boundaries.\n" \
88 " By default, all points are kept.\n" \
89 " -threads=<number>\n" \
90 " Set the number of threads for computation (default: 1).\n\n" \
91 "Program by Michael Borland. (" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n";
92
/* Row-filtering modes selectable with -keep. Each value is also the index of
 * the mode's name in keepOption[]; KEEP_ALL is the default used by main(). */
typedef enum {
  KEEP_ALL,     /* keep every row */
  KEEP_INSIDE,  /* keep rows whose inside count is nonzero */
  KEEP_OUTSIDE, /* keep rows whose inside count is zero */
  N_KEEP_OPTIONS
} keep_option_types;

/* Mode names accepted by -keep, indexed by keep_option_types. */
char *keepOption[N_KEEP_OPTIONS] = {
  [KEEP_ALL] = "all",
  [KEEP_INSIDE] = "inside",
  [KEEP_OUTSIDE] = "outside",
};
105
/* Sum of pointIsInsideContour() results for (x, y) over nBoundaries contours. */
long compute_inside_sum(double x, double y,
                        double **xBoundary, double **yBoundary,
                        int64_t *nValues, long nBoundaries);

/* Loads the boundary contours from boundaryInput; returns the number stored. */
long read_boundary_data(char *boundaryInput, char *bxColumn, char *byColumn,
                        double ***xBoundary, double ***yBoundary,
                        int64_t **nValues);
108
/* x and y column values of the input page currently being processed. */
double *xData;
double *yData;

/* Boundary contours filled in by read_boundary_data(): one x array and one
 * y array per contour, with nValues[k] giving the length of contour k. */
double **xBoundary = NULL;
double **yBoundary = NULL;
int64_t *nValues = NULL;

/* Number of rows in the page currently being processed. */
int64_t rows;

/* Number of contours held in xBoundary/yBoundary/nValues. */
long nBoundaries = 0;

/* Thread count passed to omp_set_num_threads(); changed by -threads. */
int threads = 1;

/* Per-row inside counts for the current page; written to the output column. */
int32_t *insideSumData;
115
116int main(int argc, char **argv) {
117 int iArg;
118 SCANNED_ARG *scanned = NULL;
119 char *input = NULL, *output = NULL, *boundaryInput = NULL;
120 char *xColumn = NULL, *yColumn = NULL, *bxColumn = NULL, *byColumn = NULL;
121 char *insideColumn = "InsideSum";
122 SDDS_DATASET SDDSin, SDDSout;
123 int32_t *keep;
124 long keepCode = KEEP_ALL;
125 long readCode, keepSeen = 0;
126 int64_t i;
127 unsigned long pipeFlags = 0;
128
130 argc = scanargs(&scanned, argc, argv);
131 if (argc < 3)
132 bomb(NULL, USAGE);
133
134 for (iArg = 1; iArg < argc; iArg++) {
135 if (scanned[iArg].arg_type == OPTION) {
136 /* process options here */
137 switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
138 case SET_COLUMNS:
139 if (xColumn || yColumn)
140 SDDS_Bomb("only one -columns option may be given");
141 if (scanned[iArg].n_items != 3)
142 SDDS_Bomb("invalid -columns syntax");
143 xColumn = scanned[iArg].list[1];
144 yColumn = scanned[iArg].list[2];
145 break;
146 case SET_BOUNDARY:
147 if (boundaryInput)
148 SDDS_Bomb("only one -boundary option may be given");
149 if (scanned[iArg].n_items != 4)
150 SDDS_Bomb("invalid -boundary syntax");
151 boundaryInput = scanned[iArg].list[1];
152 bxColumn = scanned[iArg].list[2];
153 byColumn = scanned[iArg].list[3];
154 break;
155 case SET_INSIDE_COLUMN:
156 if (scanned[iArg].n_items != 2)
157 SDDS_Bomb("invalid -insideColumn syntax");
158 insideColumn = scanned[iArg].list[1];
159 break;
160 case SET_KEEP:
161 if (keepSeen)
162 SDDS_Bomb("only one -keep option may be given");
163 if (scanned[iArg].n_items != 2)
164 SDDS_Bomb("invalid -keep syntax");
165 if ((keepCode = match_string(scanned[iArg].list[1], keepOption, N_KEEP_OPTIONS, 0)) < 0)
166 SDDS_Bomb("invalid -keep value. Supply 'all', 'inside', or 'outside' or a unique abbreviation");
167 break;
168 case SET_PIPE:
169 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
170 SDDS_Bomb("invalid -pipe syntax");
171 break;
172 case SET_THREADS:
173 if (scanned[iArg].n_items != 2 ||
174 sscanf(scanned[iArg].list[1], "%d", &threads) != 1 || threads < 1)
175 SDDS_Bomb("invalid -threads syntax");
176 break;
177 default:
178 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
179 exit(1);
180 break;
181 }
182 } else {
183 if (!input)
184 input = scanned[iArg].list[0];
185 else if (!output)
186 output = scanned[iArg].list[0];
187 else
188 SDDS_Bomb("too many filenames seen");
189 }
190 }
191
192 processFilenames("sddsinsideboundaries", &input, &output, pipeFlags, 0, NULL);
193 if (!pipeFlags && strcmp(input, output) == 0)
194 SDDS_Bomb("can't use same file for input and output");
195
196 if (!boundaryInput || !bxColumn || !byColumn)
197 SDDS_Bomb("-boundaries option must be given");
198 if ((nBoundaries = read_boundary_data(boundaryInput, bxColumn, byColumn, &xBoundary, &yBoundary, &nValues)) <= 0)
199 SDDS_Bomb("No valid data in boundary data file");
200
201 if (!SDDS_InitializeInput(&SDDSin, input) ||
202 !SDDS_InitializeCopy(&SDDSout, &SDDSin, output, "w"))
203 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
204
205 if (SDDS_CheckColumn(&SDDSin, xColumn, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK)
206 SDDS_Bomb("-xColumn is not present or not numeric");
207 if (SDDS_CheckColumn(&SDDSin, yColumn, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK)
208 SDDS_Bomb("-yColumn is not present or not numeric");
209
210 if (!SDDS_DefineColumn(&SDDSout, insideColumn, NULL, NULL, "Number of boundaries containing this point", NULL, SDDS_LONG, 0))
211 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
212
213 if (!SDDS_WriteLayout(&SDDSout))
214 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
215
216 insideSumData = NULL;
217 keep = NULL;
218 xData = yData = NULL;
219 omp_set_num_threads(threads);
220 //fprintf(stderr, "threads=%d\n", threads);
221 while ((readCode = SDDS_ReadPage(&SDDSin)) > 0) {
222 if (!SDDS_CopyPage(&SDDSout, &SDDSin))
223 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
224 SDDS_SetRowFlags(&SDDSout, 1);
225 rows = SDDS_CountRowsOfInterest(&SDDSout);
226 if (!(xData = SDDS_GetColumnInDoubles(&SDDSin, xColumn)) ||
227 !(yData = SDDS_GetColumnInDoubles(&SDDSin, yColumn))) {
228 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
229 }
230 insideSumData = calloc(rows, sizeof(*insideSumData));
231 keep = calloc(rows, sizeof(*keep));
232
233#pragma omp parallel for
234 for (i = 0; i < rows; i++) {
235 insideSumData[i] = compute_inside_sum(xData[i], yData[i], xBoundary, yBoundary, nValues, nBoundaries);
236 switch (keepCode) {
237 case KEEP_INSIDE:
238 keep[i] = insideSumData[i];
239 break;
240 case KEEP_OUTSIDE:
241 keep[i] = insideSumData[i] == 0;
242 break;
243 default:
244 keep[i] = 1;
245 break;
246 }
247 }
248 if (!SDDS_SetColumn(&SDDSout, SDDS_SET_BY_NAME, insideSumData, rows, insideColumn) ||
249 !SDDS_AssertRowFlags(&SDDSout, SDDS_FLAG_ARRAY, keep, rows) ||
250 !SDDS_WritePage(&SDDSout))
251 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
252 free(keep);
253 free(insideSumData);
254 free(xData);
255 free(yData);
256 }
257 if (!SDDS_Terminate(&SDDSin) || !SDDS_Terminate(&SDDSout))
258 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
259 return 0;
260}
261
262long read_boundary_data(char *boundaryInput, char *bxColumn, char *byColumn, double ***xBoundary, double ***yBoundary, int64_t **nValues) {
263 SDDS_DATASET SDDSin;
264 long nSets = 0;
265
266 if (!SDDS_InitializeInput(&SDDSin, boundaryInput) ||
267 SDDS_CheckColumn(&SDDSin, bxColumn, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK ||
268 SDDS_CheckColumn(&SDDSin, byColumn, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK)
269 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
270
271 while (SDDS_ReadPage(&SDDSin) > 0) {
272 if (SDDS_RowCount(&SDDSin) == 0)
273 continue;
274 *xBoundary = SDDS_Realloc(*xBoundary, sizeof(**xBoundary) * (nSets + 1));
275 *yBoundary = SDDS_Realloc(*yBoundary, sizeof(**yBoundary) * (nSets + 1));
276 *nValues = SDDS_Realloc(*nValues, sizeof(**nValues) * (nSets + 1));
277 (*nValues)[nSets] = SDDS_RowCount(&SDDSin);
278 if (!((*xBoundary)[nSets] = SDDS_GetColumnInDoubles(&SDDSin, bxColumn)) ||
279 !((*yBoundary)[nSets] = SDDS_GetColumnInDoubles(&SDDSin, byColumn)))
280 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
281 nSets++;
282 }
283 return nSets;
284}
285
/**
 * @brief Adds up the pointIsInsideContour() results for one point.
 *
 * @param x, y         Coordinates of the point to test.
 * @param xBoundary    Array of per-contour x arrays.
 * @param yBoundary    Array of per-contour y arrays.
 * @param nValues      Number of points in each contour.
 * @param nBoundaries  Number of contours.
 * @return Sum over all contours of the value pointIsInsideContour() returns
 *         for (x, y); 0 when nBoundaries is not positive.
 */
long compute_inside_sum(double x, double y, double **xBoundary, double **yBoundary, int64_t *nValues, long nBoundaries) {
  long total = 0;
  int64_t contour = 0;

  while (contour < nBoundaries) {
    /* No center and a zero angle are passed for every contour. */
    total += pointIsInsideContour(x, y, xBoundary[contour], yBoundary[contour], nValues[contour], NULL, 0.0);
    contour++;
  }
  return total;
}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_InitializeCopy(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, char *filename, char *filemode)
Definition SDDS_copy.c:40
int32_t SDDS_CopyPage(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:578
int32_t SDDS_SetColumn(SDDS_DATASET *SDDS_dataset, int32_t mode, void *data, int64_t rows,...)
Sets the values for one data column in the current data table of an SDDS dataset.
int32_t SDDS_AssertRowFlags(SDDS_DATASET *SDDS_dataset, uint32_t mode,...)
Sets acceptance flags for rows based on specified criteria.
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_SetRowFlags(SDDS_DATASET *SDDS_dataset, int32_t row_flag_value)
Sets the acceptance flags for all rows in the current data table of a data set.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_ANY_NUMERIC_TYPE
Special identifier used by SDDS_Check*() routines to accept any numeric type.
Definition SDDStypes.h:157
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int pointIsInsideContour(double x0, double y0, double *x, double *y, int64_t n, double *center, double theta)
Determine if a given point (x0, y0) is inside a specified polygonal contour.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390