SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddssampledist.c File Reference

Detailed Description

Generates sampled distributions based on input SDDS files or direct specifications.

This program generates samples from specified distributions (Gaussian, Uniform, Poisson) or based on cumulative distribution functions (CDF/DF) provided in input SDDS files. It supports various options to customize the sampling process, including the number of samples, random seed, verbosity, and output ordering.

Usage

sddssampledist [<inputfile>] [<outputfile>]
[-pipe=[in][,out]]
-columns=independentVariable=<name>,{cdf=<CDFName> | df=<DFName>}[,output=<name>][,units=<string>][,factor=<value>][,offset=<value>][,datafile=<filename>][,haltonRadix=<primeNumber>[,haltonOffset=<integer>][,randomize[,group=<groupID>]]]
[-columns=...]
-samples=<integer>
[-seed=<integer>]
[-verbose]
[-gaussian=columnName=<columnName>[,meanValue=<value>|@<parameter_name>][,sigmaValue=<value>|@<parameter_name>][,units=<string>]]
[-uniform=columnName=<columnName>[,minimumValue=<value>|@<parameter_name>][,maximumValue=<value>|@<parameter_name>][,units=<string>]]
[-poisson=columnName=<columnName>[,meanValue=<value>|@<parameter_name>][,units=<string>]]
[-optimalHalton]
[-majorOrder=row|column]

Options

Required Description
-columns Defines independent variable and distribution function with customization options.
-samples Specifies the number of samples to generate.
Optional Description
-pipe Use standard input and/or output streams.
-seed Specifies the seed for the random number generator.
-verbose Enables verbose output.
-gaussian Samples from a Gaussian distribution.
-uniform Samples from a Uniform distribution.
-poisson Samples from a Poisson distribution.
-majorOrder Specifies the output file order as row-major or column-major.

Incompatibilities

  • -columns
    • Requires at least one independentVariable and exactly one of cdf or df.
    • If haltonRadix is used, the value must be a prime number.
    • Randomization requires valid group identifiers if group=<groupID> is specified.
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Authors
M. Borland, R. Soliday, H. Shang

Definition in file sddssampledist.c.

#include "mdb.h"
#include "scan.h"
#include "SDDS.h"
#include <time.h>

Go to the source code of this file.

Functions

long CreatePoissonDistributionTable (double **x, double **pos_CDF, double mean)
 
int main (int argc, char **argv)
 

Function Documentation

◆ CreatePoissonDistributionTable()

long CreatePoissonDistributionTable ( double ** x,
double ** pos_CDF,
double mean )

Definition at line 795 of file sddssampledist.c.

795 {
796 long i, npoints = 20, count = 0;
797 double *pos = NULL;
798 /* SDDS_DATASET pos_out; */
799
800 *x = *pos_CDF = NULL;
801 if (!(*x = malloc(sizeof(**x) * npoints)) ||
802 !(pos = malloc(sizeof(*pos) * npoints)) ||
803 !(*pos_CDF = malloc(sizeof(**pos_CDF) * npoints)))
804 SDDS_Bomb("memory allocation failure.");
805 i = count = 0;
806 while (1) {
807 if (count + 2 >= npoints) {
808 npoints += 20;
809 *x = SDDS_Realloc(*x, sizeof(**x) * npoints);
810 *pos_CDF = SDDS_Realloc(*pos_CDF, sizeof(**pos_CDF) * npoints);
811 pos = SDDS_Realloc(pos, sizeof(*pos) * npoints);
812 }
813
814 (*x)[count] = i;
815 if (!i) {
816 pos[i] = exp(-mean);
817 (*pos_CDF)[count] = pos[i];
818 count++;
819 } else {
820 pos[i] = pos[i - 1] * mean / i;
821 (*pos_CDF)[count] = (*pos_CDF)[count - 1];
822 (*pos_CDF)[count + 1] = (*pos_CDF)[count - 1] + pos[i];
823 (*x)[count + 1] = i;
824 if (1.0 - (*pos_CDF)[count + 1] <= 1.0e-15)
825 break;
826 count += 2;
827 }
828 i++;
829 }
830 /* fprintf(stderr,"lamda=%f\n", mean);
831 if (!SDDS_InitializeOutput(&pos_out, SDDS_BINARY, 0, NULL, NULL, "pos_dist.sdds"))
832 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
833 if (!SDDS_DefineSimpleColumn(&pos_out, "Count", NULL, SDDS_DOUBLE) ||
834 !SDDS_DefineSimpleColumn(&pos_out, "P", NULL, SDDS_DOUBLE))
835 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
836 if (!SDDS_SaveLayout(&pos_out) || !SDDS_WriteLayout(&pos_out))
837 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
838 if (!SDDS_StartPage(&pos_out, count) ||
839 !SDDS_SetColumnFromDoubles(&pos_out, SDDS_SET_BY_NAME, *x, count, "Count") ||
840 !SDDS_SetColumnFromDoubles(&pos_out, SDDS_SET_BY_NAME, *pos_CDF, count, "P"))
841 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
842 if (!SDDS_WritePage(&pos_out) || !SDDS_Terminate(&pos_out))
843 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
844 */
845 free(pos);
846 return count;
847}
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677

◆ main()

int main ( int argc,
char ** argv )

Definition at line 185 of file sddssampledist.c.

185 {
186 int iArg;
187 char *input, *output, *meanPar, *sigmaPar, *maxPar, *minPar;
188 long i, mainInputOpened, haltonID = 0, requireInput = 0;
189 unsigned long pipeFlags, majorOrderFlag;
190 SCANNED_ARG *scanned;
191 SDDS_DATASET SDDSin, SDDSout, *SDDSptr;
192 long randomNumberSeed = 0;
193 SEQ_REQUEST *seqRequest;
194 long samples, seqRequests, randomizationGroups = 0;
195 int64_t j, values;
196 double *sample, *IVValue, *CDFValue;
197 char msgBuffer[1000];
198 RANDOMIZED_ORDER *randomizationData = NULL;
199 long verbose, optimalHalton = 0;
200 short columnMajorOrder = -1;
201
203 argc = scanargs(&scanned, argc, argv);
204 if (argc < 2) {
205 fprintf(stderr, "%s%s%s\n", USAGE1, USAGE2, USAGE3);
206 return EXIT_FAILURE;
207 }
208 seqRequest = NULL;
209 seqRequests = 0;
210 output = input = NULL;
211 pipeFlags = 0;
212 samples = values = 0;
213 sample = IVValue = CDFValue = NULL;
214 verbose = 0;
215 maxPar = minPar = meanPar = sigmaPar = NULL;
216
217 for (iArg = 1; iArg < argc; iArg++) {
218 if (scanned[iArg].arg_type == OPTION) {
219 /* process options here */
220 switch (match_string(scanned[iArg].list[0], option, CLO_OPTIONS, 0)) {
221 case CLO_MAJOR_ORDER:
222 majorOrderFlag = 0;
223 scanned[iArg].n_items--;
224 if (scanned[iArg].n_items > 0 &&
225 (!scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
226 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
227 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
228 SDDS_Bomb("invalid -majorOrder syntax/values");
229 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
230 columnMajorOrder = 1;
231 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
232 columnMajorOrder = 0;
233 break;
234 case CLO_COLUMNS:
235 if (scanned[iArg].n_items < 3)
236 SDDS_Bomb("invalid -columns syntax");
237 if (!(seqRequest = SDDS_Realloc(seqRequest, sizeof(*seqRequest) * (seqRequests + 1))))
238 SDDS_Bomb("memory allocation failure");
239 scanned[iArg].n_items -= 1;
240 memset(seqRequest + seqRequests, 0, sizeof(*seqRequest));
241 /* remove following pointer initialization because memset already initializes them */
242 seqRequest[seqRequests].randomizationGroup = -1;
243 seqRequest[seqRequests].factor = 1;
244 seqRequest[seqRequests].offset = 0;
245 if (!scanItemList(&seqRequest[seqRequests].flags,
246 scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
247 "datafile", SDDS_STRING, &seqRequest[seqRequests].dataFileName, 1, SEQ_DATAFILE,
248 "independentvariable", SDDS_STRING, &seqRequest[seqRequests].indepName, 1, SEQ_INDEPNAME,
249 "cdf", SDDS_STRING, &seqRequest[seqRequests].CDFName, 1, SEQ_CDFNAME,
250 "df", SDDS_STRING, &seqRequest[seqRequests].DFName, 1, SEQ_DFNAME,
251 "output", SDDS_STRING, &seqRequest[seqRequests].outputName, 1, SEQ_OUTPUTNAME,
252 "units", SDDS_STRING, &seqRequest[seqRequests].units, 1, SEQ_UNITSGIVEN,
253 "haltonradix", SDDS_LONG, &seqRequest[seqRequests].haltonRadix, 1, SEQ_HALTONRADIX,
254 "haltonoffset", SDDS_LONG, &seqRequest[seqRequests].haltonOffset, 1, SEQ_HALTONOFFSET,
255 "randomize", -1, NULL, 0, SEQ_RANDOMIZE,
256 "group", SDDS_LONG, &seqRequest[seqRequests].randomizationGroup, 1, SEQ_RANDOMGROUP,
257 "factor", SDDS_DOUBLE, &seqRequest[seqRequests].factor, 1, 0,
258 "offset", SDDS_DOUBLE, &seqRequest[seqRequests].offset, 1, 0, NULL) ||
259 bitsSet(seqRequest[seqRequests].flags & (SEQ_INDEPNAME + SEQ_CDFNAME + SEQ_DFNAME)) != 2)
260 SDDS_Bomb("invalid -columns syntax");
261 if (seqRequest[seqRequests].flags & SEQ_RANDOMGROUP && seqRequest[seqRequests].randomizationGroup <= 0)
262 SDDS_Bomb("use a positive integer for the randomization group ID");
263 if (seqRequest[seqRequests].flags & SEQ_CDFNAME && seqRequest[seqRequests].flags & SEQ_DFNAME)
264 SDDS_Bomb("give df or cdf for -columns, not both");
265 if (seqRequest[seqRequests].flags & SEQ_HALTONRADIX && !is_prime(seqRequest[seqRequests].haltonRadix))
266 SDDS_Bomb("halton radix must be a prime number");
267 seqRequests++;
268 scanned[iArg].n_items += 1;
269 break;
270 case CLO_GAUSSIAN:
271 if (scanned[iArg].n_items < 2)
272 SDDS_Bomb("invalid -gaussian syntax");
273 if (!(seqRequest = SDDS_Realloc(seqRequest, sizeof(*seqRequest) * (seqRequests + 1))))
274 SDDS_Bomb("memory allocation failure");
275 memset(seqRequest + seqRequests, 0, sizeof(*seqRequest));
276 scanned[iArg].n_items -= 1;
277 seqRequest[seqRequests].randomizationGroup = -1;
278 seqRequest[seqRequests].mean = 0;
279 seqRequest[seqRequests].sigma = 1;
280 if (!scanItemList(&seqRequest[seqRequests].flags,
281 scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
282 "columnName", SDDS_STRING, &seqRequest[seqRequests].outputName, 1, SEQ_OUTPUTNAME,
283 "meanValue", SDDS_STRING, &meanPar, 1, 0,
284 "sigmaValue", SDDS_STRING, &sigmaPar, 1, 0,
285 "units", SDDS_STRING, &seqRequest[seqRequests].units, 1, SEQ_UNITSGIVEN, NULL))
286 SDDS_Bomb("invalid -gaussian syntax");
287 seqRequest[seqRequests].flags |= SEQ_DIRECT_GAUSSIAN;
288 if (!(seqRequest[seqRequests].flags & SEQ_OUTPUTNAME) || !(seqRequest[seqRequests].outputName))
289 SDDS_Bomb("columnName is not provided for gaussian distribution/");
290 if (meanPar) {
291 if (wild_match(meanPar, "@*"))
292 SDDS_CopyString(&seqRequest[seqRequests].meanPar, meanPar + 1);
293 else if (!get_double(&seqRequest[seqRequests].mean, meanPar))
294 SDDS_Bomb("Invalid value given for mean value of -gaussian distribution.");
295 free(meanPar);
296 meanPar = NULL;
297 }
298 if (sigmaPar) {
299 if (wild_match(sigmaPar, "@*"))
300 SDDS_CopyString(&seqRequest[seqRequests].sigmaPar, sigmaPar + 1);
301 else if (!get_double(&seqRequest[seqRequests].sigma, sigmaPar))
302 SDDS_Bomb("Invalid value given for sigma value of -gaussian distribution.");
303 free(sigmaPar);
304 sigmaPar = NULL;
305 }
306 seqRequests++;
307 scanned[iArg].n_items += 1;
308 break;
309 case CLO_UNIFORM:
310 if (scanned[iArg].n_items < 2)
311 SDDS_Bomb("invalid -uniform syntax");
312 if (!(seqRequest = SDDS_Realloc(seqRequest, sizeof(*seqRequest) * (seqRequests + 1))))
313 SDDS_Bomb("memory allocation failure");
314 memset(seqRequest + seqRequests, 0, sizeof(*seqRequest));
315 scanned[iArg].n_items -= 1;
316 memset(seqRequest + seqRequests, 0, sizeof(*seqRequest));
317 seqRequest[seqRequests].randomizationGroup = -1;
318 seqRequest[seqRequests].min = 0;
319 seqRequest[seqRequests].max = 1;
320 if (!scanItemList(&seqRequest[seqRequests].flags,
321 scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
322 "columnName", SDDS_STRING, &seqRequest[seqRequests].outputName, 1, SEQ_OUTPUTNAME,
323 "minimumValue", SDDS_STRING, &minPar, 1, 0,
324 "maximumValue", SDDS_STRING, &maxPar, 1, 0,
325 "units", SDDS_STRING, &seqRequest[seqRequests].units, 1, SEQ_UNITSGIVEN, NULL))
326 SDDS_Bomb("invalid -uniform syntax");
327 seqRequest[seqRequests].flags |= SEQ_DIRECT_UNIFORM;
328 if (!(seqRequest[seqRequests].flags & SEQ_OUTPUTNAME) ||
329 !(seqRequest[seqRequests].outputName))
330 SDDS_Bomb("columnName is not provided for uniform distribution/");
331 if (minPar) {
332 if (wild_match(minPar, "@*"))
333 SDDS_CopyString(&seqRequest[seqRequests].minPar, minPar + 1);
334 else if (!get_double(&seqRequest[seqRequests].min, minPar))
335 SDDS_Bomb("Invalid value given for minimum value of -uniform distribution.");
336 free(minPar);
337 minPar = NULL;
338 }
339 if (maxPar) {
340 if (wild_match(maxPar, "@*"))
341 SDDS_CopyString(&seqRequest[seqRequests].maxPar, maxPar + 1);
342 else if (!get_double(&seqRequest[seqRequests].max, maxPar))
343 SDDS_Bomb("Invalid value given for maximum value of -uniform distribution.");
344 free(maxPar);
345 maxPar = NULL;
346 }
347 seqRequests++;
348 scanned[iArg].n_items += 1;
349 break;
350 case CLO_POISSON:
351 if (scanned[iArg].n_items < 2)
352 SDDS_Bomb("invalid -poisson syntax");
353 if (!(seqRequest = SDDS_Realloc(seqRequest, sizeof(*seqRequest) * (seqRequests + 1))))
354 SDDS_Bomb("memory allocation failure");
355 memset(seqRequest + seqRequests, 0, sizeof(*seqRequest));
356 scanned[iArg].n_items -= 1;
357 memset(seqRequest + seqRequests, 0, sizeof(*seqRequest));
358 seqRequest[seqRequests].randomizationGroup = -1;
359 seqRequest[seqRequests].mean = 1;
360 if (!scanItemList(&seqRequest[seqRequests].flags,
361 scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
362 "columnName", SDDS_STRING, &seqRequest[seqRequests].outputName, 1, SEQ_OUTPUTNAME,
363 "meanValue", SDDS_STRING, &meanPar, 1, 0,
364 "units", SDDS_STRING, &seqRequest[seqRequests].units, 1, SEQ_UNITSGIVEN, NULL))
365 SDDS_Bomb("invalid -poisson syntax");
366 seqRequest[seqRequests].flags |= SEQ_DIRECT_POISSON;
367 if (!(seqRequest[seqRequests].flags & SEQ_OUTPUTNAME) || !(seqRequest[seqRequests].outputName))
368 SDDS_Bomb("columnName is not provided for poisson distribution/");
369 if (meanPar) {
370 if (wild_match(meanPar, "@*"))
371 SDDS_CopyString(&seqRequest[seqRequests].meanPar, meanPar + 1);
372 else if (!get_double(&seqRequest[seqRequests].mean, meanPar))
373 SDDS_Bomb("Invalid value given for mean value of -poisson distribution.");
374 free(meanPar);
375 meanPar = NULL;
376 }
377 seqRequests++;
378 scanned[iArg].n_items += 1;
379 break;
380 case CLO_SAMPLES:
381 if (scanned[iArg].n_items != 2 ||
382 sscanf(scanned[iArg].list[1], "%ld", &samples) != 1 ||
383 samples <= 0)
384 SDDS_Bomb("invalid -samples syntax");
385 break;
386 case CLO_SEED:
387 if (scanned[iArg].n_items != 2 ||
388 sscanf(scanned[iArg].list[1], "%ld", &randomNumberSeed) != 1)
389 SDDS_Bomb("invalid -seed syntax");
390 break;
391 case CLO_PIPE:
392 if (!processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
393 SDDS_Bomb("invalid -pipe syntax");
394 break;
395 case CLO_VERBOSE:
396 verbose = 1;
397 break;
398 case CLO_OPTIMAL_HALTON:
399 optimalHalton = 1;
400 break;
401 default:
402 fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
403 exit(EXIT_FAILURE);
404 break;
405 }
406 } else {
407 if (!input)
408 input = scanned[iArg].list[0];
409 else if (!output)
410 output = scanned[iArg].list[0];
411 else
412 SDDS_Bomb("too many filenames seen");
413 }
414 }
415
416 if (!seqRequests)
417 SDDS_Bomb("give one or more -columns options");
418 if (samples < 1)
419 SDDS_Bomb("-samples option not given");
420
421 for (i = 0; i < seqRequests; i++) {
422 if (!(seqRequest[i].flags & (SEQ_DATAFILE | SEQ_DIRECT_GAUSSIAN | SEQ_DIRECT_UNIFORM | SEQ_DIRECT_POISSON)))
423 break;
424 }
425 if (i == seqRequests) {
426 /* all columns options have either their own input files or else use
427 * one of the "direct" distributions. Hence, we don't expect an input
428 * file.
429 */
430 if (!input)
431 pipeFlags |= USE_STDIN; /* not really, but fakes out processFilenames */
432 if (input && !output) {
433 output = input;
434 input = NULL;
435 pipeFlags |= USE_STDIN;
436 if (fexists(output)) {
437 sprintf(msgBuffer, "%s exists already (sddssampledist)", output);
438 SDDS_Bomb(msgBuffer);
439 }
440 }
441 }
442
443 processFilenames("sddssampledist", &input, &output, pipeFlags, 0, NULL);
444
445 if (!SDDS_InitializeOutput(&SDDSout, SDDS_BINARY, 0, NULL, NULL, output))
446 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
447
448 if (verbose)
449 fprintf(stderr, "Initialized output file %s\n", output);
450
451 /* open and check input files */
452 for (i = mainInputOpened = 0; i < seqRequests; i++) {
453 if (seqRequest[i].flags & SEQ_DIRECT_GAUSSIAN) {
454 if (seqRequest[i].meanPar || seqRequest[i].sigmaPar) {
455 if (!mainInputOpened) {
456 if (!SDDS_InitializeInput(&SDDSin, input) ||
457 !SDDS_TransferAllParameterDefinitions(&SDDSout, &SDDSin, 0))
458 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
459 mainInputOpened = 1;
460 }
461 requireInput = 1;
462 SDDSptr = &SDDSin;
463 if ((seqRequest[i].meanPar &&
464 SDDS_CheckParameter(SDDSptr, seqRequest[i].meanPar, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK) ||
465 (seqRequest[i].sigmaPar &&
466 SDDS_CheckParameter(SDDSptr, seqRequest[i].sigmaPar, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK)) {
467 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
468 exit(EXIT_FAILURE);
469 }
470 }
471 if (!SDDS_DefineSimpleColumn(&SDDSout, seqRequest[i].outputName, NULL, SDDS_DOUBLE))
472 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
473 } else if (seqRequest[i].flags & SEQ_DIRECT_UNIFORM) {
474 if (seqRequest[i].minPar || seqRequest[i].maxPar) {
475 if (!mainInputOpened) {
476 if (!SDDS_InitializeInput(&SDDSin, input) ||
477 !SDDS_TransferAllParameterDefinitions(&SDDSout, &SDDSin, 0))
478 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
479 mainInputOpened = 1;
480 }
481 requireInput = 1;
482 SDDSptr = &SDDSin;
483 if ((seqRequest[i].minPar &&
484 SDDS_CheckParameter(SDDSptr, seqRequest[i].minPar, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK) ||
485 (seqRequest[i].maxPar &&
486 SDDS_CheckParameter(SDDSptr, seqRequest[i].maxPar, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK)) {
487 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
488 exit(EXIT_FAILURE);
489 }
490 }
491 if (!SDDS_DefineSimpleColumn(&SDDSout, seqRequest[i].outputName, NULL, SDDS_DOUBLE))
492 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
493 } else if (seqRequest[i].flags & SEQ_DIRECT_POISSON) {
494 if (seqRequest[i].meanPar) {
495 if (!mainInputOpened) {
496 if (!SDDS_InitializeInput(&SDDSin, input) ||
497 !SDDS_TransferAllParameterDefinitions(&SDDSout, &SDDSin, 0))
498 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
499 mainInputOpened = 1;
500 }
501 requireInput = 1;
502 SDDSptr = &SDDSin;
503 if (SDDS_CheckParameter(SDDSptr, seqRequest[i].meanPar, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK) {
504 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
505 exit(EXIT_FAILURE);
506 }
507 }
508 if (!SDDS_DefineSimpleColumn(&SDDSout, seqRequest[i].outputName, NULL, SDDS_LONG))
509 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
510 } else {
511 if (seqRequest[i].flags & SEQ_RANDOMIZE) {
512 long newGroupID = 0;
513 /* define randomization groups */
514 if (seqRequest[i].flags & SEQ_RANDOMGROUP) {
515 newGroupID = seqRequest[i].randomizationGroup;
516 for (j = 0; j < randomizationGroups; j++)
517 if (randomizationData[j].group == newGroupID) {
518 newGroupID = 0;
519 break;
520 }
521 } else {
522 seqRequest[i].randomizationGroup = newGroupID = -(i + 1);
523 }
524 if (newGroupID != 0) {
525 if (!(randomizationData = SDDS_Realloc(randomizationData, sizeof(*randomizationData) * (randomizationGroups + 1))))
526 SDDS_Bomb("memory allocation failure");
527 randomizationData[randomizationGroups].group = newGroupID;
528 randomizationData[randomizationGroups].order = NULL;
529 randomizationGroups++;
530 }
531 }
532 if (seqRequest[i].flags & SEQ_DATAFILE) {
533 if (!SDDS_InitializeInput(&seqRequest[i].SDDSin, seqRequest[i].dataFileName))
534 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
535 SDDSptr = &seqRequest[i].SDDSin;
536 } else {
537 if (!mainInputOpened) {
538 if (!SDDS_InitializeInput(&SDDSin, input) ||
539 !SDDS_TransferAllParameterDefinitions(&SDDSout, &SDDSin, 0))
540 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
541 mainInputOpened = 1;
542 }
543 requireInput = 1;
544 SDDSptr = &SDDSin;
545 }
546 if (SDDS_CheckColumn(SDDSptr, seqRequest[i].indepName, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK ||
547 ((seqRequest[i].flags & SEQ_CDFNAME) &&
548 SDDS_CheckColumn(SDDSptr, seqRequest[i].CDFName, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK) ||
549 ((seqRequest[i].flags & SEQ_DFNAME) &&
550 SDDS_CheckColumn(SDDSptr, seqRequest[i].DFName, NULL, SDDS_ANY_NUMERIC_TYPE, stderr) != SDDS_CHECK_OK) ||
551 !SDDS_TransferColumnDefinition(&SDDSout, SDDSptr, seqRequest[i].indepName, seqRequest[i].outputName)) {
552 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
553 exit(EXIT_FAILURE);
554 }
555 }
556
557 if (seqRequest[i].flags & SEQ_UNITSGIVEN &&
558 !SDDS_ChangeColumnInformation(&SDDSout, "units", seqRequest[i].units, SDDS_SET_BY_NAME, seqRequest[i].outputName))
559 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
560 }
561
562 if (verbose)
563 fprintf(stderr, "Initialized input files\n");
564
565 if (columnMajorOrder != -1)
566 SDDSout.layout.data_mode.column_major = columnMajorOrder;
567 else
568 SDDSout.layout.data_mode.column_major = 0;
569
570 if (!SDDS_WriteLayout(&SDDSout))
571 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
572
573 if (randomNumberSeed <= 0) {
574 randomNumberSeed = (long)time((time_t *)NULL);
575 randomNumberSeed = 2 * (randomNumberSeed / 2) + 1;
576#if defined(_WIN32) || defined(__APPLE__)
577 random_1(-labs(randomNumberSeed));
578#else
579 random_1(-fabs(randomNumberSeed));
580#endif
581 } else
582 random_1(-randomNumberSeed);
583
584 if (!(sample = calloc(samples, sizeof(*sample))))
585 SDDS_Bomb("memory allocation failure");
586 while (1) {
587 if (verbose)
588 fprintf(stderr, "Beginning page loop\n");
589 if (input && SDDS_ReadPage(&SDDSin) <= 0)
590 break;
591 for (i = 0; i < seqRequests; i++) {
592 if (seqRequest[i].flags & SEQ_DATAFILE && SDDS_ReadPage(&seqRequest[i].SDDSin) <= 0)
593 break;
594 }
595 if (i != seqRequests)
596 break;
597 if (!SDDS_StartPage(&SDDSout, samples) || (input && !SDDS_CopyParameters(&SDDSout, &SDDSin)))
598 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
599 if (verbose)
600 fprintf(stderr, "Defining randomization tables\n");
601 /* define randomization tables */
602 for (i = 0; i < randomizationGroups; i++) {
603 if (!(randomizationData[i].order = SDDS_Malloc(sizeof(*randomizationData[i].order) * samples)))
604 SDDS_Bomb("memory allocation failure");
605 for (j = 0; j < samples; j++)
606 randomizationData[i].order[j] = j;
607 randomizeOrder((char *)randomizationData[i].order, sizeof(*randomizationData[i].order), samples, 0, random_1);
608 }
609 if (verbose)
610 fprintf(stderr, "Beginning loop over sequence requests\n");
611 for (i = 0; i < seqRequests; i++) {
612 if (verbose)
613 fprintf(stderr, "Processing sequence request %ld\n", i);
614 if (seqRequest[i].flags & SEQ_DIRECT_GAUSSIAN) {
615 if ((seqRequest[i].meanPar &&
616 !SDDS_GetParameterAsDouble(&SDDSin, seqRequest[i].meanPar, &seqRequest[i].mean)) ||
617 (seqRequest[i].sigmaPar &&
618 !SDDS_GetParameterAsDouble(&SDDSin, seqRequest[i].sigmaPar, &seqRequest[i].sigma)))
619 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
620 for (j = 0; j < samples; j++)
621 sample[j] = gauss_rn_lim(seqRequest[i].mean, seqRequest[i].sigma, -1, random_1);
622 } else if (seqRequest[i].flags & SEQ_DIRECT_UNIFORM) {
623 if ((seqRequest[i].minPar &&
624 !SDDS_GetParameterAsDouble(&SDDSin, seqRequest[i].minPar, &seqRequest[i].min)) ||
625 (seqRequest[i].maxPar &&
626 !SDDS_GetParameterAsDouble(&SDDSin, seqRequest[i].maxPar, &seqRequest[i].max)))
627 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
628 for (j = 0; j < samples; j++)
629 sample[j] = seqRequest[i].min + (seqRequest[i].max - seqRequest[i].min) * random_1(1);
630 } else if (seqRequest[i].flags & SEQ_DIRECT_POISSON) {
631 double *pos_x, *pos_cdf, CDF;
632 long pos_points, code;
633 pos_x = pos_cdf = NULL;
634 if ((seqRequest[i].meanPar &&
635 !SDDS_GetParameterAsDouble(&SDDSin, seqRequest[i].meanPar, &seqRequest[i].mean)))
636 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
637 pos_points = CreatePoissonDistributionTable(&pos_x, &pos_cdf, seqRequest[i].mean);
638
639 for (j = 0; j < samples; j++) {
640 CDF = random_1(1);
641 sample[j] = (int)(interp(pos_x, pos_cdf, pos_points, CDF, 0, 1, &code));
642 /* fprintf(stderr, "%ld, cdf=%f, sample=%f\n", j, CDF, sample[j]); */
643 }
644 free(pos_x);
645 free(pos_cdf);
646 } else {
647 if (input && !(seqRequest[i].flags & SEQ_DATAFILE))
648 SDDSptr = &SDDSin;
649 else
650 SDDSptr = &seqRequest[i].SDDSin;
651 if ((values = SDDS_CountRowsOfInterest(SDDSptr))) {
652 if (!(IVValue = SDDS_GetColumnInDoubles(SDDSptr, seqRequest[i].indepName)) ||
653 (seqRequest[i].flags & SEQ_CDFNAME &&
654 !(CDFValue = SDDS_GetColumnInDoubles(SDDSptr, seqRequest[i].CDFName))) ||
655 (seqRequest[i].flags & SEQ_DFNAME &&
656 !(CDFValue = SDDS_GetColumnInDoubles(SDDSptr, seqRequest[i].DFName))))
657 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
658 } else {
659 sprintf(msgBuffer, "empty page for file %s\n",
660 (seqRequest[i].flags & SEQ_DATAFILE) ? seqRequest[i].dataFileName : input);
661 SDDS_Bomb(msgBuffer);
662 }
663 if (verbose)
664 fprintf(stderr, "Checking and converting CDF/DF values\n");
665 /* check/convert CDF/DF values */
666 for (j = 1; j < values; j++) {
667 if (IVValue[j - 1] > IVValue[j]) {
668 sprintf(msgBuffer, "random variate values not monotonically increasing for %s",
669 (seqRequest[i].flags & SEQ_DATAFILE) ? seqRequest[i].dataFileName : input);
670 SDDS_Bomb(msgBuffer);
671 }
672 if (seqRequest[i].flags & SEQ_DFNAME)
673 /* convert DF to CDF */
674 CDFValue[j] += CDFValue[j - 1];
675 if (CDFValue[j] < CDFValue[j - 1]) {
676 sprintf(msgBuffer, "CDF values decreasing for %s",
677 (seqRequest[i].flags & SEQ_DATAFILE) ? seqRequest[i].dataFileName : input);
678 SDDS_Bomb(msgBuffer);
679 }
680 }
681 if (verbose)
682 fprintf(stderr, "Normalizing CDF\n");
683 /* normalize the CDF */
684 if (CDFValue[values - 1] <= 0) {
685 sprintf(msgBuffer, "CDF not valid for %s\n", seqRequest[i].dataFileName);
686 SDDS_Bomb(msgBuffer);
687 }
688 for (j = 0; j < values; j++)
689 CDFValue[j] /= CDFValue[values - 1];
690 if (seqRequest[i].flags & SEQ_HALTONRADIX) {
691 if (verbose)
692 fprintf(stderr, "Starting halton sequence, offset=%" PRId32 "\n", seqRequest[i].haltonOffset);
693 if (!optimalHalton)
694 haltonID = startHaltonSequence(&seqRequest[i].haltonRadix, 0.5);
695 else
696 haltonID = startModHaltonSequence(&seqRequest[i].haltonRadix, 0);
697 while (seqRequest[i].haltonOffset-- > 0) {
698 if (!optimalHalton)
699 nextHaltonSequencePoint(haltonID);
700 else
702 }
703 }
704 if (verbose)
705 fprintf(stderr, "Generating samples\n");
706 for (j = 0; j < samples; j++) {
707 double CDF;
708 long code;
709 while (1) {
710 if (seqRequest[i].flags & SEQ_HALTONRADIX) {
711 if (!optimalHalton)
712 CDF = nextHaltonSequencePoint(haltonID);
713 else
714 CDF = nextModHaltonSequencePoint(haltonID);
715 } else
716 CDF = random_1(1);
717 if (CDF <= CDFValue[values - 1] && CDF >= CDFValue[0])
718 break;
719 }
720 sample[j] = seqRequest[i].factor * interp(IVValue, CDFValue, values, CDF, 0, 1, &code) + seqRequest[i].offset;
721 }
722 if (seqRequest[i].flags & SEQ_RANDOMIZE) {
723 long k, l;
724 double *sample1;
725 if (verbose)
726 fprintf(stderr, "Randomizing order of values\n");
727 if (!(sample1 = malloc(sizeof(*sample1) * samples)))
728 SDDS_Bomb("memory allocation failure");
729 for (l = 0; l < randomizationGroups; l++)
730 if (randomizationData[l].group == seqRequest[i].randomizationGroup)
731 break;
732 if (l == randomizationGroups)
733 SDDS_Bomb("problem with construction of randomization groups!");
734 for (k = 0; k < samples; k++)
735 sample1[k] = sample[randomizationData[l].order[k]];
736 free(sample);
737 sample = sample1;
738 }
739 free(IVValue);
740 free(CDFValue);
741 }
742 if (verbose)
743 fprintf(stderr, "Setting SDDS column values\n");
744 if (!SDDS_SetColumnFromDoubles(&SDDSout, SDDS_SET_BY_NAME, sample, samples,
745 seqRequest[i].outputName ? seqRequest[i].outputName : seqRequest[i].indepName))
746 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
747 }
748 if (verbose)
749 fprintf(stderr, "Writing data page\n");
750 if (!SDDS_WritePage(&SDDSout))
751 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
752 if (!requireInput)
753 break;
754 }
755 if (verbose)
756 fprintf(stderr, "Exited read loop\n");
757 free(sample);
758 if ((input && !SDDS_Terminate(&SDDSin)) || !SDDS_Terminate(&SDDSout))
759 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
760 for (i = 0; i < seqRequests; i++) {
761 if (seqRequest[i].dataFileName)
762 free(seqRequest[i].dataFileName);
763 if (seqRequest[i].indepName)
764 free(seqRequest[i].indepName);
765 if (seqRequest[i].outputName)
766 free(seqRequest[i].outputName);
767 if (seqRequest[i].DFName)
768 free(seqRequest[i].DFName);
769 if (seqRequest[i].CDFName)
770 free(seqRequest[i].CDFName);
771 if (seqRequest[i].units)
772 free(seqRequest[i].units);
773 if (seqRequest[i].meanPar)
774 free(seqRequest[i].meanPar);
775 if (seqRequest[i].sigmaPar)
776 free(seqRequest[i].sigmaPar);
777 if (seqRequest[i].minPar)
778 free(seqRequest[i].minPar);
779 if (seqRequest[i].maxPar)
780 free(seqRequest[i].maxPar);
781 if (seqRequest[i].flags & SEQ_DATAFILE && !SDDS_Terminate(&(seqRequest[i].SDDSin)))
782 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
783 }
784 free(seqRequest);
785 for (i = 0; i < randomizationGroups; i++)
786 free(randomizationData[i].order);
787 if (randomizationData)
788 free(randomizationData);
789
790 free_scanargs(&scanned, argc);
791
792 return EXIT_SUCCESS;
793}
int32_t SDDS_CopyParameters(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source)
Definition SDDS_copy.c:286
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetColumnFromDoubles(SDDS_DATASET *SDDS_dataset, int32_t mode, double *data, int64_t rows,...)
Sets the values for a single data column using double-precision floating-point numbers.
double * SDDS_GetParameterAsDouble(SDDS_DATASET *SDDS_dataset, char *parameter_name, double *memory)
Retrieves the value of a specified parameter as a double from the current data table of an SDDS datas...
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
double * SDDS_GetColumnInDoubles(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves the data of a specified numerical column as an array of doubles, considering only rows mark...
int32_t SDDS_ChangeColumnInformation(SDDS_DATASET *SDDS_dataset, char *field_name, void *memory, int32_t mode,...)
Modifies a specific field in a column definition within the SDDS dataset.
Definition SDDS_info.c:364
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_TransferColumnDefinition(SDDS_DATASET *target, SDDS_DATASET *source, char *name, char *newName)
Transfers a column definition from a source dataset to a target dataset.
int32_t SDDS_TransferAllParameterDefinitions(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, uint32_t mode)
Transfers all parameter definitions from a source dataset to a target dataset.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void * SDDS_Malloc(size_t size)
Allocates memory of a specified size.
Definition SDDS_utils.c:639
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_CheckParameter(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a parameter exists in the SDDS dataset with the specified name, units, and type.
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_ANY_NUMERIC_TYPE
Special identifier used by SDDS_Check*() routines to accept any numeric type.
Definition SDDStypes.h:157
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
long bitsSet(unsigned long data)
Counts the number of set bits (1s) in the given data.
Definition binary.c:52
int get_double(double *dptr, char *s)
Parses a double value from the given string.
Definition data_scan.c:40
double random_1(long iseed)
Generate a uniform random double in [0,1] using a custom seed initialization.
Definition drand.c:175
long randomizeOrder(char *ptr, long size, long length, long iseed, double(*urandom)(long iseed1))
Randomize the order of an array of elements.
Definition drand.c:465
double gauss_rn_lim(double mean, double sigma, double limit_in_sigmas, double(*urandom)(long iseed))
Generate a Gaussian-distributed random number with specified mean, sigma, and optional cutoff.
Definition drand.c:378
int64_t is_prime(int64_t number)
Determine if a number is prime.
Definition factorize.c:26
long fexists(const char *filename)
Checks if a file exists.
Definition fexists.c:27
int32_t startModHaltonSequence(int32_t *radix, double tiny)
Start a modified Halton sequence.
Definition halton.c:508
double nextModHaltonSequencePoint(long ID)
Retrieve the next point from the modified Halton sequence.
Definition halton.c:624
int32_t startHaltonSequence(int32_t *radix, double value)
Initialize and start a new Halton sequence.
Definition halton.c:38
double nextHaltonSequencePoint(long ID)
Get the next point in a Halton sequence.
Definition halton.c:107
double interp(double *f, double *x, long n, double xo, long warnings, long order, long *returnCode)
Performs simple linear interpolation of data.
Definition interp.c:34
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.
int wild_match(char *string, char *template)
Determine whether one string is a wildcard match for another.
Definition wild_match.c:49