72char *option[N_OPTIONS] = {
85 "sddsshiftcor [-pipe=[input][,output]] [<inputfile>] [<outputfile>] -with=<name>\n" \
86 " [-scan=start=<startShift>,end=<endShift>,delta=<deltaShift>]\n" \
87 " [-columns=<list-of-names>] [-excludeColumns=<list-of-names>]\n" \
88 " [-rankOrder] [-stDevOutlier[=limit=<factor>][,passes=<integer>]]\n" \
89 " [-verbose] [-majorOrder=row|column]\n\n" \
90 "Program by Michael Borland. (\"" __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION ")\n"
92void replaceWithRank(
double *data,
long n);
93double *findRank(
double *data, int64_t n);
94void markStDevOutliers(
double *data,
double limit,
long passes,
short *keep, int64_t n);
95void setupOutputFile(
SDDS_DATASET *SDDSout,
char *output,
char **column,
long columns,
short columnMajorOrder);
97int main(
int argc,
char **argv) {
99 char **column, **excludeColumn, *withOnly;
100 long columns, excludeColumns;
101 char *input, *output, buffer[16384];
102 SCANNED_ARG *scanned;
104 long i, count, readCode, rankOrder;
106 int32_t outlierStDevPasses;
107 double **data, correlation, outlierStDevLimit;
110 unsigned long pipeFlags, dummyFlags, majorOrderFlag;
111 int32_t startShift, endShift, deltaShift;
112 long outputRows, iWith, outputRow, shiftAmount, verbose;
113 short columnMajorOrder = -1;
116 argc =
scanargs(&scanned, argc, argv);
120 output = input = withOnly = NULL;
121 columns = excludeColumns = 0;
122 column = excludeColumn = NULL;
125 outlierStDevPasses = 0;
126 outlierStDevLimit = 1.0;
129 startShift = -(endShift = 10);
133 for (iArg = 1; iArg < argc; iArg++) {
134 if (scanned[iArg].arg_type == OPTION) {
136 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
137 case SET_MAJOR_ORDER:
139 scanned[iArg].n_items--;
140 if (scanned[iArg].n_items > 0 &&
141 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
142 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
143 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
144 SDDS_Bomb(
"invalid -majorOrder syntax/values");
145 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
146 columnMajorOrder = 1;
147 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
148 columnMajorOrder = 0;
152 SDDS_Bomb(
"only one -columns option may be given");
153 if (scanned[iArg].n_items < 2)
155 column =
tmalloc(
sizeof(*column) * (columns = scanned[iArg].n_items - 1));
156 for (i = 0; i < columns; i++)
157 column[i] = scanned[iArg].list[i + 1];
160 if (scanned[iArg].n_items < 2)
161 SDDS_Bomb(
"invalid -excludeColumns syntax");
162 moveToStringArray(&excludeColumn, &excludeColumns, scanned[iArg].list + 1, scanned[iArg].n_items - 1);
166 SDDS_Bomb(
"only one -withOnly option may be given");
167 if (scanned[iArg].n_items < 2)
169 withOnly = scanned[iArg].list[1];
172 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
178 case SET_STDEVOUTLIER:
179 scanned[iArg].n_items--;
180 outlierStDevPasses = 1;
181 outlierStDevLimit = 1.0;
182 if (!
scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
184 "passes",
SDDS_LONG, &outlierStDevPasses, 1, 0, NULL) ||
185 outlierStDevPasses <= 0 || outlierStDevLimit <= 0.0)
186 SDDS_Bomb(
"invalid -stdevOutlier syntax/values");
189 scanned[iArg].n_items--;
190 if (!
scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
193 "delta",
SDDS_LONG, &deltaShift, 1, 0, NULL) ||
194 startShift >= endShift || deltaShift <= 0 || (endShift - startShift) < deltaShift)
195 SDDS_Bomb(
"invalid -scan syntax/values");
201 fprintf(stderr,
"error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]);
207 input = scanned[iArg].list[0];
209 output = scanned[iArg].list[0];
221 columns = appendToStringArray(&column, columns,
"*");
223 columns = appendToStringArray(&column, columns, withOnly);
225 if ((columns = expandColumnPairNames(&SDDSin, &column, NULL, columns, excludeColumn, excludeColumns, FIND_NUMERIC_TYPE, 0)) <= 0) {
227 SDDS_Bomb(
"no columns selected for correlation analysis");
229 if (columnMajorOrder == -1)
230 columnMajorOrder = SDDSin.layout.data_mode.column_major;
232 setupOutputFile(&SDDSout, output, column, columns, columnMajorOrder);
234 if (!(data = (
double **)malloc(
sizeof(*data) * columns)) ||
235 (rankOrder && !(rank = (
double **)malloc(
sizeof(*rank) * columns))) ||
236 !(accept = (
short **)malloc(
sizeof(*accept) * columns)))
238 outputRows = (endShift - startShift) / deltaShift;
245 for (i = 0; i < columns; i++) {
246 if (strcmp(withOnly, column[i]) == 0)
251 rank[i] = findRank(data[i], rows);
252 if (outlierStDevPasses) {
253 if (!(accept[i] = (
short *)malloc(
sizeof(**accept) * rows)))
255 markStDevOutliers(data[i], outlierStDevLimit, outlierStDevPasses, accept[i], rows);
262 for (shiftAmount = startShift; shiftAmount <= endShift; shiftAmount += deltaShift) {
264 fprintf(stderr,
"Working on shift of %ld\n", shiftAmount);
265 if (!
SDDS_SetRowValues(&SDDSout, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE, outputRow,
"ShiftAmount", shiftAmount, NULL))
267 for (i = 0; i < columns; i++) {
269 rankOrder ? rank[i] : data[i],
270 rankOrder ? rank[iWith] : data[iWith],
276 sprintf(buffer,
"%sShiftedCor", column[i]);
277 if (!
SDDS_SetRowValues(&SDDSout, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE, outputRow, buffer, correlation, NULL))
282 for (i = 0; i < columns; i++) {
304void setupOutputFile(
SDDS_DATASET *SDDSout,
char *output,
char **column,
long columns,
short columnMajorOrder) {
310 SDDSout->layout.data_mode.column_major = columnMajorOrder;
312 for (i = 0; i < columns; i++) {
313 sprintf(buffer,
"%sShiftedCor", column[i]);
315 SDDS_Bomb(
"unable to set up column definitions");
318 SDDS_Bomb(
"unable to set up output file");
321void markStDevOutliers(
double *data,
double limit,
long passes,
short *keep, int64_t n) {
322 double sum1, sum2, variance, mean, absLimit;
324 int64_t i, summed, kept;
326 for (i = 0; i < n; i++)
329 for (pass = 0; pass < passes && kept; pass++) {
332 for (i = 0; i < n; i++) {
336 sum2 += data[i] * data[i];
341 mean = sum1 / summed;
343 if ((variance = sum2 - mean * mean) > 0.0) {
344 absLimit = limit * sqrt(variance);
345 for (i = 0; i < n; i++) {
346 if (keep[i] && fabs(data[i] - mean) > absLimit) {
360int compareData(
const void *d1,
const void *d2) {
363 return (diff == 0.0) ? 0 : (diff < 0.0 ? -1 : 1);
366double *findRank(
double *data, int64_t n) {
370 rank = (
double *)malloc(
sizeof(*rank) * n);
373 for (i = 0; i < n; i++)
375 replaceWithRank(rank, n);
379void replaceWithRank(
double *data,
long n) {
381 long i, j, iStart, iEnd;
383 indexedData =
SDDS_Realloc(indexedData,
sizeof(*indexedData) * n);
384 for (i = 0; i < n; i++) {
385 indexedData[i].data = data[i];
386 indexedData[i].originalIndex = i;
388 qsort((
void *)indexedData, n,
sizeof(*indexedData), compareData);
389 for (i = 0; i < n; i++)
390 data[indexedData[i].originalIndex] = (
double)i;
391 for (i = 0; i < n - 1; i++) {
392 if (data[i] == data[i + 1]) {
394 for (j = i + 2; j < n; j++) {
395 if (data[j - 1] != data[j])
399 double average = (iStart + iEnd) / 2.0;
400 for (j = iStart; j <= iEnd; j++)
401 data[indexedData[j].originalIndex] = average;
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
double shiftedLinearCorrelationCoefficient(double *data1, double *data2, short *accept1, short *accept2, long rows, long *count, long shift)
Compute the linear correlation coefficient between two data sets with a given shift.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.