117 {
118 int iArg;
119 char **column, **excludeColumn, *withOnly;
120 long columns, excludeColumns;
121 char *input, *output;
122 SCANNED_ARG *scanned;
124 long i, j, row, count, readCode, rankOrder, iName1, iName2;
125 int64_t rows;
126 int32_t outlierStDevPasses;
127 double **data, correlation, significance, outlierStDevLimit;
128 double **rank;
129 short **accept;
130 char s[SDDS_MAXLINE];
131 unsigned long pipeFlags, dummyFlags, majorOrderFlag;
132 short columnMajorOrder = -1;
133
135 argc =
scanargs(&scanned, argc, argv);
136 if (argc < 2)
138
139 output = input = withOnly = NULL;
140 columns = excludeColumns = 0;
141 column = excludeColumn = NULL;
142 pipeFlags = 0;
143 rankOrder = 0;
144 outlierStDevPasses = 0;
145 outlierStDevLimit = 1.0;
146 rank = NULL;
147 accept = NULL;
148
149 for (iArg = 1; iArg < argc; iArg++) {
150 if (scanned[iArg].arg_type == OPTION) {
151
152 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
153 case SET_MAJOR_ORDER:
154 majorOrderFlag = 0;
155 scanned[iArg].n_items--;
156 if (scanned[iArg].n_items > 0 &&
157 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
158 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
159 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
160 SDDS_Bomb(
"invalid -majorOrder syntax/values");
161 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
162 columnMajorOrder = 1;
163 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
164 columnMajorOrder = 0;
165 break;
166 case SET_COLUMNS:
167 if (columns)
168 SDDS_Bomb(
"only one -columns option may be given");
169 if (scanned[iArg].n_items < 2)
171 column =
tmalloc(
sizeof(*column) * (columns = scanned[iArg].n_items - 1));
172 for (i = 0; i < columns; i++)
173 column[i] = scanned[iArg].list[i + 1];
174 break;
175 case SET_EXCLUDE:
176 if (scanned[iArg].n_items < 2)
177 SDDS_Bomb(
"invalid -excludeColumns syntax");
178 moveToStringArray(&excludeColumn, &excludeColumns, scanned[iArg].list + 1, scanned[iArg].n_items - 1);
179 break;
180 case SET_WITHONLY:
181 if (withOnly)
182 SDDS_Bomb(
"only one -withOnly option may be given");
183 if (scanned[iArg].n_items < 2)
185 withOnly = scanned[iArg].list[1];
186 break;
187 case SET_PIPE:
188 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
190 break;
191 case SET_RANKORDER:
192 rankOrder = 1;
193 break;
194 case SET_STDEVOUTLIER:
195 scanned[iArg].n_items--;
196 outlierStDevPasses = 1;
197 outlierStDevLimit = 1.0;
198 if (!
scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
200 "passes",
SDDS_LONG, &outlierStDevPasses, 1, 0, NULL) ||
201 outlierStDevPasses <= 0 || outlierStDevLimit <= 0.0)
202 SDDS_Bomb(
"invalid -stdevOutlier syntax/values");
203 break;
204 default:
205 fprintf(stderr, "Error: unknown or ambiguous option: %s\n", scanned[iArg].list[0]);
206 exit(EXIT_FAILURE);
207 break;
208 }
209 } else {
210 if (!input)
211 input = scanned[iArg].list[0];
212 else if (!output)
213 output = scanned[iArg].list[0];
214 else
216 }
217 }
218
220
223
224 if (!columns)
225 columns = appendToStringArray(&column, columns, "*");
226 if (withOnly)
227 columns = appendToStringArray(&column, columns, withOnly);
228
229 if ((columns = expandColumnPairNames(&SDDSin, &column, NULL, columns, excludeColumn, excludeColumns, FIND_NUMERIC_TYPE, 0)) <= 0) {
231 SDDS_Bomb(
"no columns selected for correlation analysis");
232 }
233
239 SDDS_DefineColumn(&SDDSout,
"CorrelationSignificance",
"P$br$n", NULL,
"Linear correlation coefficient significance", NULL,
SDDS_DOUBLE, 0) < 0 ||
240 SDDS_DefineColumn(&SDDSout,
"CorrelationPoints", NULL, NULL,
"Number of points used for correlation", NULL,
SDDS_LONG, 0) < 0 ||
242 SDDS_DefineParameter(&SDDSout,
"sddscorrelateInputFile", NULL, NULL,
"Data file processed by sddscorrelate", NULL,
SDDS_STRING, input ? input :
"stdin") < 0 ||
243 SDDS_DefineParameter(&SDDSout,
"sddscorrelateMode", NULL, NULL, NULL, NULL,
SDDS_STRING, rankOrder ?
"Rank-Order (Spearman)" :
"Linear (Pearson)") < 0 ||
244 SDDS_DefineParameter1(&SDDSout,
"sddscorrelateStDevOutlierPasses", NULL, NULL,
"Number of passes of standard-deviation outlier elimination applied", NULL,
SDDS_LONG, &outlierStDevPasses) < 0 ||
245 SDDS_DefineParameter1(&SDDSout,
"sddscorrelateStDevOutlierLimit", NULL, NULL,
"Standard-deviation outlier limit applied", NULL,
SDDS_DOUBLE, &outlierStDevLimit) < 0) {
247 }
248
249 if (columnMajorOrder != -1)
250 SDDSout.layout.data_mode.column_major = columnMajorOrder;
251 else
252 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
253
256
257 data = malloc(sizeof(*data) * columns);
258 if (!data ||
259 (rankOrder && !(rank = malloc(sizeof(*rank) * columns))) ||
260 !(accept = malloc(sizeof(*accept) * columns))) {
262 }
263
266 continue;
268 !
SDDS_SetParameters(&SDDSout, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE,
"CorrelatedRows", rows, NULL)) {
270 }
271 for (i = 0; i < columns; i++) {
273 if (!data[i])
275 if (rankOrder)
276 rank[i] = findRank(data[i], rows);
277 if (outlierStDevPasses) {
278 accept[i] = malloc(sizeof(**accept) * rows);
279 if (!accept[i])
281 markStDevOutliers(data[i], outlierStDevLimit, outlierStDevPasses, accept[i], rows);
282 } else {
283 accept[i] = NULL;
284 }
285 }
286 for (i = row = 0; i < columns; i++) {
287 for (j = i + 1; j < columns; j++) {
288 iName1 = i;
289 iName2 = j;
290 if (withOnly) {
291 if (strcmp(withOnly, column[i]) == 0) {
292 iName1 = j;
293 iName2 = i;
294 } else if (strcmp(withOnly, column[j]) == 0) {
295 iName1 = i;
296 iName2 = j;
297 } else {
298 continue;
299 }
300 }
302 rankOrder ? rank[j] : data[j],
303 accept[i], accept[j], rows, &count);
305 snprintf(s, sizeof(s), "%s.%s", column[iName1], column[iName2]);
307 0, column[iName1],
308 1, column[iName2],
309 2, s,
310 3, correlation,
311 4, significance,
312 5, count,
313 -1)) {
315 }
316 }
317 }
318 for (i = 0; i < columns; i++) {
319 free(data[i]);
320 if (rankOrder)
321 free(rank[i]);
322 if (accept[i])
323 free(accept[i]);
324 }
327 }
328
329 free(data);
330 if (rankOrder)
331 free(rank);
332 free(accept);
333
336 exit(EXIT_FAILURE);
337 }
340 exit(EXIT_FAILURE);
341 }
342
343 return EXIT_SUCCESS;
344}
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_DefineParameter1(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, void *fixed_value)
Defines a data parameter with a fixed numerical value.
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
double linearCorrelationSignificance(double r, long rows)
Compute the statistical significance of a linear correlation coefficient.
double linearCorrelationCoefficient(double *data1, double *data2, short *accept1, short *accept2, long rows, long *count)
Compute the linear correlation coefficient for two data sets.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.