90 {
91 int iArg;
92 char **column, **excludeColumn, *withOnly;
93 long columns, excludeColumns;
94 char *input, *output;
95 SCANNED_ARG *scanned;
97 long i, j, row, count, readCode, rankOrder, iName1, iName2;
98 int64_t rows;
99 int32_t outlierStDevPasses;
100 double **data, correlation, significance, outlierStDevLimit;
101 double **rank;
102 short **accept;
103 char s[SDDS_MAXLINE];
104 unsigned long pipeFlags, dummyFlags, majorOrderFlag;
105 short columnMajorOrder = -1;
106
108 argc =
scanargs(&scanned, argc, argv);
109 if (argc < 2)
111
112 output = input = withOnly = NULL;
113 columns = excludeColumns = 0;
114 column = excludeColumn = NULL;
115 pipeFlags = 0;
116 rankOrder = 0;
117 outlierStDevPasses = 0;
118 outlierStDevLimit = 1.0;
119 rank = NULL;
120 accept = NULL;
121
122 for (iArg = 1; iArg < argc; iArg++) {
123 if (scanned[iArg].arg_type == OPTION) {
124
125 switch (
match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
126 case SET_MAJOR_ORDER:
127 majorOrderFlag = 0;
128 scanned[iArg].n_items--;
129 if (scanned[iArg].n_items > 0 &&
130 (!
scanItemList(&majorOrderFlag, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
131 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
132 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
133 SDDS_Bomb(
"invalid -majorOrder syntax/values");
134 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
135 columnMajorOrder = 1;
136 else if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
137 columnMajorOrder = 0;
138 break;
139 case SET_COLUMNS:
140 if (columns)
141 SDDS_Bomb(
"only one -columns option may be given");
142 if (scanned[iArg].n_items < 2)
144 column =
tmalloc(
sizeof(*column) * (columns = scanned[iArg].n_items - 1));
145 for (i = 0; i < columns; i++)
146 column[i] = scanned[iArg].list[i + 1];
147 break;
148 case SET_EXCLUDE:
149 if (scanned[iArg].n_items < 2)
150 SDDS_Bomb(
"invalid -excludeColumns syntax");
151 moveToStringArray(&excludeColumn, &excludeColumns, scanned[iArg].list + 1, scanned[iArg].n_items - 1);
152 break;
153 case SET_WITHONLY:
154 if (withOnly)
155 SDDS_Bomb(
"only one -withOnly option may be given");
156 if (scanned[iArg].n_items < 2)
158 withOnly = scanned[iArg].list[1];
159 break;
160 case SET_PIPE:
161 if (!
processPipeOption(scanned[iArg].list + 1, scanned[iArg].n_items - 1, &pipeFlags))
163 break;
164 case SET_RANKORDER:
165 rankOrder = 1;
166 break;
167 case SET_STDEVOUTLIER:
168 scanned[iArg].n_items--;
169 outlierStDevPasses = 1;
170 outlierStDevLimit = 1.0;
171 if (!
scanItemList(&dummyFlags, scanned[iArg].list + 1, &scanned[iArg].n_items, 0,
173 "passes",
SDDS_LONG, &outlierStDevPasses, 1, 0, NULL) ||
174 outlierStDevPasses <= 0 || outlierStDevLimit <= 0.0)
175 SDDS_Bomb(
"invalid -stdevOutlier syntax/values");
176 break;
177 default:
178 fprintf(stderr, "Error: unknown or ambiguous option: %s\n", scanned[iArg].list[0]);
179 exit(EXIT_FAILURE);
180 break;
181 }
182 } else {
183 if (!input)
184 input = scanned[iArg].list[0];
185 else if (!output)
186 output = scanned[iArg].list[0];
187 else
189 }
190 }
191
193
196
197 if (!columns)
198 columns = appendToStringArray(&column, columns, "*");
199 if (withOnly)
200 columns = appendToStringArray(&column, columns, withOnly);
201
202 if ((columns = expandColumnPairNames(&SDDSin, &column, NULL, columns, excludeColumn, excludeColumns, FIND_NUMERIC_TYPE, 0)) <= 0) {
204 SDDS_Bomb(
"no columns selected for correlation analysis");
205 }
206
212 SDDS_DefineColumn(&SDDSout,
"CorrelationSignificance",
"P$br$n", NULL,
"Linear correlation coefficient significance", NULL,
SDDS_DOUBLE, 0) < 0 ||
213 SDDS_DefineColumn(&SDDSout,
"CorrelationPoints", NULL, NULL,
"Number of points used for correlation", NULL,
SDDS_LONG, 0) < 0 ||
215 SDDS_DefineParameter(&SDDSout,
"sddscorrelateInputFile", NULL, NULL,
"Data file processed by sddscorrelate", NULL,
SDDS_STRING, input ? input :
"stdin") < 0 ||
216 SDDS_DefineParameter(&SDDSout,
"sddscorrelateMode", NULL, NULL, NULL, NULL,
SDDS_STRING, rankOrder ?
"Rank-Order (Spearman)" :
"Linear (Pearson)") < 0 ||
217 SDDS_DefineParameter1(&SDDSout,
"sddscorrelateStDevOutlierPasses", NULL, NULL,
"Number of passes of standard-deviation outlier elimination applied", NULL,
SDDS_LONG, &outlierStDevPasses) < 0 ||
218 SDDS_DefineParameter1(&SDDSout,
"sddscorrelateStDevOutlierLimit", NULL, NULL,
"Standard-deviation outlier limit applied", NULL,
SDDS_DOUBLE, &outlierStDevLimit) < 0) {
220 }
221
222 if (columnMajorOrder != -1)
223 SDDSout.layout.data_mode.column_major = columnMajorOrder;
224 else
225 SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
226
229
230 data = malloc(sizeof(*data) * columns);
231 if (!data ||
232 (rankOrder && !(rank = malloc(sizeof(*rank) * columns))) ||
233 !(accept = malloc(sizeof(*accept) * columns))) {
235 }
236
239 continue;
241 !
SDDS_SetParameters(&SDDSout, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE,
"CorrelatedRows", rows, NULL)) {
243 }
244 for (i = 0; i < columns; i++) {
246 if (!data[i])
248 if (rankOrder)
249 rank[i] = findRank(data[i], rows);
250 if (outlierStDevPasses) {
251 accept[i] = malloc(sizeof(**accept) * rows);
252 if (!accept[i])
254 markStDevOutliers(data[i], outlierStDevLimit, outlierStDevPasses, accept[i], rows);
255 } else {
256 accept[i] = NULL;
257 }
258 }
259 for (i = row = 0; i < columns; i++) {
260 for (j = i + 1; j < columns; j++) {
261 iName1 = i;
262 iName2 = j;
263 if (withOnly) {
264 if (strcmp(withOnly, column[i]) == 0) {
265 iName1 = j;
266 iName2 = i;
267 } else if (strcmp(withOnly, column[j]) == 0) {
268 iName1 = i;
269 iName2 = j;
270 } else {
271 continue;
272 }
273 }
275 rankOrder ? rank[j] : data[j],
276 accept[i], accept[j], rows, &count);
278 snprintf(s, sizeof(s), "%s.%s", column[iName1], column[iName2]);
280 0, column[iName1],
281 1, column[iName2],
282 2, s,
283 3, correlation,
284 4, significance,
285 5, count,
286 -1)) {
288 }
289 }
290 }
291 for (i = 0; i < columns; i++) {
292 free(data[i]);
293 if (rankOrder)
294 free(rank[i]);
295 if (accept[i])
296 free(accept[i]);
297 }
300 }
301
302 free(data);
303 if (rankOrder)
304 free(rank);
305 free(accept);
306
309 exit(EXIT_FAILURE);
310 }
313 exit(EXIT_FAILURE);
314 }
315
316 return EXIT_SUCCESS;
317}
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
int32_t SDDS_DefineParameter1(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, void *fixed_value)
Defines a data parameter with a fixed numerical value.
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_DefineColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, int32_t field_length)
Defines a data column within the SDDS dataset.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_DefineParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *symbol, const char *units, const char *description, const char *format_string, int32_t type, char *fixed_value)
Defines a data parameter with a fixed string value.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
#define SDDS_STRING
Identifier for the string data type.
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
#define SDDS_DOUBLE
Identifier for the double data type.
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
double linearCorrelationSignificance(double r, long rows)
Compute the statistical significance of a linear correlation coefficient.
double linearCorrelationCoefficient(double *data1, double *data2, short *accept1, short *accept2, long rows, long *count)
Compute the linear correlation coefficient for two data sets.
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
long processPipeOption(char **item, long items, unsigned long *flags)
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.