SDDSlib
Loading...
Searching...
No Matches
sddsmatrix2column.c
Go to the documentation of this file.
1/**
2 * @file sddsmatrix2column.c
3 * @brief Transform a matrix into a column by stacking one column on top of another.
4 *
5 * This program reads an SDDS file containing an optional string column and multiple numerical columns.
6 * It outputs an SDDS file with two columns: a string column and a data column. The string column is a
7 * combination of the input string column (or generated row names) and the input data column names.
8 *
9 * The program provides options to specify row names, data column names, and the order in which the
10 * matrix is traversed (row-major or column-major). It supports pipe-based input and output.
11 *
12 * ### Usage
13 * `sddsmatrix2column [<inputfile>] [<outputfile>]`
14 *
15 * ### Options
16 * - `-pipe=<input>,<output>`: Specifies pipe flags for input and/or output.
17 * - `-rowNameColumn=<string>`: Specifies the column name for row names in the input file.
18 * - `-dataColumnName=<string>`: Specifies the column name for data in the output file.
19 * - `-rootnameColumnName=<string>`: Specifies the column name for the string column in the output file.
20 * - `-majorOrder=row|column`: Determines the order to traverse the matrix (row-major or column-major).
21 *
22 * ### Example
23 * To convert a matrix stored in `input.sdds` to a column format stored in `output.sdds`:
24 * ```
25 * sddsmatrix2column input.sdds output.sdds -rowNameColumn=RowName -dataColumnName=Data -majorOrder=row
26 * ```
27 *
28 * @copyright
29 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
30 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
31 *
32 * @license
33 * This file is distributed under the terms of the Software License Agreement
34 * found in the file LICENSE included with this distribution.
35 *
36 * @author H. Shang, R. Soliday
37 */
38
#include "mdb.h"
#include "SDDS.h"
#include "scan.h"

#include <stdlib.h>
42
/*
 * Command-line option identifiers.
 *
 * Each enumerator is the index of its keyword in option[] below, which is
 * the value match_string() hands back when that keyword is recognised.
 * N_OPTIONS is the number of keywords and must remain the last entry.
 */
enum option_type {
  SET_ROW_COLUMN_NAME = 0,  /* -rowNameColumn=<string>      */
  SET_DATA_COLUMN_NAME,     /* -dataColumnName=<string>     */
  SET_ROOTNAME_COLUMN_NAME, /* -rootnameColumnName=<string> */
  SET_PIPE,                 /* -pipe=<input>,<output>       */
  SET_MAJOR_ORDER,          /* -majorOrder=row|column       */
  N_OPTIONS
};

/* Option keywords, keyed by enum option_type so the two cannot drift apart. */
char *option[N_OPTIONS] = {
  [SET_ROW_COLUMN_NAME] = "rowNameColumn",
  [SET_DATA_COLUMN_NAME] = "dataColumnName",
  [SET_ROOTNAME_COLUMN_NAME] = "rootnameColumnName",
  [SET_PIPE] = "pipe",
  [SET_MAJOR_ORDER] = "majorOrder",
};
61
/* Fallback so the usage text still builds when the build system does not
 * supply a revision string. */
#ifndef SVN_VERSION
#  define SVN_VERSION "unknown"
#endif

/*
 * Help text printed when the program is run without arguments.
 *
 * The defaults quoted here mirror the ones main() applies when defining the
 * output columns: "Data" for -dataColumnName and "Rootname" for
 * -rootnameColumnName.
 */
char *USAGE =
  "Usage: sddsmatrix2column [<inputfile>] [<outputfile>]\n"
  "                         [-pipe=<input>,<output>]\n"
  "                         [-rowNameColumn=<string>]\n"
  "                         [-dataColumnName=<string>]\n"
  "                         [-rootnameColumnName=<string>]\n"
  "                         [-majorOrder=row|column]\n\n"
  "Options:\n"
  "  <inputfile>\n"
  "      - Contains an optional string column and multiple numerical columns.\n"
  "      - If the string column or -rowNameColumn is not provided,\n"
  "        rows will be named as Row<row_index> in the output.\n\n"
  "  <outputfile>\n"
  "      - Contains two columns: a string column and a data column.\n"
  "      - The string column combines the input string column (or Row<row_index>)\n"
  "        with the input data column names.\n\n"
  "  -pipe=<input>,<output>\n"
  "      - Specifies pipe flags for input and/or output.\n\n"
  "  -rowNameColumn=<string>\n"
  "      - Specifies the column name for row names in the input file.\n"
  "      - If not provided, rows will be named as Row<row_index>.\n\n"
  "  -dataColumnName=<string>\n"
  "      - Specifies the column name for data in the output file.\n"
  "      - If not provided, \"Data\" will be used.\n\n"
  "  -rootnameColumnName=<string>\n"
  "      - Specifies the column name for the string column in the output file.\n"
  "      - If not provided, \"Rootname\" will be used.\n\n"
  "  -majorOrder=row|column\n"
  "      - Determines the order to transfer the matrix into one column.\n"
  "        Choose 'row' for row-major order or 'column' for column-major order.\n\n"
  "Description:\n"
  "  sddsmatrix2column converts a matrix into a single column format.\n\n"
  "Link date: " __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION "\n\n";
94
95int main(int argc, char **argv) {
96 SDDS_DATASET SDDS_dataset, SDDS_orig;
97 char tmpName[1024];
98 long i_arg, tmpfile_used = 0, j, column_major;
99 SCANNED_ARG *s_arg;
100 int32_t page, numCols, columns, columnType;
101 int64_t i, rows, outputRows, outputRow;
102 char *inputfile, *outputfile;
103 char *rowColName, *dataColName, *rootnameColName, **rowName, **columnName;
104 double data;
105 unsigned long pipeFlags, majorOrderFlag;
106
107 inputfile = outputfile = rowColName = dataColName = rootnameColName = NULL;
108 pipeFlags = 0;
109 rowName = columnName = NULL;
110 majorOrderFlag = 0;
111 column_major = 1; /* Default as column major */
112
114 argc = scanargs(&s_arg, argc, argv);
115 if (argc < 2) {
116 fprintf(stderr, "%s\n", USAGE);
117 return 1;
118 }
119
120 for (i_arg = 1; i_arg < argc; i_arg++) {
121 if (s_arg[i_arg].arg_type == OPTION) {
122 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
123 case SET_MAJOR_ORDER:
124 s_arg[i_arg].n_items -= 1;
125 if (s_arg[i_arg].n_items > 0 &&
126 (!scanItemList(&majorOrderFlag, s_arg[i_arg].list + 1, &s_arg[i_arg].n_items, 0,
127 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
128 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
129 SDDS_Bomb("invalid -majorOrder syntax/values");
130 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
131 column_major = 1;
132 if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
133 column_major = 0;
134 break;
135 case SET_ROW_COLUMN_NAME:
136 if (s_arg[i_arg].n_items != 2) {
137 fprintf(stderr, "Error (%s): invalid -rowNameColumn syntax\n", argv[0]);
138 return 1;
139 }
140 rowColName = s_arg[i_arg].list[1];
141 break;
142 case SET_DATA_COLUMN_NAME:
143 if (s_arg[i_arg].n_items != 2) {
144 fprintf(stderr, "Error (%s): invalid -dataColumnName syntax\n", argv[0]);
145 return 1;
146 }
147 dataColName = s_arg[i_arg].list[1];
148 break;
149 case SET_ROOTNAME_COLUMN_NAME:
150 if (s_arg[i_arg].n_items != 2) {
151 fprintf(stderr, "Error (%s): invalid -rootnameColumnName syntax\n", argv[0]);
152 return 1;
153 }
154 rootnameColName = s_arg[i_arg].list[1];
155 break;
156 case SET_PIPE:
157 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipeFlags)) {
158 fprintf(stderr, "Error (%s): invalid -pipe syntax\n", argv[0]);
159 return 1;
160 }
161 break;
162 default:
163 fprintf(stderr, "Error: unknown option -- %s provided.\n", s_arg[i_arg].list[0]);
164 return 1;
165 }
166 } else {
167 if (inputfile == NULL)
168 inputfile = s_arg[i_arg].list[0];
169 else if (outputfile == NULL)
170 outputfile = s_arg[i_arg].list[0];
171 else {
172 fprintf(stderr, "Error (%s): too many filenames\n", argv[0]);
173 return 1;
174 }
175 }
176 }
177
178 if ((!pipeFlags && !outputfile)) {
179 fprintf(stderr, "Error: output file not provided.\n");
180 return 1;
181 }
182 processFilenames("sddsmatrix2column", &inputfile, &outputfile, pipeFlags, 0, &tmpfile_used);
183
184 numCols = 0;
185 if (!SDDS_InitializeInput(&SDDS_orig, inputfile)) {
186 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
187 return 1;
188 }
189 if (!SDDS_InitializeOutput(&SDDS_dataset, SDDS_orig.layout.data_mode.mode, 1, NULL, NULL, outputfile) ||
190 !SDDS_DefineSimpleColumn(&SDDS_dataset, rootnameColName ? rootnameColName : "Rootname", NULL, SDDS_STRING) ||
191 !SDDS_DefineSimpleColumn(&SDDS_dataset, dataColName ? dataColName : "Data", NULL, SDDS_DOUBLE) ||
192 !SDDS_DefineSimpleParameter(&SDDS_dataset, "InputFile", NULL, SDDS_STRING) ||
193 !SDDS_WriteLayout(&SDDS_dataset)) {
194 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
195 return 1;
196 }
197 if (!(columnName = (char **)SDDS_GetColumnNames(&SDDS_orig, &columns))) {
198 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
199 return 1;
200 }
201 numCols = 0;
202 for (i = 0; i < columns; i++) {
203 if (SDDS_NUMERIC_TYPE(columnType = SDDS_GetColumnType(&SDDS_orig, i))) {
204 numCols++;
205 }
206 }
207
208 while ((page = SDDS_ReadPage(&SDDS_orig)) > 0) {
209 if ((rows = SDDS_CountRowsOfInterest(&SDDS_orig)) < 0) {
210 fprintf(stderr, "Error: problem counting rows in input page\n");
211 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
212 return 1;
213 }
214 outputRows = rows * numCols;
215 if (!SDDS_StartPage(&SDDS_dataset, outputRows)) {
216 fprintf(stderr, "Error: problem starting output page\n");
217 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
218 return 1;
219 }
220 if (rows > 0) {
221 if (rowColName) {
222 if (SDDS_CheckColumn(&SDDS_orig, rowColName, NULL, SDDS_STRING, NULL) == SDDS_CHECK_OK) {
223 rowName = (char **)SDDS_GetColumn(&SDDS_orig, rowColName);
224 } else {
225 fprintf(stderr, "Error %s column does not exist or not string type in input file %s\n", rowColName, inputfile);
226 return 1;
227 }
228 }
229 outputRow = 0;
230 if (!column_major) {
231 for (i = 0; i < rows; i++) {
232 for (j = 0; j < columns; j++) {
233 if (SDDS_NUMERIC_TYPE(columnType = SDDS_GetColumnType(&SDDS_orig, j))) {
234 if (rowColName) {
235 snprintf(tmpName, sizeof(tmpName), "%s%s", rowName[i], SDDS_orig.layout.column_definition[j].name);
236 } else {
237 snprintf(tmpName, sizeof(tmpName), "Row%" PRId64 "%s", i, SDDS_orig.layout.column_definition[j].name);
238 }
239 switch (columnType) {
240 case SDDS_LONGDOUBLE:
241 data = ((long double *)SDDS_orig.data[j])[i];
242 break;
243 case SDDS_DOUBLE:
244 data = ((double *)SDDS_orig.data[j])[i];
245 break;
246 case SDDS_FLOAT:
247 data = ((float *)SDDS_orig.data[j])[i];
248 break;
249 case SDDS_LONG64:
250 data = ((int64_t *)SDDS_orig.data[j])[i];
251 break;
252 case SDDS_ULONG64:
253 data = ((uint64_t *)SDDS_orig.data[j])[i];
254 break;
255 case SDDS_LONG:
256 data = ((int32_t *)SDDS_orig.data[j])[i];
257 break;
258 case SDDS_ULONG:
259 data = ((uint32_t *)SDDS_orig.data[j])[i];
260 break;
261 case SDDS_SHORT:
262 data = ((short *)SDDS_orig.data[j])[i];
263 break;
264 case SDDS_USHORT:
265 data = ((unsigned short *)SDDS_orig.data[j])[i];
266 break;
267 default:
268 data = 0.0; /* Fallback for unsupported types */
269 }
270
271 if (!SDDS_SetRowValues(&SDDS_dataset, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, outputRow, 0, tmpName, 1, data, -1)) {
272 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
273 return 1;
274 }
275 outputRow++;
276 }
277 }
278 }
279 } else {
280 for (j = 0; j < columns; j++) {
281 if (!SDDS_NUMERIC_TYPE(columnType = SDDS_GetColumnType(&SDDS_orig, j)))
282 continue;
283 for (i = 0; i < rows; i++) {
284 if (rowColName) {
285 snprintf(tmpName, sizeof(tmpName), "%s%s", SDDS_orig.layout.column_definition[j].name, rowName[i]);
286 } else {
287 snprintf(tmpName, sizeof(tmpName), "%sRow%" PRId64, SDDS_orig.layout.column_definition[j].name, i);
288 }
289 switch (columnType) {
290 case SDDS_LONGDOUBLE:
291 data = ((long double *)SDDS_orig.data[j])[i];
292 break;
293 case SDDS_DOUBLE:
294 data = ((double *)SDDS_orig.data[j])[i];
295 break;
296 case SDDS_FLOAT:
297 data = ((float *)SDDS_orig.data[j])[i];
298 break;
299 case SDDS_LONG64:
300 data = ((int64_t *)SDDS_orig.data[j])[i];
301 break;
302 case SDDS_ULONG64:
303 data = ((uint64_t *)SDDS_orig.data[j])[i];
304 break;
305 case SDDS_LONG:
306 data = ((int32_t *)SDDS_orig.data[j])[i];
307 break;
308 case SDDS_ULONG:
309 data = ((uint32_t *)SDDS_orig.data[j])[i];
310 break;
311 case SDDS_SHORT:
312 data = ((short *)SDDS_orig.data[j])[i];
313 break;
314 case SDDS_USHORT:
315 data = ((unsigned short *)SDDS_orig.data[j])[i];
316 break;
317 default:
318 data = 0.0; /* Fallback for unsupported types */
319 }
320
321 if (!SDDS_SetRowValues(&SDDS_dataset, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, outputRow, 0, tmpName, 1, data, -1)) {
322 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
323 return 1;
324 }
325 outputRow++;
326 }
327 }
328 }
329 }
330 if (!SDDS_SetParameters(&SDDS_dataset, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE, "InputFile", inputfile ? inputfile : "pipe", NULL) ||
331 !SDDS_WritePage(&SDDS_dataset)) {
332 fprintf(stderr, "Error: problem writing page to file %s\n", outputfile);
333 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
334 return 1;
335 }
336 if (rowColName) {
337 SDDS_FreeStringArray(rowName, rows);
338 rowName = NULL;
339 }
340 }
341 if (!SDDS_Terminate(&SDDS_orig) || !SDDS_Terminate(&SDDS_dataset)) {
342 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
343 return 1;
344 }
345 if (tmpfile_used && !replaceFileAndBackUp(inputfile, outputfile)) {
346 return 1;
347 }
348
349 free_scanargs(&s_arg, argc);
350 return 0;
351}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
void * SDDS_GetColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves a copy of the data for a specified column, including only rows marked as "of interest".
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_FreeStringArray(char **string, int64_t strings)
Frees an array of strings by deallocating each individual string.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
#define SDDS_ULONG
Identifier for the unsigned 32-bit integer data type.
Definition SDDStypes.h:67
#define SDDS_FLOAT
Identifier for the float data type.
Definition SDDStypes.h:43
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_ULONG64
Identifier for the unsigned 64-bit integer data type.
Definition SDDStypes.h:55
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_SHORT
Identifier for the signed short integer data type.
Definition SDDStypes.h:73
#define SDDS_USHORT
Identifier for the unsigned short integer data type.
Definition SDDStypes.h:79
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
#define SDDS_NUMERIC_TYPE(type)
Checks if the given type identifier corresponds to any numeric type.
Definition SDDStypes.h:138
#define SDDS_LONGDOUBLE
Identifier for the long double data type.
Definition SDDStypes.h:31
#define SDDS_LONG64
Identifier for the signed 64-bit integer data type.
Definition SDDStypes.h:49
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.