SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsmatrix2column.c
Go to the documentation of this file.
1/**
2 * @file sddsmatrix2column.c
3 * @brief Converts a matrix into a single-column SDDS file format.
4 *
5 * @details
6 * This program reads an SDDS file containing an optional string column and multiple numerical columns
7 * and outputs an SDDS file with two columns: a string column and a data column. The string column is a
8 * combination of the input string column (or generated row names) and the input data column names. It
9 * supports various options for row names, data column names, and matrix traversal order, making it flexible
10 * for diverse data transformation needs.
11 *
12 * @section Usage
13 * ```
14 * sddsmatrix2column [<inputfile>] [<outputfile>]
15 * [-pipe=[input][,output]]
16 * [-rowNameColumn=<string>]
17 * [-dataColumnName=<string>]
18 * [-rootnameColumnName=<string>]
19 * [-majorOrder=row|column]
20 * ```
21 *
22 * @section Options
23 * | Optional | Description |
24 * |---------------------------------------|---------------------------------------------------------------------------------------|
25 * | `-pipe` | Specifies pipe flags for input and/or output. |
26 * | `-rowNameColumn` | Specifies the column name for row names in the input file. |
27 * | `-dataColumnName` | Specifies the column name for the data column in the output file. |
28 * | `-rootnameColumnName` | Specifies the column name for the string column in the output file. |
29 * | `-majorOrder` | Determines the matrix traversal order (row-major or column-major). |
30 *
31 * @copyright
32 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
33 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
34 *
35 * @license
36 * This file is distributed under the terms of the Software License Agreement
37 * found in the file LICENSE included with this distribution.
38 *
39 * @authors
40 * H. Shang, R. Soliday
41 */
42
#include <stdlib.h>

#include "mdb.h"
#include "SDDS.h"
#include "scan.h"
46
/*
 * Identifiers for the command-line options understood by this program.
 * Each value doubles as the index of the matching keyword in option[],
 * so the two lists must be kept in the same order.
 */
enum option_type {
  SET_ROW_COLUMN_NAME = 0,      /* -rowNameColumn */
  SET_DATA_COLUMN_NAME = 1,     /* -dataColumnName */
  SET_ROOTNAME_COLUMN_NAME = 2, /* -rootnameColumnName */
  SET_PIPE = 3,                 /* -pipe */
  SET_MAJOR_ORDER = 4,          /* -majorOrder */
  N_OPTIONS                     /* number of options; sizes option[] */
};
56
57char *option[N_OPTIONS] =
58 {
59 "rowNameColumn",
60 "dataColumnName",
61 "rootnameColumnName",
62 "pipe",
63 "majorOrder"
64 };
65
66char *USAGE =
67 "Usage: sddsmatrix2column [<inputfile>] [<outputfile>]\n"
68 " [-pipe=<input>,<output>]\n"
69 " [-rowNameColumn=<string>]\n"
70 " [-dataColumnName=<string>]\n"
71 " [-rootnameColumnName=<string>]\n"
72 " [-majorOrder=row|column]\n\n"
73 "Options:\n"
74 " <inputfile>\n"
75 " - Contains an optional string column and multiple numerical columns.\n"
76 " - If the string column or -rowNameColumn is not provided,\n"
77 " rows will be named as Row<row_index> in the output.\n\n"
78 " <outputfile>\n"
79 " - Contains two columns: a string column and a data column.\n"
80 " - The string column combines the input string column (or Row<row_index>)\n"
81 " with the input data column names.\n\n"
82 " -pipe=<input>,<output>\n"
83 " - Specifies pipe flags for input and/or output.\n\n"
84 " -rowNameColumn=<string>\n"
85 " - Specifies the column name for row names in the input file.\n"
86 " - If not provided, rows will be named as Row<row_index>.\n\n"
87 " -dataColumnName=<string>\n"
88 " - Specifies the column name for data in the output file.\n"
89 " - If not provided, \"Rootname\" will be used.\n\n"
90 " -rootnameColumnName=<string>\n"
91 " - Specifies the column name for the string column in the output file.\n\n"
92 " -majorOrder=row|column\n"
93 " - Determines the order to transfer the matrix into one column.\n"
94 " Choose 'row' for row-major order or 'column' for column-major order.\n\n"
95 "Description:\n"
96 " sddsmatrix2column converts a matrix into a single column format.\n\n"
97 "Link date: " __DATE__ " " __TIME__ ", SVN revision: " SVN_VERSION "\n\n";
98
99int main(int argc, char **argv) {
100 SDDS_DATASET SDDS_dataset, SDDS_orig;
101 char tmpName[1024];
102 long i_arg, tmpfile_used = 0, j, column_major;
103 SCANNED_ARG *s_arg;
104 int32_t page, numCols, columns, columnType;
105 int64_t i, rows, outputRows, outputRow;
106 char *inputfile, *outputfile;
107 char *rowColName, *dataColName, *rootnameColName, **rowName, **columnName;
108 double data;
109 unsigned long pipeFlags, majorOrderFlag;
110
111 inputfile = outputfile = rowColName = dataColName = rootnameColName = NULL;
112 pipeFlags = 0;
113 rowName = columnName = NULL;
114 majorOrderFlag = 0;
115 column_major = 1; /* Default as column major */
116
118 argc = scanargs(&s_arg, argc, argv);
119 if (argc < 2) {
120 fprintf(stderr, "%s\n", USAGE);
121 return 1;
122 }
123
124 for (i_arg = 1; i_arg < argc; i_arg++) {
125 if (s_arg[i_arg].arg_type == OPTION) {
126 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
127 case SET_MAJOR_ORDER:
128 s_arg[i_arg].n_items -= 1;
129 if (s_arg[i_arg].n_items > 0 &&
130 (!scanItemList(&majorOrderFlag, s_arg[i_arg].list + 1, &s_arg[i_arg].n_items, 0,
131 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
132 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
133 SDDS_Bomb("invalid -majorOrder syntax/values");
134 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
135 column_major = 1;
136 if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
137 column_major = 0;
138 break;
139 case SET_ROW_COLUMN_NAME:
140 if (s_arg[i_arg].n_items != 2) {
141 fprintf(stderr, "Error (%s): invalid -rowNameColumn syntax\n", argv[0]);
142 return 1;
143 }
144 rowColName = s_arg[i_arg].list[1];
145 break;
146 case SET_DATA_COLUMN_NAME:
147 if (s_arg[i_arg].n_items != 2) {
148 fprintf(stderr, "Error (%s): invalid -dataColumnName syntax\n", argv[0]);
149 return 1;
150 }
151 dataColName = s_arg[i_arg].list[1];
152 break;
153 case SET_ROOTNAME_COLUMN_NAME:
154 if (s_arg[i_arg].n_items != 2) {
155 fprintf(stderr, "Error (%s): invalid -rootnameColumnName syntax\n", argv[0]);
156 return 1;
157 }
158 rootnameColName = s_arg[i_arg].list[1];
159 break;
160 case SET_PIPE:
161 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipeFlags)) {
162 fprintf(stderr, "Error (%s): invalid -pipe syntax\n", argv[0]);
163 return 1;
164 }
165 break;
166 default:
167 fprintf(stderr, "Error: unknown option -- %s provided.\n", s_arg[i_arg].list[0]);
168 return 1;
169 }
170 } else {
171 if (inputfile == NULL)
172 inputfile = s_arg[i_arg].list[0];
173 else if (outputfile == NULL)
174 outputfile = s_arg[i_arg].list[0];
175 else {
176 fprintf(stderr, "Error (%s): too many filenames\n", argv[0]);
177 return 1;
178 }
179 }
180 }
181
182 if ((!pipeFlags && !outputfile)) {
183 fprintf(stderr, "Error: output file not provided.\n");
184 return 1;
185 }
186 processFilenames("sddsmatrix2column", &inputfile, &outputfile, pipeFlags, 0, &tmpfile_used);
187
188 numCols = 0;
189 if (!SDDS_InitializeInput(&SDDS_orig, inputfile)) {
190 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
191 return 1;
192 }
193 if (!SDDS_InitializeOutput(&SDDS_dataset, SDDS_orig.layout.data_mode.mode, 1, NULL, NULL, outputfile) ||
194 !SDDS_DefineSimpleColumn(&SDDS_dataset, rootnameColName ? rootnameColName : "Rootname", NULL, SDDS_STRING) ||
195 !SDDS_DefineSimpleColumn(&SDDS_dataset, dataColName ? dataColName : "Data", NULL, SDDS_DOUBLE) ||
196 !SDDS_DefineSimpleParameter(&SDDS_dataset, "InputFile", NULL, SDDS_STRING) ||
197 !SDDS_WriteLayout(&SDDS_dataset)) {
198 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
199 return 1;
200 }
201 if (!(columnName = (char **)SDDS_GetColumnNames(&SDDS_orig, &columns))) {
202 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
203 return 1;
204 }
205 numCols = 0;
206 for (i = 0; i < columns; i++) {
207 if (SDDS_NUMERIC_TYPE(columnType = SDDS_GetColumnType(&SDDS_orig, i))) {
208 numCols++;
209 }
210 }
211
212 while ((page = SDDS_ReadPage(&SDDS_orig)) > 0) {
213 if ((rows = SDDS_CountRowsOfInterest(&SDDS_orig)) < 0) {
214 fprintf(stderr, "Error: problem counting rows in input page\n");
215 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
216 return 1;
217 }
218 outputRows = rows * numCols;
219 if (!SDDS_StartPage(&SDDS_dataset, outputRows)) {
220 fprintf(stderr, "Error: problem starting output page\n");
221 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
222 return 1;
223 }
224 if (rows > 0) {
225 if (rowColName) {
226 if (SDDS_CheckColumn(&SDDS_orig, rowColName, NULL, SDDS_STRING, NULL) == SDDS_CHECK_OK) {
227 rowName = (char **)SDDS_GetColumn(&SDDS_orig, rowColName);
228 } else {
229 fprintf(stderr, "Error %s column does not exist or not string type in input file %s\n", rowColName, inputfile);
230 return 1;
231 }
232 }
233 outputRow = 0;
234 if (!column_major) {
235 for (i = 0; i < rows; i++) {
236 for (j = 0; j < columns; j++) {
237 if (SDDS_NUMERIC_TYPE(columnType = SDDS_GetColumnType(&SDDS_orig, j))) {
238 if (rowColName) {
239 snprintf(tmpName, sizeof(tmpName), "%s%s", rowName[i], SDDS_orig.layout.column_definition[j].name);
240 } else {
241 snprintf(tmpName, sizeof(tmpName), "Row%" PRId64 "%s", i, SDDS_orig.layout.column_definition[j].name);
242 }
243 switch (columnType) {
244 case SDDS_LONGDOUBLE:
245 data = ((long double *)SDDS_orig.data[j])[i];
246 break;
247 case SDDS_DOUBLE:
248 data = ((double *)SDDS_orig.data[j])[i];
249 break;
250 case SDDS_FLOAT:
251 data = ((float *)SDDS_orig.data[j])[i];
252 break;
253 case SDDS_LONG64:
254 data = ((int64_t *)SDDS_orig.data[j])[i];
255 break;
256 case SDDS_ULONG64:
257 data = ((uint64_t *)SDDS_orig.data[j])[i];
258 break;
259 case SDDS_LONG:
260 data = ((int32_t *)SDDS_orig.data[j])[i];
261 break;
262 case SDDS_ULONG:
263 data = ((uint32_t *)SDDS_orig.data[j])[i];
264 break;
265 case SDDS_SHORT:
266 data = ((short *)SDDS_orig.data[j])[i];
267 break;
268 case SDDS_USHORT:
269 data = ((unsigned short *)SDDS_orig.data[j])[i];
270 break;
271 default:
272 data = 0.0; /* Fallback for unsupported types */
273 }
274
275 if (!SDDS_SetRowValues(&SDDS_dataset, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, outputRow, 0, tmpName, 1, data, -1)) {
276 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
277 return 1;
278 }
279 outputRow++;
280 }
281 }
282 }
283 } else {
284 for (j = 0; j < columns; j++) {
285 if (!SDDS_NUMERIC_TYPE(columnType = SDDS_GetColumnType(&SDDS_orig, j)))
286 continue;
287 for (i = 0; i < rows; i++) {
288 if (rowColName) {
289 snprintf(tmpName, sizeof(tmpName), "%s%s", SDDS_orig.layout.column_definition[j].name, rowName[i]);
290 } else {
291 snprintf(tmpName, sizeof(tmpName), "%sRow%" PRId64, SDDS_orig.layout.column_definition[j].name, i);
292 }
293 switch (columnType) {
294 case SDDS_LONGDOUBLE:
295 data = ((long double *)SDDS_orig.data[j])[i];
296 break;
297 case SDDS_DOUBLE:
298 data = ((double *)SDDS_orig.data[j])[i];
299 break;
300 case SDDS_FLOAT:
301 data = ((float *)SDDS_orig.data[j])[i];
302 break;
303 case SDDS_LONG64:
304 data = ((int64_t *)SDDS_orig.data[j])[i];
305 break;
306 case SDDS_ULONG64:
307 data = ((uint64_t *)SDDS_orig.data[j])[i];
308 break;
309 case SDDS_LONG:
310 data = ((int32_t *)SDDS_orig.data[j])[i];
311 break;
312 case SDDS_ULONG:
313 data = ((uint32_t *)SDDS_orig.data[j])[i];
314 break;
315 case SDDS_SHORT:
316 data = ((short *)SDDS_orig.data[j])[i];
317 break;
318 case SDDS_USHORT:
319 data = ((unsigned short *)SDDS_orig.data[j])[i];
320 break;
321 default:
322 data = 0.0; /* Fallback for unsupported types */
323 }
324
325 if (!SDDS_SetRowValues(&SDDS_dataset, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, outputRow, 0, tmpName, 1, data, -1)) {
326 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
327 return 1;
328 }
329 outputRow++;
330 }
331 }
332 }
333 }
334 if (!SDDS_SetParameters(&SDDS_dataset, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE, "InputFile", inputfile ? inputfile : "pipe", NULL) ||
335 !SDDS_WritePage(&SDDS_dataset)) {
336 fprintf(stderr, "Error: problem writing page to file %s\n", outputfile);
337 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
338 return 1;
339 }
340 if (rowColName) {
341 SDDS_FreeStringArray(rowName, rows);
342 rowName = NULL;
343 }
344 }
345 if (!SDDS_Terminate(&SDDS_orig) || !SDDS_Terminate(&SDDS_dataset)) {
346 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
347 return 1;
348 }
349 if (tmpfile_used && !replaceFileAndBackUp(inputfile, outputfile)) {
350 return 1;
351 }
352
353 free_scanargs(&s_arg, argc);
354 return 0;
355}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
void * SDDS_GetColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves a copy of the data for a specified column, including only rows marked as "of interest".
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_FreeStringArray(char **string, int64_t strings)
Frees an array of strings by deallocating each individual string.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
#define SDDS_ULONG
Identifier for the unsigned 32-bit integer data type.
Definition SDDStypes.h:67
#define SDDS_FLOAT
Identifier for the float data type.
Definition SDDStypes.h:43
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_ULONG64
Identifier for the unsigned 64-bit integer data type.
Definition SDDStypes.h:55
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_SHORT
Identifier for the signed short integer data type.
Definition SDDStypes.h:73
#define SDDS_USHORT
Identifier for the unsigned short integer data type.
Definition SDDStypes.h:79
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
#define SDDS_NUMERIC_TYPE(type)
Checks if the given type identifier corresponds to any numeric type.
Definition SDDStypes.h:138
#define SDDS_LONGDOUBLE
Identifier for the long double data type.
Definition SDDStypes.h:31
#define SDDS_LONG64
Identifier for the signed 64-bit integer data type.
Definition SDDStypes.h:49
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.