SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
sddsmatrix2column.c File Reference

Detailed Description

Converts a matrix stored as multiple numerical columns into a single data column of an SDDS file.

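This program reads an SDDS file containing an optional string column and multiple numerical columns, and writes an SDDS file with two columns: a string column and a data column. Each output row holds one matrix element. Its string value combines the row name (taken from the input string column, or generated as Row<i> from the zero-based row index when no row-name column is given) with the name of the input data column the element came from: in row-major order the row name comes first, and in column-major order the column name comes first. The program supports options for row names, data column names, and matrix traversal order, making it flexible for diverse data transformation needs.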

Usage

sddsmatrix2column [<inputfile>] [<outputfile>]
[-pipe=[input][,output]]
[-rowNameColumn=<string>]
[-dataColumnName=<string>]
[-rootnameColumnName=<string>]
[-majorOrder=row|column]

Options

Optional Description
-pipe Specifies pipe flags for input and/or output.
-rowNameColumn Specifies the string column in the input file that supplies the row names. If omitted, row names are generated as Row<i>.
-dataColumnName Specifies the column name for the data column in the output file (default: Data).
-rootnameColumnName Specifies the column name for the string column in the output file (default: Rootname).
-majorOrder Determines the matrix traversal order, row-major or column-major (default: column-major).
License
This file is distributed under the terms of the Software License Agreement found in the file LICENSE included with this distribution.
Authors
H. Shang, R. Soliday

Definition in file sddsmatrix2column.c.

#include "mdb.h"
#include "SDDS.h"
#include "scan.h"

Go to the source code of this file.

Functions

int main (int argc, char **argv)
 

Function Documentation

◆ main()

int main ( int argc,
char ** argv )

Definition at line 99 of file sddsmatrix2column.c.

99 {
100 SDDS_DATASET SDDS_dataset, SDDS_orig;
101 char tmpName[1024];
102 long i_arg, tmpfile_used = 0, j, column_major;
103 SCANNED_ARG *s_arg;
104 int32_t page, numCols, columns, columnType;
105 int64_t i, rows, outputRows, outputRow;
106 char *inputfile, *outputfile;
107 char *rowColName, *dataColName, *rootnameColName, **rowName, **columnName;
108 double data;
109 unsigned long pipeFlags, majorOrderFlag;
110
111 inputfile = outputfile = rowColName = dataColName = rootnameColName = NULL;
112 pipeFlags = 0;
113 rowName = columnName = NULL;
114 majorOrderFlag = 0;
115 column_major = 1; /* Default as column major */
116
118 argc = scanargs(&s_arg, argc, argv);
119 if (argc < 2) {
120 fprintf(stderr, "%s\n", USAGE);
121 return 1;
122 }
123
124 for (i_arg = 1; i_arg < argc; i_arg++) {
125 if (s_arg[i_arg].arg_type == OPTION) {
126 switch (match_string(s_arg[i_arg].list[0], option, N_OPTIONS, 0)) {
127 case SET_MAJOR_ORDER:
128 s_arg[i_arg].n_items -= 1;
129 if (s_arg[i_arg].n_items > 0 &&
130 (!scanItemList(&majorOrderFlag, s_arg[i_arg].list + 1, &s_arg[i_arg].n_items, 0,
131 "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
132 "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL)))
133 SDDS_Bomb("invalid -majorOrder syntax/values");
134 if (majorOrderFlag & SDDS_COLUMN_MAJOR_ORDER)
135 column_major = 1;
136 if (majorOrderFlag & SDDS_ROW_MAJOR_ORDER)
137 column_major = 0;
138 break;
139 case SET_ROW_COLUMN_NAME:
140 if (s_arg[i_arg].n_items != 2) {
141 fprintf(stderr, "Error (%s): invalid -rowNameColumn syntax\n", argv[0]);
142 return 1;
143 }
144 rowColName = s_arg[i_arg].list[1];
145 break;
146 case SET_DATA_COLUMN_NAME:
147 if (s_arg[i_arg].n_items != 2) {
148 fprintf(stderr, "Error (%s): invalid -dataColumnName syntax\n", argv[0]);
149 return 1;
150 }
151 dataColName = s_arg[i_arg].list[1];
152 break;
153 case SET_ROOTNAME_COLUMN_NAME:
154 if (s_arg[i_arg].n_items != 2) {
155 fprintf(stderr, "Error (%s): invalid -rootnameColumnName syntax\n", argv[0]);
156 return 1;
157 }
158 rootnameColName = s_arg[i_arg].list[1];
159 break;
160 case SET_PIPE:
161 if (!processPipeOption(s_arg[i_arg].list + 1, s_arg[i_arg].n_items - 1, &pipeFlags)) {
162 fprintf(stderr, "Error (%s): invalid -pipe syntax\n", argv[0]);
163 return 1;
164 }
165 break;
166 default:
167 fprintf(stderr, "Error: unknown option -- %s provided.\n", s_arg[i_arg].list[0]);
168 return 1;
169 }
170 } else {
171 if (inputfile == NULL)
172 inputfile = s_arg[i_arg].list[0];
173 else if (outputfile == NULL)
174 outputfile = s_arg[i_arg].list[0];
175 else {
176 fprintf(stderr, "Error (%s): too many filenames\n", argv[0]);
177 return 1;
178 }
179 }
180 }
181
182 if ((!pipeFlags && !outputfile)) {
183 fprintf(stderr, "Error: output file not provided.\n");
184 return 1;
185 }
186 processFilenames("sddsmatrix2column", &inputfile, &outputfile, pipeFlags, 0, &tmpfile_used);
187
188 numCols = 0;
189 if (!SDDS_InitializeInput(&SDDS_orig, inputfile)) {
190 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
191 return 1;
192 }
193 if (!SDDS_InitializeOutput(&SDDS_dataset, SDDS_orig.layout.data_mode.mode, 1, NULL, NULL, outputfile) ||
194 !SDDS_DefineSimpleColumn(&SDDS_dataset, rootnameColName ? rootnameColName : "Rootname", NULL, SDDS_STRING) ||
195 !SDDS_DefineSimpleColumn(&SDDS_dataset, dataColName ? dataColName : "Data", NULL, SDDS_DOUBLE) ||
196 !SDDS_DefineSimpleParameter(&SDDS_dataset, "InputFile", NULL, SDDS_STRING) ||
197 !SDDS_WriteLayout(&SDDS_dataset)) {
198 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
199 return 1;
200 }
201 if (!(columnName = (char **)SDDS_GetColumnNames(&SDDS_orig, &columns))) {
202 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
203 return 1;
204 }
205 numCols = 0;
206 for (i = 0; i < columns; i++) {
207 if (SDDS_NUMERIC_TYPE(columnType = SDDS_GetColumnType(&SDDS_orig, i))) {
208 numCols++;
209 }
210 }
211
212 while ((page = SDDS_ReadPage(&SDDS_orig)) > 0) {
213 if ((rows = SDDS_CountRowsOfInterest(&SDDS_orig)) < 0) {
214 fprintf(stderr, "Error: problem counting rows in input page\n");
215 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
216 return 1;
217 }
218 outputRows = rows * numCols;
219 if (!SDDS_StartPage(&SDDS_dataset, outputRows)) {
220 fprintf(stderr, "Error: problem starting output page\n");
221 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
222 return 1;
223 }
224 if (rows > 0) {
225 if (rowColName) {
226 if (SDDS_CheckColumn(&SDDS_orig, rowColName, NULL, SDDS_STRING, NULL) == SDDS_CHECK_OK) {
227 rowName = (char **)SDDS_GetColumn(&SDDS_orig, rowColName);
228 } else {
229 fprintf(stderr, "Error %s column does not exist or not string type in input file %s\n", rowColName, inputfile);
230 return 1;
231 }
232 }
233 outputRow = 0;
234 if (!column_major) {
235 for (i = 0; i < rows; i++) {
236 for (j = 0; j < columns; j++) {
237 if (SDDS_NUMERIC_TYPE(columnType = SDDS_GetColumnType(&SDDS_orig, j))) {
238 if (rowColName) {
239 snprintf(tmpName, sizeof(tmpName), "%s%s", rowName[i], SDDS_orig.layout.column_definition[j].name);
240 } else {
241 snprintf(tmpName, sizeof(tmpName), "Row%" PRId64 "%s", i, SDDS_orig.layout.column_definition[j].name);
242 }
243 switch (columnType) {
244 case SDDS_LONGDOUBLE:
245 data = ((long double *)SDDS_orig.data[j])[i];
246 break;
247 case SDDS_DOUBLE:
248 data = ((double *)SDDS_orig.data[j])[i];
249 break;
250 case SDDS_FLOAT:
251 data = ((float *)SDDS_orig.data[j])[i];
252 break;
253 case SDDS_LONG64:
254 data = ((int64_t *)SDDS_orig.data[j])[i];
255 break;
256 case SDDS_ULONG64:
257 data = ((uint64_t *)SDDS_orig.data[j])[i];
258 break;
259 case SDDS_LONG:
260 data = ((int32_t *)SDDS_orig.data[j])[i];
261 break;
262 case SDDS_ULONG:
263 data = ((uint32_t *)SDDS_orig.data[j])[i];
264 break;
265 case SDDS_SHORT:
266 data = ((short *)SDDS_orig.data[j])[i];
267 break;
268 case SDDS_USHORT:
269 data = ((unsigned short *)SDDS_orig.data[j])[i];
270 break;
271 default:
272 data = 0.0; /* Fallback for unsupported types */
273 }
274
275 if (!SDDS_SetRowValues(&SDDS_dataset, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, outputRow, 0, tmpName, 1, data, -1)) {
276 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
277 return 1;
278 }
279 outputRow++;
280 }
281 }
282 }
283 } else {
284 for (j = 0; j < columns; j++) {
285 if (!SDDS_NUMERIC_TYPE(columnType = SDDS_GetColumnType(&SDDS_orig, j)))
286 continue;
287 for (i = 0; i < rows; i++) {
288 if (rowColName) {
289 snprintf(tmpName, sizeof(tmpName), "%s%s", SDDS_orig.layout.column_definition[j].name, rowName[i]);
290 } else {
291 snprintf(tmpName, sizeof(tmpName), "%sRow%" PRId64, SDDS_orig.layout.column_definition[j].name, i);
292 }
293 switch (columnType) {
294 case SDDS_LONGDOUBLE:
295 data = ((long double *)SDDS_orig.data[j])[i];
296 break;
297 case SDDS_DOUBLE:
298 data = ((double *)SDDS_orig.data[j])[i];
299 break;
300 case SDDS_FLOAT:
301 data = ((float *)SDDS_orig.data[j])[i];
302 break;
303 case SDDS_LONG64:
304 data = ((int64_t *)SDDS_orig.data[j])[i];
305 break;
306 case SDDS_ULONG64:
307 data = ((uint64_t *)SDDS_orig.data[j])[i];
308 break;
309 case SDDS_LONG:
310 data = ((int32_t *)SDDS_orig.data[j])[i];
311 break;
312 case SDDS_ULONG:
313 data = ((uint32_t *)SDDS_orig.data[j])[i];
314 break;
315 case SDDS_SHORT:
316 data = ((short *)SDDS_orig.data[j])[i];
317 break;
318 case SDDS_USHORT:
319 data = ((unsigned short *)SDDS_orig.data[j])[i];
320 break;
321 default:
322 data = 0.0; /* Fallback for unsupported types */
323 }
324
325 if (!SDDS_SetRowValues(&SDDS_dataset, SDDS_SET_BY_INDEX | SDDS_PASS_BY_VALUE, outputRow, 0, tmpName, 1, data, -1)) {
326 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
327 return 1;
328 }
329 outputRow++;
330 }
331 }
332 }
333 }
334 if (!SDDS_SetParameters(&SDDS_dataset, SDDS_SET_BY_NAME | SDDS_PASS_BY_VALUE, "InputFile", inputfile ? inputfile : "pipe", NULL) ||
335 !SDDS_WritePage(&SDDS_dataset)) {
336 fprintf(stderr, "Error: problem writing page to file %s\n", outputfile);
337 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
338 return 1;
339 }
340 if (rowColName) {
341 SDDS_FreeStringArray(rowName, rows);
342 rowName = NULL;
343 }
344 }
345 if (!SDDS_Terminate(&SDDS_orig) || !SDDS_Terminate(&SDDS_dataset)) {
346 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
347 return 1;
348 }
349 if (tmpfile_used && !replaceFileAndBackUp(inputfile, outputfile)) {
350 return 1;
351 }
352
353 free_scanargs(&s_arg, argc);
354 return 0;
355}
int32_t SDDS_SetRowValues(SDDS_DATASET *SDDS_dataset, int32_t mode, int64_t row,...)
int32_t SDDS_StartPage(SDDS_DATASET *SDDS_dataset, int64_t expected_n_rows)
int32_t SDDS_SetParameters(SDDS_DATASET *SDDS_dataset, int32_t mode,...)
void * SDDS_GetColumn(SDDS_DATASET *SDDS_dataset, char *column_name)
Retrieves a copy of the data for a specified column, including only rows marked as "of interest".
int64_t SDDS_CountRowsOfInterest(SDDS_DATASET *SDDS_dataset)
Counts the number of rows marked as "of interest" in the current data table.
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_InitializeOutput(SDDS_DATASET *SDDS_dataset, int32_t data_mode, int32_t lines_per_row, const char *description, const char *contents, const char *filename)
Initializes the SDDS output dataset.
int32_t SDDS_DefineSimpleColumn(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data column within the SDDS dataset.
int32_t SDDS_DefineSimpleParameter(SDDS_DATASET *SDDS_dataset, const char *name, const char *unit, int32_t type)
Defines a simple data parameter within the SDDS dataset.
int32_t SDDS_WritePage(SDDS_DATASET *SDDS_dataset)
Writes the current data table to the output file.
int32_t SDDS_WriteLayout(SDDS_DATASET *SDDS_dataset)
Writes the SDDS layout header to the output file.
int32_t SDDS_FreeStringArray(char **string, int64_t strings)
Frees an array of strings by deallocating each individual string.
int32_t SDDS_CheckColumn(SDDS_DATASET *SDDS_dataset, char *name, char *units, int32_t type, FILE *fp_message)
Checks if a column exists in the SDDS dataset with the specified name, units, and type.
char ** SDDS_GetColumnNames(SDDS_DATASET *SDDS_dataset, int32_t *number)
Retrieves the names of all columns in the SDDS dataset.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_RegisterProgramName(const char *name)
Registers the executable program name for use in error messages.
Definition SDDS_utils.c:288
int32_t SDDS_GetColumnType(SDDS_DATASET *SDDS_dataset, int32_t index)
Retrieves the data type of a column in the SDDS dataset by its index.
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
#define SDDS_ULONG
Identifier for the unsigned 32-bit integer data type.
Definition SDDStypes.h:67
#define SDDS_FLOAT
Identifier for the float data type.
Definition SDDStypes.h:43
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_ULONG64
Identifier for the unsigned 64-bit integer data type.
Definition SDDStypes.h:55
#define SDDS_LONG
Identifier for the signed 32-bit integer data type.
Definition SDDStypes.h:61
#define SDDS_SHORT
Identifier for the signed short integer data type.
Definition SDDStypes.h:73
#define SDDS_USHORT
Identifier for the unsigned short integer data type.
Definition SDDStypes.h:79
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
#define SDDS_NUMERIC_TYPE(type)
Checks if the given type identifier corresponds to any numeric type.
Definition SDDStypes.h:138
#define SDDS_LONGDOUBLE
Identifier for the long double data type.
Definition SDDStypes.h:31
#define SDDS_LONG64
Identifier for the signed 64-bit integer data type.
Definition SDDStypes.h:49
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long replaceFileAndBackUp(char *file, char *replacement)
Replaces a file with a replacement file and creates a backup of the original.
Definition replacefile.c:75
int scanargs(SCANNED_ARG **scanned, int argc, char **argv)
Definition scanargs.c:36
long processPipeOption(char **item, long items, unsigned long *flags)
Definition scanargs.c:356
void processFilenames(char *programName, char **input, char **output, unsigned long pipeFlags, long noWarnings, long *tmpOutputUsed)
Definition scanargs.c:390
void free_scanargs(SCANNED_ARG **scanned, int argc)
Definition scanargs.c:584
long scanItemList(unsigned long *flags, char **item, long *items, unsigned long mode,...)
Scans a list of items and assigns values based on provided keywords and types.