SDDSlib
Loading...
Searching...
No Matches
find_files.c
Go to the documentation of this file.
1/**
2 * @file find_files.c
3 * @brief Utility functions for date handling and file operations.
4 *
5 * This file provides functions for converting two-digit years to four-digit years, determining leap years,
6 * listing files in directories, finding files within date ranges, and sorting files based on their 'StartTime' parameter.
7 */
8
9#include "mdb.h"
10#include "SDDS.h"
11
/**
 * @brief Expands a possibly two-digit year into a four-digit year.
 *
 * @param year The year to expand; may already be a four-digit year.
 * @return The four-digit year.
 *
 * A value above 100 is treated as complete and handed back untouched.
 * A value below 90 is placed in the 2000s; anything else (90 through 100)
 * is placed in the 1900s.
 */
short make_four_digit_year(short year) {
  short century;

  /* Already a full year: nothing to expand. */
  if (year > 100) {
    return year;
  }

  century = (year >= 90) ? 1900 : 2000;
  return (short)(year + century);
}
31
/**
 * @brief Reports whether a year is a leap year.
 *
 * @param year The year to test; it is first expanded with make_four_digit_year().
 * @return 1 for a leap year, 0 for a non-leap year, -1 if @p year is negative.
 */
long is_leap_year(short year) {
  short fullYear;

  if (year < 0) {
    return -1;
  }

  fullYear = make_four_digit_year(year);

  /* Gregorian rule, checked from the most specific case to the least. */
  if (fullYear % 400 == 0) {
    return 1;
  }
  if (fullYear % 100 == 0) {
    return 0;
  }
  return fullYear % 4 == 0;
}
40
41#if !defined(_WIN32)
/**
 * @brief Lists the entries of a directory, optionally filtered by a wildcard template.
 *
 * @param path      Directory to read; when NULL the current directory (".") is read.
 * @param matchstr  Template handed to wild_match(); when NULL every entry is accepted.
 * @param tailsOnly If non-zero, entries are returned as bare names; otherwise as "<path>/<name>".
 * @param files     Receives the number of entries returned (ignored when NULL).
 * @return A newly allocated array of newly allocated strings, or NULL when nothing matched
 *         or the directory could not be opened. The caller frees each string and the array.
 *
 * The "." and ".." entries are always skipped. Entries are returned in the order readdir()
 * delivers them; no sorting is performed here.
 * The program exits with status 1 if @p path is given but does not exist, or if memory
 * cannot be allocated.
 */
char **ls_dir(char *path, char *matchstr, long tailsOnly, long *files) {
  DIR *dirp;
  struct dirent *dp;
  char **filename = NULL;
  char *entry;
  const char *dir;
  size_t entrySize;
  long files0 = 0;

  if (!path) {
    /* work with the current directory */
    dir = ".";
  } else {
    if (!fexists(path)) {
      fprintf(stderr, "path %s does not exist!\n", path);
      exit(1);
    }
    dir = path;
  }

  if ((dirp = opendir(dir))) {
    while ((dp = readdir(dirp))) {
      if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, ".."))
        continue;
      if (matchstr && !wild_match(dp->d_name, matchstr))
        continue;

      /* Size each entry exactly so that long paths can never overrun a fixed buffer. */
      entrySize = strlen(dp->d_name) + 1;
      if (!tailsOnly)
        entrySize += strlen(dir) + 1; /* room for "<dir>/" */

      entry = malloc(entrySize);
      filename = SDDS_Realloc(filename, sizeof(*filename) * (files0 + 1));
      if (!entry || !filename) {
        fprintf(stderr, "memory allocation failure in ls_dir\n");
        exit(1);
      }

      if (tailsOnly)
        snprintf(entry, entrySize, "%s", dp->d_name);
      else
        snprintf(entry, entrySize, "%s/%s", dir, dp->d_name);
      filename[files0++] = entry;
    }
    closedir(dirp);
  }

  if (files)
    *files = files0;
  return filename;
}
88
89/**
90 * @brief Finds files in a directory that match specified criteria within a date range.
91 *
92 * @param directory The directory to search for files.
93 * @param rootname The root name of the files to match.
94 * @param suffix The suffix of the files to match (can be NULL).
95 * @param startYear The starting year (can be two-digit or four-digit).
96 * @param startMonth The starting month.
97 * @param startDay The starting day.
98 * @param startJDay The starting Julian day (if zero, it will be calculated from startYear, startMonth, startDay).
99 * @param endYear The ending year (can be two-digit or four-digit).
100 * @param endMonth The ending month.
101 * @param endDay The ending day.
102 * @param endJDay The ending Julian day (if zero, it will be calculated from endYear, endMonth, endDay).
103 * @param filter The filter pattern to match files against.
104 * @param extensionList An array of strings containing file extensions to match (can be NULL).
105 * @param extensions The number of extensions in extensionList.
106 * @param tailsOnly If non-zero, only the filenames are returned; otherwise, full paths are included.
107 * @param files Pointer to a long where the number of files found will be stored.
108 * @param increaseOrder If non-zero, the files are returned in increasing order; otherwise, in decreasing order.
109 * @return An array of strings containing the matching filenames. The caller is responsible for freeing the memory.
110 *
111 * This function searches the specified directory for files that match the given rootname, suffix,
112 * date range, and filter pattern. It supports multiple file extensions and can return files in either
113 * increasing or decreasing order based on the start time parameter in the files.
114 */
115char **find_files_between_dates(char *directory, char *rootname, char *suffix,
116 short startYear, short startMonth, short startDay, short startJDay, short endYear, short endMonth, short endDay, short endJDay, char *filter, char **extensionList, long extensions, long tailsOnly, long *files, long increaseOrder) {
117 short iyear, iday, toYearEnd, jDayLimit, year0, month0, day0, iday0, istoday, listall;
118 long yearLists, fileLists, newLists, i, index, tmpLists, j, *sortedIndex;
119 char **fileList, **yearList, **newList, **tmpList;
120 char buffer[2056], tmpstr[2056], format[2056];
121 double startTime, endTime, startHour, endHour, time0, hour0;
122 time_t longtime;
123
124 sortedIndex = NULL;
125 fileLists = yearLists = newLists = tmpLists = 0;
126 fileList = yearList = newList = tmpList = NULL;
127 if (!directory || !rootname || !filter) {
128 fprintf(stderr, "directory or rootname or filter for searching files is not given!\n");
129 exit(1);
130 }
131 startTime = endTime = 0;
132 startHour = endHour = 0;
133 time(&longtime);
134 TimeEpochToBreakdown(&year0, &iday0, &month0, &day0, &hour0, longtime);
135 if (endYear == year0) {
136 if (endJDay == iday0)
137 istoday = 1;
138 else if (endMonth == month0 && endDay == day0)
139 istoday = 1;
140 else
141 istoday = 0;
142 } else
143 istoday = 0;
144
145 if (strcmp(filter, "-??\??") == 0) {
146 if (tailsOnly)
147 sprintf(format, "%s%s-%s", rootname, "%d", "%02d%02d%s");
148 else
149 sprintf(format, "%s/%s%s-%s", directory, rootname, "%d", "%02d%02d%s");
150 } else if (strcmp(filter, "-??\?-??\??") == 0) {
151 if (tailsOnly)
152 sprintf(format, "%s%s-%s-%s", rootname, "%d", "%03d", "%02d%02d%s");
153 else
154 sprintf(format, "%s/%s%s-%s-%s", directory, rootname, "%d", "%03d", "%02d%02d%s");
155 } else
156 SDDS_Bomb("invalid filter given, should be -???? or -??\?-????");
157 if (!startJDay) {
158 TimeBreakdownToEpoch(startYear, 0, startMonth, startDay, startHour, &startTime);
159 TimeEpochToBreakdown(&startYear, &startJDay, &startMonth, &startDay, &startHour, startTime);
160 }
161 if (!endJDay) {
162 TimeBreakdownToEpoch(endYear, 0, endMonth, endDay, endHour, &endTime);
163 TimeEpochToBreakdown(&endYear, &endJDay, &endMonth, &endDay, &endHour, endTime);
164 }
165 if (extensions) {
166 tmpList = (char **)malloc(sizeof(tmpList) * extensions);
167 for (i = 0; i < extensions; i++) {
168 if (suffix)
169 sprintf(tmpstr, "%s%s", suffix, extensionList[i]);
170 else
171 sprintf(tmpstr, "%s", extensionList[i]);
172 SDDS_CopyString(&tmpList[i], tmpstr);
173 }
174 tmpLists = extensions;
175 } else {
176 tmpLists = 1;
177 tmpList = (char **)malloc(sizeof(tmpList));
178 if (suffix)
179 SDDS_CopyString(&tmpList[0], suffix);
180 else
181 SDDS_CopyString(&tmpList[0], "");
182 }
183 startYear = make_four_digit_year(startYear);
184 endYear = make_four_digit_year(endYear);
185 for (iyear = startYear; iyear <= endYear; iyear++) {
186 if (iyear != endYear) {
187 toYearEnd = 1;
188 if (is_leap_year(iyear))
189 jDayLimit = 366;
190 else
191 jDayLimit = 365;
192 } else {
193 toYearEnd = 0;
194 jDayLimit = endJDay;
195 }
196 if (startJDay == 1 && (toYearEnd == 1 || istoday == 1))
197 listall = 1;
198 else
199 listall = 0;
200
201 for (i = 0; i < tmpLists; i++) {
202 sprintf(tmpstr, "%s%d%s%s", rootname, iyear, filter, tmpList[i]);
203 yearList = ls_dir(directory, tmpstr, tailsOnly, &yearLists);
204 if (yearLists) {
205 if (listall) {
206 fileList = SDDS_Realloc(fileList, sizeof(*fileList) * (fileLists + yearLists));
207 for (j = 0; j < yearLists; j++)
208 SDDS_CopyString(&fileList[j + fileLists], yearList[j]);
209 fileLists += yearLists;
210 } else {
211 for (iday = startJDay; iday <= jDayLimit; iday++) {
212 TimeBreakdownToEpoch(iyear, iday, 0, 0, 1, &time0);
213 TimeEpochToBreakdown(&year0, &iday0, &month0, &day0, &hour0, time0);
214 if (year0 != iyear || iday0 != iday || hour0 != 1) {
215 fprintf(stderr, "Error in getting the julian date!\n");
216 exit(1);
217 }
218 if (strcmp(filter, "-??\??") == 0)
219 sprintf(buffer, format, iyear, month0, day0, tmpList[i]);
220 else
221 sprintf(buffer, format, iyear, iday0, month0, day0, tmpList[i]);
222 if (has_wildcards(tmpList[i])) {
223 for (j = 0; j < yearLists; j++) {
224 if (wild_match(yearList[j], buffer)) {
225 newList = SDDS_Realloc(newList, sizeof(*newList) * (newLists + 1));
226 SDDS_CopyString(&newList[newLists], yearList[j]);
227 newLists++;
228 }
229 }
230 } else {
231 index = match_string(buffer, yearList, yearLists, EXACT_MATCH);
232 if (index >= 0) {
233 newList = SDDS_Realloc(newList, sizeof(*newList) * (newLists + 1));
234 SDDS_CopyString(&newList[newLists], buffer);
235 newLists++;
236 }
237 }
238 } /*end of for iday loop */
239 if (newLists) {
240 fileList = SDDS_Realloc(fileList, sizeof(*fileList) * (fileLists + newLists));
241 for (j = 0; j < newLists; j++) {
242 SDDS_CopyString(&fileList[j + fileLists], newList[j]);
243 free(newList[j]);
244 }
245 free(newList);
246 fileLists += newLists;
247 newLists = 0;
248 newList = NULL;
249 }
250 } /*end of if startJDay==1 */
251 /*free yearList */
252 SDDS_FreeStringArray(yearList, yearLists);
253 free(yearList);
254 yearLists = 0;
255 yearList = NULL;
256 } /*end of if yearLists */
257 } /*end of tmpLists loop */
258 startJDay = 1;
259 } /*end of iyear loop */
260 sortedIndex = sort_and_return_index(fileList, SDDS_STRING, fileLists, increaseOrder);
261 free(sortedIndex);
262 *files = fileLists;
263 for (i = 0; i < tmpLists; i++)
264 free(tmpList[i]);
265 if (tmpList)
266 free(tmpList);
267 return fileList;
268}
269#endif
270
271/*sort the files by the value of StartTime parameter, assume that all files
272 contain StartTime parameter;
273 isTail -- 1 , the fileList is only tail of file, need add directory
274 otherwise -- need not add directory*/
275void sort_files_by_start_time(char *directory, long isTail, char **fileList, long files, long increaseOrder) {
276 long i, *index = NULL;
277 double *startTime;
278 SDDS_DATASET dataset;
279 char **tmpList;
280 char buffer[2046];
281
282 tmpList = fileList;
283 startTime = NULL;
284 if (!fileList || !files || files == 1)
285 return;
286 startTime = (double *)calloc(sizeof(*startTime), files);
287 for (i = 0; i < files; i++) {
288 if (isTail && directory) {
289 sprintf(buffer, "%s/%s", directory, fileList[i]);
290 if (!SDDS_InitializeInput(&dataset, buffer)) {
291 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
292 exit(1);
293 }
294 } else {
295 if (!SDDS_InitializeInput(&dataset, fileList[i])) {
296 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
297 exit(1);
298 }
299 }
300 if (SDDS_ReadPage(&dataset) < 0) {
301 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
302 exit(1);
303 }
304 if (!SDDS_GetParameterAsDouble(&dataset, "StartTime", &startTime[i])) {
305 fprintf(stderr, "Unable to get StartTime parameter from %s file.\n", fileList[i]);
306 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
307 exit(1);
308 }
309 if (!SDDS_Terminate(&dataset)) {
310 SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
311 exit(1);
312 }
313 }
314 index = sort_and_return_index(startTime, SDDS_DOUBLE, files, increaseOrder);
315 for (i = 0; i < files; i++)
316 fileList[i] = tmpList[index[i]];
317}
SDDS (Self Describing Data Set) Data Types Definitions and Function Prototypes.
double * SDDS_GetParameterAsDouble(SDDS_DATASET *SDDS_dataset, char *parameter_name, double *memory)
Retrieves the value of a specified parameter as a double from the current data table of an SDDS datas...
int32_t SDDS_InitializeInput(SDDS_DATASET *SDDS_dataset, char *filename)
Definition SDDS_input.c:49
int32_t SDDS_Terminate(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_ReadPage(SDDS_DATASET *SDDS_dataset)
int32_t SDDS_FreeStringArray(char **string, int64_t strings)
Frees an array of strings by deallocating each individual string.
void SDDS_PrintErrors(FILE *fp, int32_t mode)
Prints recorded error messages to a specified file stream.
Definition SDDS_utils.c:432
void SDDS_Bomb(char *message)
Terminates the program after printing an error message and recorded errors.
Definition SDDS_utils.c:342
int32_t SDDS_CopyString(char **target, const char *source)
Copies a source string to a target string with memory allocation.
Definition SDDS_utils.c:856
void * SDDS_Realloc(void *old_ptr, size_t new_size)
Reallocates memory to a new size.
Definition SDDS_utils.c:677
#define SDDS_STRING
Identifier for the string data type.
Definition SDDStypes.h:85
#define SDDS_DOUBLE
Identifier for the double data type.
Definition SDDStypes.h:37
long fexists(const char *filename)
Checks if a file exists.
Definition fexists.c:27
short make_four_digit_year(short year)
Converts a two-digit year into a four-digit year.
Definition find_files.c:22
char ** find_files_between_dates(char *directory, char *rootname, char *suffix, short startYear, short startMonth, short startDay, short startJDay, short endYear, short endMonth, short endDay, short endJDay, char *filter, char **extensionList, long extensions, long tailsOnly, long *files, long increaseOrder)
Finds files in a directory that match specified criteria within a date range.
Definition find_files.c:115
long match_string(char *string, char **option, long n_options, long mode)
Matches a given string against an array of option strings based on specified modes.
long * sort_and_return_index(void *data, long type, long rows, long increaseOrder)
Sort data and return the sorted index.
short TimeEpochToBreakdown(short *year, short *jDay, short *month, short *day, double *hour, double epochTime)
Breaks down epoch time into its constituent components.
short TimeBreakdownToEpoch(short year, short jDay, short month, short day, double hour, double *epochTime)
Converts a broken-down time into epoch time.
int has_wildcards(char *template)
Check if a template string contains any wildcard characters.
Definition wild_match.c:498
int wild_match(char *string, char *template)
Determine whether one string is a wildcard match for another.
Definition wild_match.c:49