SDDSlib
Loading...
Searching...
No Matches
wild_match.c
Go to the documentation of this file.
1/**
2 * @file wild_match.c
3 * @brief Wildcard matching and string utility functions.
4 *
5 * This file provides functions to perform wildcard pattern matching, expand range
6 * specifiers within templates, check for the presence of wildcard characters, and
7 * manipulate strings with respect to wildcard handling.
8 *
9 * @copyright
10 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
11 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
12 *
13 * @license
14 * This file is distributed under the terms of the Software License Agreement
15 * found in the file LICENSE included with this distribution.
16 *
17 * @author M. Borland, C Saunders, R. Soliday, H. Shang
18 */
19
20#if defined(_WIN32)
21# include <stdlib.h>
22#endif
23#include <ctype.h>
24#include "mdb.h"
25#define MATCH_INVERT '^'
26#define MATCH_MANY '*'
27#define MATCH_SET1 '['
28#define MATCH_SET2 ']'
29#define ESCAPE_CHAR '\\'
30#define SET_MATCH_INVERT '^'
31
32#ifdef VAX_VMS
33# define MATCH_ONE '%'
34#else
35# define MATCH_ONE '?'
36#endif
37
/**
 * @brief Determine whether one string is a wildcard match for another.
 *
 * The template may contain:
 *  - MATCH_MANY (`*`): matches any run of characters;
 *  - MATCH_ONE (`?`, or `%` when built with VAX_VMS): matches exactly one character;
 *  - `[...]`: matches one character that appears in the bracketed set, or one
 *    character that does not appear in it when the set starts with `^` and
 *    holds more than that single character. Ranges such as `a-e` are not
 *    interpreted here; the set is taken as a literal list of characters;
 *  - a backslash: the template character that follows it is compared literally.
 *
 * A `[` that has no closing `]` is compared as an ordinary character.
 * A leading `^` on the template inverts the overall result.
 * An empty string only matches an empty template.
 * The template is only read; it is never modified.
 *
 * @param string The string to be matched.
 * @param template The wildcard pattern to match against.
 * @return Nonzero if the string satisfies the template, 0 otherwise. Without a
 *         leading `^` the nonzero value is 1; with a leading `^` it is -1
 *         (returned when the rest of the template does not match).
 */
int wild_match(char *string, char *template) {
  char *s = string;   /* cursor in the string being tested */
  char *t = template; /* cursor in the template */
  char *ptr;
  int invert_match = 0; /* 0 normally, -1 when the template starts with '^' */

#ifdef DEBUG
  printf("wild_match(%s, %s)\n", string, template);
#endif

  if (*t == MATCH_INVERT) {
    invert_match = -1;
    t++;
  }

  /* Either side empty: only "empty vs. empty" counts as a match. */
  if (!*s)
    return *t ? invert_match : !invert_match;
  if (!*t)
    return invert_match;

  /* Both *s and *t are non-NUL at the top of every iteration. */
  do {
#ifdef DEBUG
    printf("s = %s, t = %s\n", s, t);
#endif
    switch (*t) {
    case ESCAPE_CHAR:
      /* The character after the backslash must match literally. */
      t++;
      if (*t++ != *s++)
        return invert_match;
      break;
    case MATCH_MANY: {
      size_t at_least = 0; /* number of '?' inside this run of '*' and '?' */

      while (*t == MATCH_MANY || *t == MATCH_ONE) {
        if (*t == MATCH_ONE)
          at_least++;
        t++;
      }
      if (at_least > strlen(s))
        return invert_match;
      s += at_least;
      if (!*t) {
        /* The run ends the template and contains at least one '*', so
         * whatever is left of the string is accepted. */
#ifdef DEBUG
        printf("return(1)\n");
#endif
        return !invert_match;
      }
      /* Try the rest of the template against every suffix of the string. */
      for (ptr = s; *ptr; ptr++) {
        if (wild_match(ptr, t)) {
#ifdef DEBUG
          printf("return(2)\n");
#endif
          return !invert_match;
        }
      }
      /* Second attempt: find the next template character literally in the
       * string and match what follows it against what follows in the
       * template. *t is non-NUL here, so strchr() cannot return the
       * terminator.
       * NOTE(review): after a failed candidate the search resumes two
       * characters further on (one if that reaches the end of the string),
       * so an immediately following candidate is not tried - confirm this
       * is intended. */
      ptr = s;
      while ((ptr = strchr(ptr, *t))) {
        if (wild_match(ptr + 1, t + 1)) {
#ifdef DEBUG
          printf("return(3)\n");
#endif
          return !invert_match;
        }
        if (*++ptr)
          ++ptr;
      }
      return invert_match;
    }
    case MATCH_ONE:
      s++;
      t++;
      break;
    case MATCH_SET1: {
      char *set_end = *(t + 1) ? strchr(t + 1, MATCH_SET2) : NULL;

      if (!set_end) {
        /* No closing bracket: '[' is an ordinary character. */
        if (*t != *s)
          return invert_match;
        t++;
        s++;
      } else {
        const char *set = t + 1;
        size_t set_len = (size_t)(set_end - set);
        int invert_set_match = 0;
        int in_set;

        /* A set consisting of just "^" means the literal character '^'. */
        if (*set == SET_MATCH_INVERT && set_len != 1) {
          invert_set_match = 1;
          set++;
          set_len--;
        }
        in_set = memchr(set, *s, set_len) != NULL;
        if (in_set == invert_set_match)
          return invert_match;
        s++;
        t = set_end + 1;
      }
      break;
    }
    default:
      if (*s++ != *t++)
        return invert_match;
      break;
    }
  } while (*s && *t);

#ifdef DEBUG
  printf("s = %s, t = %s\n", s, t);
#endif
  /* The loop stops only when at least one of the two cursors hit its end. */
  if (*s) {
    /* Template exhausted but string characters remain. */
#ifdef DEBUG
    printf("return(6)\n");
#endif
    return invert_match;
  }
  if (!*t) {
#ifdef DEBUG
    printf("return(5)\n");
#endif
    return !invert_match;
  }
  if (*t == MATCH_MANY && !*(t + 1)) {
#ifdef DEBUG
    printf("return(4)\n");
#endif
    return !invert_match;
  }
  /* String exhausted: the leftover template may consist of '*' only. */
  while (*t) {
    if (*t != MATCH_MANY) {
#ifdef DEBUG
      printf("return(7)\n");
#endif
      return invert_match;
    }
    t++;
  }
#ifdef DEBUG
  printf("return(8)\n");
#endif
  return !invert_match;
}
208
/* Case-insensitive equality of two characters. The casts keep negative
 * char values away from tolower(), whose argument must be representable
 * as unsigned char (or be EOF). */
static int wild_match_ci_same_char(char a, char b) {
  return tolower((unsigned char)a) == tolower((unsigned char)b);
}

/**
 * @brief Determine whether one string is a case-insensitive wildcard match for another.
 *
 * Same template syntax as wild_match(), with every character comparison done
 * without regard to case:
 *  - MATCH_MANY (`*`): matches any run of characters;
 *  - MATCH_ONE (`?`, or `%` when built with VAX_VMS): matches exactly one character;
 *  - `[...]`: matches one character that appears in the bracketed set, or one
 *    that does not when the set starts with `^` and holds more than that
 *    single character (ranges are not interpreted here);
 *  - a backslash: the template character that follows it is compared literally
 *    (still ignoring case).
 *
 * A `[` that has no closing `]` is compared as an ordinary character.
 * A leading `^` on the template inverts the overall result.
 * An empty string only matches an empty template.
 * The template is only read; it is never modified.
 *
 * @param string The string to be matched, case-insensitively.
 * @param template The wildcard pattern to match against, case-insensitively.
 * @return Nonzero if the string satisfies the template, 0 otherwise. Without a
 *         leading `^` the nonzero value is 1; with a leading `^` it is -1
 *         (returned when the rest of the template does not match).
 */
int wild_match_ci(char *string, char *template) {
  char *s = string;   /* cursor in the string being tested */
  char *t = template; /* cursor in the template */
  char *ptr;
  int invert_match = 0; /* 0 normally, -1 when the template starts with '^' */

  if (*t == MATCH_INVERT) {
    invert_match = -1;
    t++;
  }

  /* Either side empty: only "empty vs. empty" counts as a match. */
  if (!*s)
    return *t ? invert_match : !invert_match;
  if (!*t)
    return invert_match;

  /* Both *s and *t are non-NUL at the top of every iteration. */
  do {
#ifdef DEBUG
    printf("s = %s, t = %s\n", s, t);
#endif
    switch (*t) {
    case ESCAPE_CHAR:
      /* The character after the backslash must match literally. */
      t++;
      if (!wild_match_ci_same_char(*t, *s))
        return invert_match;
      t++;
      s++;
      break;
    case MATCH_MANY: {
      size_t at_least = 0; /* number of '?' inside this run of '*' and '?' */

      while (*t == MATCH_MANY || *t == MATCH_ONE) {
        if (*t == MATCH_ONE)
          at_least++;
        t++;
      }
      if (at_least > strlen(s))
        return invert_match;
      s += at_least;
      if (!*t) {
        /* The run ends the template and contains at least one '*', so
         * whatever is left of the string is accepted. */
#ifdef DEBUG
        printf("return(1)\n");
#endif
        return !invert_match;
      }
      /* Try the rest of the template against every suffix of the string. */
      for (ptr = s; *ptr; ptr++) {
        if (wild_match_ci(ptr, t)) {
#ifdef DEBUG
          printf("return(2)\n");
#endif
          return !invert_match;
        }
      }
      /* Second attempt: find the next template character (ignoring case) in
       * the string and match what follows it against what follows in the
       * template.
       * NOTE(review): after a failed candidate the search resumes two
       * characters further on (one if that reaches the end of the string),
       * so an immediately following candidate is not tried - confirm this
       * is intended. */
      ptr = s;
      while (*ptr) {
        if (!wild_match_ci_same_char(*ptr, *t)) {
          ptr++;
          continue;
        }
        if (wild_match_ci(ptr + 1, t + 1)) {
#ifdef DEBUG
          printf("return(3)\n");
#endif
          return !invert_match;
        }
        if (*++ptr)
          ++ptr;
      }
      return invert_match;
    }
    case MATCH_ONE:
      s++;
      t++;
      break;
    case MATCH_SET1: {
      char *set_end = *(t + 1) ? strchr(t + 1, MATCH_SET2) : NULL;

      if (!set_end) {
        /* No closing bracket: '[' is an ordinary character. */
        if (!wild_match_ci_same_char(*t, *s))
          return invert_match;
        t++;
        s++;
      } else {
        const char *set = t + 1;
        const char *member;
        int invert_set_match = 0;
        int in_set = 0;

        /* A set consisting of just "^" means the literal character '^'. */
        if (*set == SET_MATCH_INVERT && set_end - set != 1) {
          invert_set_match = 1;
          set++;
        }
        for (member = set; member < set_end; member++) {
          if (wild_match_ci_same_char(*member, *s)) {
            in_set = 1;
            break;
          }
        }
        if (in_set == invert_set_match)
          return invert_match;
        s++;
        t = set_end + 1;
      }
      break;
    }
    default:
      if (!wild_match_ci_same_char(*s, *t))
        return invert_match;
      t++;
      s++;
      break;
    }
  } while (*s && *t);

#ifdef DEBUG
  printf("s = %s, t = %s\n", s, t);
#endif
  /* The loop stops only when at least one of the two cursors hit its end. */
  if (*s) {
    /* Template exhausted but string characters remain. */
#ifdef DEBUG
    printf("return(6)\n");
#endif
    return invert_match;
  }
  if (!*t) {
#ifdef DEBUG
    printf("return(5)\n");
#endif
    return !invert_match;
  }
  if (*t == MATCH_MANY && !*(t + 1)) {
#ifdef DEBUG
    printf("return(4)\n");
#endif
    return !invert_match;
  }
  /* String exhausted: the leftover template may consist of '*' only. */
  while (*t) {
    if (*t != MATCH_MANY) {
#ifdef DEBUG
      printf("return(7)\n");
#endif
      return invert_match;
    }
    t++;
  }
#ifdef DEBUG
  printf("return(8)\n");
#endif
  return !invert_match;
}
381
/**
 * @brief Compare two strings case-insensitively.
 *
 * Characters are lowered with tolower() (as unsigned char values) and compared
 * position by position. If one string is a proper prefix of the other, the
 * shorter string is the smaller one.
 *
 * @param s The first string to compare.
 * @param t The second string to compare.
 * @return An integer indicating the relationship between the strings:
 *         -1 if `s` < `t`,
 *          0 if `s` == `t` (ignoring case),
 *          1 if `s` > `t`.
 */
int strcmp_ci(const char *s, const char *t) {
  for (; *s && *t; s++, t++) {
    /* Cast first: tolower() is undefined for negative char values. */
    int sc = tolower((unsigned char)*s);
    int tc = tolower((unsigned char)*t);

    if (sc != tc)
      return sc < tc ? -1 : 1;
  }
  /* Equal up to the end of the shorter string: the longer one is greater. */
  if (*s)
    return 1;
  if (*t)
    return -1;
  return 0;
}
408
/**
 * @brief Find the first occurrence of a character in a string, ignoring case.
 *
 * Unlike strchr(), the terminating NUL is never matched: searching for '\0'
 * returns NULL.
 *
 * @param s The string to search.
 * @param c The character to look for; compared without regard to case.
 * @return Pointer to the first matching character in `s`, or NULL if none.
 */
char *strchr_ci(char *s, char c) {
  /* Cast first: tolower() is undefined for negative char values. */
  int wanted = tolower((unsigned char)c);

  for (; *s; s++) {
    if (tolower((unsigned char)*s) == wanted)
      return s;
  }
  return NULL;
}
418
/**
 * @brief Expand range specifiers in a wildcard template into explicit character lists.
 *
 * Processes a wildcard template containing range specifiers (e.g., `[a-e0-5]`) and
 * expands them into explicit lists of characters (e.g., `[abcde012345]`). This
 * function should be called before performing wildcard matching with `wild_match()`,
 * which treats a bracketed set as a literal list of characters.
 *
 * Details:
 *  - Outside a set, a backslash and the character after it are copied unchanged.
 *  - Inside a set, `\-` is replaced by a literal `-`.
 *  - A `[` with no closing `]` is copied as is, with no expansion.
 *  - A range whose end is missing or is not greater than its start prints
 *    "error: bad range syntax" to stderr and terminates the program with exit(1).
 *
 * The input template is only read; it is never modified.
 *
 * @param template The wildcard template containing range specifiers.
 * @return A newly allocated string (obtained from tmalloc()/trealloc()) with all
 *         range specifiers expanded into explicit lists. The caller owns it.
 */
char *expand_ranges(char *template) {
  /* Invariant: capacity >= length + strlen(src) + 1. Outside of a range every
   * source character produces at most one output character, so the buffer
   * only has to grow when a range is expanded. */
  size_t capacity = strlen(template) + 1;
  size_t length = 0;
  char *expanded = tmalloc(sizeof(*expanded) * capacity);
  const char *src = template;

  while (*src) {
    if (*src == ESCAPE_CHAR) {
      /* Keep the escape and the escaped character together. */
      expanded[length++] = *src++;
      if (*src)
        expanded[length++] = *src++;
    } else if (*src == MATCH_SET1) {
      const char *set_start = src; /* points at '[' (used for diagnostics) */
      const char *set_end;

      expanded[length++] = *src++;
      set_end = strchr(src, MATCH_SET2);
      if (!set_end)
        continue; /* unterminated set: '[' is already copied, once */

      while (src < set_end) {
        int n_in_range;
        size_t needed;

        expanded[length++] = *src++;
        if (src == set_end || *src != '-')
          continue;
        if (*(src - 1) == ESCAPE_CHAR) {
          /* "\-" : overwrite the copied backslash with a literal dash. */
          expanded[length - 1] = '-';
          src++;
          continue;
        }
        src++; /* skip '-'; src is now at the range end, or at set_end */
        n_in_range = (src < set_end) ? (*src) - (*(src - 2)) : 0;
        if (n_in_range <= 0) {
          fprintf(stderr, "error: bad range syntax: %s\n", set_start);
          exit(1);
        }
        /* Room for the expanded characters, everything that still follows
         * the range end in the template, and the terminator. */
        needed = length + (size_t)n_in_range + strlen(src + 1) + 1;
        if (needed > capacity) {
          expanded = trealloc(expanded, sizeof(*expanded) * needed);
          capacity = needed;
        }
        /* The range start is already in the output: emit start+1 .. end. */
        while (n_in_range--)
          expanded[length++] = *src - n_in_range;
        src++;
      }
      expanded[length++] = MATCH_SET2;
      src = set_end + 1;
    } else {
      expanded[length++] = *src++;
    }
  }
  expanded[length] = 0;
  return expanded;
}
487
/**
 * @brief Check if a template string contains any wildcard characters.
 *
 * Scans the template from left to right for MATCH_MANY (`*`), MATCH_ONE (`?`,
 * or `%` when built with VAX_VMS) or the set opener `[`. A backslash escapes
 * the single character that follows it, the same way wild_match() consumes
 * escapes: `\*` is a literal star, while in `\\*` the backslash itself is
 * escaped and the star is a live wildcard.
 *
 * @param template The string to check for wildcard characters.
 * @return Returns 1 if any unescaped wildcard characters are found, 0 otherwise.
 */
int has_wildcards(char *template) {
  const char *cursor;

  for (cursor = template; *cursor; cursor++) {
    if (*cursor == ESCAPE_CHAR) {
      /* Skip the escaped character (if there is one) without testing it. */
      if (*(cursor + 1))
        cursor++;
      continue;
    }
    if (*cursor == MATCH_MANY || *cursor == MATCH_ONE || *cursor == MATCH_SET1)
      return (1);
  }
  return (0);
}
524
/**
 * @brief Remove escape characters from wildcard characters in a template string.
 *
 * Every backslash that is immediately followed by one of the wildcard
 * characters MATCH_MANY (`*`), MATCH_ONE (`?`), `[` or `]` is dropped; all
 * other characters, including other backslashes, are kept. The string is
 * compacted in place, so it never grows.
 *
 * @param template The wildcard template string to unescape.
 * @return A pointer to the modified template string with escape characters removed.
 */
char *unescape_wildcards(char *template) {
  char *from = template; /* next character to examine */
  char *to = template;   /* next position to write */

  while (*from) {
    if (*from == ESCAPE_CHAR) {
      char escaped = from[1];

      /* Step over the backslash so only the wildcard itself is copied. */
      if (escaped == MATCH_MANY || escaped == MATCH_ONE ||
          escaped == MATCH_SET1 || escaped == MATCH_SET2)
        from++;
    }
    *to++ = *from++;
  }
  *to = 0;
  return (template);
}
567
/* Nonzero when c is one of the ASCII digits '0'..'9'. */
static int strcmp_nh_is_digit(char c) {
  return c >= '0' && c <= '9';
}

/**
 * @brief Compare two strings with a custom non-hierarchical ranking.
 *
 * The strings are walked in parallel. At each position:
 *  - if both characters are digits, the digit runs that start there are
 *    compared by length first: the string whose run of digits ends sooner
 *    is the smaller one;
 *  - if exactly one of the two characters is a digit, the string holding the
 *    non-digit is the smaller one;
 *  - otherwise the characters themselves are compared.
 * If one string runs out first, the shorter string is the smaller one.
 *
 * @param s1 The first string to compare.
 * @param s2 The second string to compare.
 * @return An integer indicating the relationship between the strings:
 *         -1 if `s1` < `s2`,
 *          0 if `s1` == `s2`,
 *          1 if `s1` > `s2`.
 */
int strcmp_nh(const char *s1, const char *s2) {
  while (*s1 && *s2) {
    int digit1 = strcmp_nh_is_digit(*s1);
    int digit2 = strcmp_nh_is_digit(*s2);

    if (digit1 && digit2) {
      /* Look ahead to find out which digit run stops first. */
      int ahead = 1;

      while (s1[ahead] && s2[ahead]) {
        int more1 = strcmp_nh_is_digit(s1[ahead]);
        int more2 = strcmp_nh_is_digit(s2[ahead]);

        if (more1 != more2)
          return more1 ? 1 : -1;
        if (!more1)
          break; /* both runs end here: same length */
        ahead++;
      }
      /* One string ended during the look-ahead: the other has the longer
       * number if it still continues with a digit. */
      if (!s1[ahead] && s2[ahead]) {
        if (strcmp_nh_is_digit(s2[ahead]))
          return (-1);
      } else if (!s2[ahead] && s1[ahead]) {
        if (strcmp_nh_is_digit(s1[ahead]))
          return (1);
      }
    }

    /* Non-digits rank before digits. */
    if (digit1 != digit2)
      return digit1 ? 1 : -1;
    if (*s1 != *s2)
      return (*s1 < *s2) ? -1 : 1;

    s1++;
    s2++;
  }
  if (*s1)
    return (1);
  if (*s2)
    return (-1);
  return (0);
}
void * trealloc(void *old_ptr, uint64_t size_of_block)
Reallocates a memory block to a new size.
Definition array.c:181
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
void bomb(char *error, char *usage)
Reports error messages to the terminal and aborts the program.
Definition bomb.c:26
char * strcpy_ss(char *dest, const char *src)
Safely copies a string, handling memory overlap.
Definition str_copy.c:34
int has_wildcards(char *template)
Check if a template string contains any wildcard characters.
Definition wild_match.c:498
int wild_match_ci(char *string, char *template)
Determine whether one string is a case-insensitive wildcard match for another.
Definition wild_match.c:220
int strcmp_ci(const char *s, const char *t)
Compare two strings case-insensitively.
Definition wild_match.c:396
char * expand_ranges(char *template)
Expand range specifiers in a wildcard template into explicit character lists.
Definition wild_match.c:429
int wild_match(char *string, char *template)
Determine whether one string is a wildcard match for another.
Definition wild_match.c:49
char * unescape_wildcards(char *template)
Remove escape characters from wildcard characters in a template string.
Definition wild_match.c:534
int strcmp_nh(const char *s1, const char *s2)
Compare two strings with a custom non-hierarchical ranking.
Definition wild_match.c:583