SDDSlib
Loading...
Searching...
No Matches
get_token_tq.c
Go to the documentation of this file.
/**
 * @file get_token_tq.c
 * @brief Functions for parsing tokens from strings with support for delimiters and quotations.
 *
 * This file provides implementations for extracting tokens from character strings
 * based on specified delimiter sets and quotation marks. It includes functions to
 * handle nested quotations and escaped characters, facilitating robust tokenization
 * of complex strings.
 *
 * @copyright
 *   - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
 *   - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
 *
 * @license
 * This file is distributed under the terms of the Software License Agreement
 * found in the file LICENSE included with this distribution.
 *
 * @author M. Borland, C. Saunders, R. Soliday
 */
20
#include "mdb.h"
#include <ctype.h>
#include <string.h>
23
/* Forward declarations for helpers that are used before their definitions below. */

/* Finds the position just past the delimiter that closes an open quote/parenthesis level. */
char *seek_level(char *s, char qs, char qe);

/* Returns the 1-based position of c within set, or 0 when c is not a member. */
int in_charset(char c, char *set);
27
/**
 * @brief Extracts the next token from a string based on delimiter characters.
 *
 * Leading characters of `s` that appear in `t` are skipped. The token then runs up to,
 * but not including, the next character that appears in `t`. A section enclosed in
 * double quotes (`"`) is scanned as a unit, so delimiters inside it do not end the
 * token; a quote preceded by a backslash neither opens nor closes such a section.
 * The quote characters themselves are kept in the returned token.
 *
 * The input is modified in place: on success the skipped delimiters and the token are
 * removed from the front of `s`, so that `s` then begins at the delimiter that ended
 * the token (or is empty). When NULL is returned, `s` is left untouched.
 *
 * @param s The NUL-terminated, writable string to parse.
 * @param t A string containing delimiter characters.
 * @return A buffer obtained from tmalloc() holding a NUL-terminated copy of the token
 *         (owned by the caller), or NULL if an argument is NULL or no token is found.
 */
char *get_token_t(char *s, char *t) {
  char *input_start, *token_start, *token;
  size_t token_len;

  if (!s || !t)
    return (NULL);

  /* skip all leading characters of s found in string t */
  input_start = s;
  while (*s && in_charset(*s, t))
    s++;
  if (*s == 0)
    return (NULL);
  token_start = s;

  /* advance to the next character of s found in t, stepping over quoted portions */
  do {
    if (*s == '"' && !(s != input_start && *(s - 1) == '\\')) {
      /* unescaped opening quote: run to the matching unescaped closing quote */
      s++;
      while (*s && !(*s == '"' && *(s - 1) != '\\'))
        s++;
      if (*s == '"')
        s++;
    } else
      s++;
  } while (*s && !in_charset(*s, t));

  token_len = (size_t)(s - token_start);
  token = tmalloc(sizeof(*token) * (token_len + 1));
  /* tmalloc's failure behaviour is not visible from this file; guard in case it can return NULL */
  if (!token)
    return (NULL);
  memcpy(token, token_start, token_len);
  token[token_len] = 0;

  /* drop the consumed prefix from the caller's string (overlapping copy) */
  strcpy_ss(input_start, s);

  return (token);
}
73
/**
 * @brief Determines whether a character is a member of a set of characters.
 *
 * The set is scanned up to, but not including, its NUL terminator, so the character
 * '\0' is never reported as a member.
 *
 * @param c   The character to look for.
 * @param set NUL-terminated string listing the members of the set.
 * @return The 1-based position of the first occurrence of `c` in `set`, or 0 when
 *         there is no match (or `set` is NULL).
 */
int in_charset(char c, char *set) {
  int i;

  if (!set)
    return (0);

  for (i = 0; set[i]; i++) {
    if (set[i] == c)
      return (i + 1);
  }
  return (0);
}
92
/**
 * @brief Extracts a token from a string with support for multiple delimiter and quotation sets.
 *
 * Leading characters of `s` that appear in `ts` are skipped, except that a character
 * which is also in `qs` stops the skipping and starts the token. The token then runs
 * up to, but not including, the next character found in `te` that lies outside any
 * quoted section. A quoted section is opened by the i-th character of `qs` and closed
 * by the i-th character of `qe`; its extent is found with seek_level(), so nesting is
 * honoured when the two characters differ. An unterminated quoted section extends to
 * the end of the string. Quotation characters are kept in the returned token.
 *
 * The input is modified in place: on success the skipped delimiters, the token and the
 * single end delimiter that terminated it are removed from the front of `s` (or `s`
 * becomes empty when the token reached the end of the string). When NULL is returned,
 * `s` is left untouched.
 *
 * @param s  The NUL-terminated, writable string to parse.
 * @param ts A string containing token start delimiter characters.
 * @param te A string containing token end delimiter characters.
 * @param qs A string containing quotation start characters.
 * @param qe A string containing quotation end characters, position-matched with `qs`.
 *           If `qe` is shorter than `qs`, an opening character without a partner is
 *           treated as opening a section that is never closed.
 * @return A buffer obtained from tmalloc() holding a NUL-terminated copy of the token
 *         (owned by the caller), or NULL if an argument is NULL or no token is found.
 */
char *get_token_tq(char *s, char *ts, char *te, char *qs, char *qe) {
  char *input_start, *token_start, *token, *after_quote;
  size_t token_len, qe_len;
  int in_quotes;

  if (!s || !ts || !te || !qs || !qe)
    return (NULL);

  /* skip all leading characters of s found in ts, but stop at an opening quote */
  input_start = s;
  while (*s && in_charset(*s, ts) && !in_charset(*s, qs))
    s++;
  if (*s == 0)
    return (NULL);
  token_start = s;
  qe_len = strlen(qe);

  /* in_quotes holds the 1-based index into qs of the quote that is open, or 0 */
  if ((in_quotes = in_charset(*s, qs)))
    s++;

  /* advance to the next character of s found in te that is outside quotes */
  do {
    if (in_quotes) {
      char quote_open = qs[in_quotes - 1];
      /* never index past the end of qe; '\0' can never match, so the quote stays open */
      char quote_close = ((size_t)in_quotes <= qe_len) ? qe[in_quotes - 1] : '\0';

      after_quote = seek_level(s, quote_open, quote_close);
      if (after_quote)
        s = after_quote;
      else
        s += strlen(s); /* unterminated quote: consume the rest of the string */
      in_quotes = 0;
    } else {
      in_quotes = in_charset(*s, qs);
      s++;
    }
  } while (*s && (in_quotes || !in_charset(*s, te)));

  token_len = (size_t)(s - token_start);
  token = tmalloc(sizeof(*token) * (token_len + 1));
  /* tmalloc's failure behaviour is not visible from this file; guard in case it can return NULL */
  if (!token)
    return (NULL);
  memcpy(token, token_start, token_len);
  token[token_len] = 0;

  /* drop the consumed prefix, including the end delimiter, from the caller's string */
  if (*s)
    strcpy_ss(input_start, s + 1);
  else
    *input_start = 0;

  return (token);
}
154
/**
 * @brief Seeks through a string to the point where quotation/parenthesis nesting returns to zero.
 *
 * A nesting level of 1 is assumed to exist before the call (the opening character has
 * already been consumed by the caller). Each unescaped `qe` lowers the level and each
 * unescaped `qs` raises it; a character is escaped when it is immediately preceded by a
 * backslash within `s`. The closing test is made first, so when `qs == qe` the routine
 * simply finds the next unescaped quotation mark.
 *
 * @param s  NUL-terminated string, positioned just after the opening character.
 * @param qs The character that opens a nesting level.
 * @param qe The character that closes a nesting level.
 * @return Pointer to the character after the one that brought the nesting to zero,
 *         or NULL if the end of the string is reached first (or `s` is NULL).
 */
char *seek_level(char *s, char qs, char qe) {
  int qlevel = 1;
  char *start = s;

  if (!s)
    return (NULL);

  for (; *s && qlevel; s++) {
    /* the first character has no predecessor inside s, so it cannot be escaped */
    int escaped = (s != start && *(s - 1) == '\\');

    if (escaped)
      continue;
    if (*s == qe)
      qlevel--;
    else if (*s == qs)
      qlevel++;
  }
  return (qlevel == 0 ? s : NULL);
}
182
/**
 * @brief Processes a string to interpret and replace escaped quotation marks.
 *
 * Scans `s` and rewrites it in place so that every backslash that is immediately
 * followed by a double quote (`\"`) is removed, leaving the bare quotation mark (`"`).
 * Backslashes followed by any other character are kept unchanged. The result is never
 * longer than the input, so no additional storage is needed.
 *
 * @param s The NUL-terminated, writable string in which to interpret escaped quotes.
 *          A NULL pointer is ignored.
 */
void interpret_escaped_quotes(char *s) {
  char *src, *dst;

  if (!s)
    return;

  /* single compaction pass: dst trails src by the number of backslashes dropped */
  src = dst = s;
  while (*src) {
    if (src[0] == '\\' && src[1] == '"')
      src++; /* drop the backslash; the quote itself is copied below */
    *dst++ = *src++;
  }
  *dst = 0;
}
void * tmalloc(uint64_t size_of_block)
Allocates a memory block of the specified size with zero initialization.
Definition array.c:59
char * get_token_tq(char *s, char *ts, char *te, char *qs, char *qe)
Extracts a token from a string with support for multiple delimiter and quotation sets.
void interpret_escaped_quotes(char *s)
Processes a string to interpret and replace escaped quotation marks.
char * get_token_t(char *s, char *t)
Extracts a token from a string based on delimiter characters.
char * strcpy_ss(char *dest, const char *src)
Safely copies a string, handling memory overlap.
Definition str_copy.c:34