root/cherokee/trunk/cherokee/pcre/pcre_get.c

Revision 905, 14.8 kB (checked in by alo, 1 year ago)

--

Line 
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8                        Written by Philip Hazel
9            Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15     * Redistributions of source code must retain the above copyright notice,
16       this list of conditions and the following disclaimer.
17
18     * Redistributions in binary form must reproduce the above copyright
19       notice, this list of conditions and the following disclaimer in the
20       documentation and/or other materials provided with the distribution.
21
22     * Neither the name of the University of Cambridge nor the names of its
23       contributors may be used to endorse or promote products derived from
24       this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains some convenience functions for extracting substrings
42 from the subject string after a regex match has succeeded. The original idea
43 for these functions came from Scott Wimer. */
44
45
46 #include "local_config.h"
47 #include "pcre_internal.h"
48
49
50 /*************************************************
51 *           Find number for named string         *
52 *************************************************/
53
54 /* This function is used by the get_first_set() function below, as well
55 as being generally available. It assumes that names are unique.
56
57 Arguments:
58   code        the compiled regex
59   stringname  the name whose number is required
60
61 Returns:      the number of the named parentheses, or a negative number
62                 (PCRE_ERROR_NOSUBSTRING) if not found
63 */
64
65 int
66 pcre_get_stringnumber(const pcre *code, const char *stringname)
67 {
68 int rc;
69 int entrysize;
70 int top, bot;
71 uschar *nametable;
72
73 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
74   return rc;
75 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
76
77 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
78   return rc;
79 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
80   return rc;
81
82 bot = 0;
83 while (top > bot)
84   {
85   int mid = (top + bot) / 2;
86   uschar *entry = nametable + entrysize*mid;
87   int c = strcmp(stringname, (char *)(entry + 2));
88   if (c == 0) return (entry[0] << 8) + entry[1];
89   if (c > 0) bot = mid + 1; else top = mid;
90   }
91
92 return PCRE_ERROR_NOSUBSTRING;
93 }
94
95
96
97 /*************************************************
98 *     Find (multiple) entries for named string   *
99 *************************************************/
100
101 /* This is used by the get_first_set() function below, as well as being
102 generally available. It is used when duplicated names are permitted.
103
104 Arguments:
105   code        the compiled regex
106   stringname  the name whose entries required
107   firstptr    where to put the pointer to the first entry
108   lastptr     where to put the pointer to the last entry
109
110 Returns:      the length of each entry, or a negative number
111                 (PCRE_ERROR_NOSUBSTRING) if not found
112 */
113
114 int
115 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
116   char **firstptr, char **lastptr)
117 {
118 int rc;
119 int entrysize;
120 int top, bot;
121 uschar *nametable, *lastentry;
122
123 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
124   return rc;
125 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
126
127 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
128   return rc;
129 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
130   return rc;
131
132 lastentry = nametable + entrysize * (top - 1);
133 bot = 0;
134 while (top > bot)
135   {
136   int mid = (top + bot) / 2;
137   uschar *entry = nametable + entrysize*mid;
138   int c = strcmp(stringname, (char *)(entry + 2));
139   if (c == 0)
140     {
141     uschar *first = entry;
142     uschar *last = entry;
143     while (first > nametable)
144       {
145       if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
146       first -= entrysize;
147       }
148     while (last < lastentry)
149       {
150       if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
151       last += entrysize;
152       }
153     *firstptr = (char *)first;
154     *lastptr = (char *)last;
155     return entrysize;
156     }
157   if (c > 0) bot = mid + 1; else top = mid;
158   }
159
160 return PCRE_ERROR_NOSUBSTRING;
161 }
162
163
164
165 /*************************************************
166 *    Find first set of multiple named strings    *
167 *************************************************/
168
169 /* This function allows for duplicate names in the table of named substrings.
170 It returns the number of the first one that was set in a pattern match.
171
172 Arguments:
173   code         the compiled regex
174   stringname   the name of the capturing substring
175   ovector      the vector of matched substrings
176
177 Returns:       the number of the first that is set,
178                or the number of the last one if none are set,
179                or a negative number on error
180 */
181
182 static int
183 get_first_set(const pcre *code, const char *stringname, int *ovector)
184 {
185 const real_pcre *re = (const real_pcre *)code;
186 int entrysize;
187 char *first, *last;
188 uschar *entry;
189 if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
190   return pcre_get_stringnumber(code, stringname);
191 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
192 if (entrysize <= 0) return entrysize;
193 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
194   {
195   int n = (entry[0] << 8) + entry[1];
196   if (ovector[n*2] >= 0) return n;
197   }
198 return (first[0] << 8) + first[1];
199 }
200
201
202
203
204 /*************************************************
205 *      Copy captured string to given buffer      *
206 *************************************************/
207
208 /* This function copies a single captured substring into a given buffer.
209 Note that we use memcpy() rather than strncpy() in case there are binary zeros
210 in the string.
211
212 Arguments:
213   subject        the subject string that was matched
214   ovector        pointer to the offsets table
215   stringcount    the number of substrings that were captured
216                    (i.e. the yield of the pcre_exec call, unless
217                    that was zero, in which case it should be 1/3
218                    of the offset table size)
219   stringnumber   the number of the required substring
220   buffer         where to put the substring
221   size           the size of the buffer
222
223 Returns:         if successful:
224                    the length of the copied string, not including the zero
225                    that is put on the end; can be zero
226                  if not successful:
227                    PCRE_ERROR_NOMEMORY (-6) buffer too small
228                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
229 */
230
231 int
232 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
233   int stringnumber, char *buffer, int size)
234 {
235 int yield;
236 if (stringnumber < 0 || stringnumber >= stringcount)
237   return PCRE_ERROR_NOSUBSTRING;
238 stringnumber *= 2;
239 yield = ovector[stringnumber+1] - ovector[stringnumber];
240 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
241 memcpy(buffer, subject + ovector[stringnumber], yield);
242 buffer[yield] = 0;
243 return yield;
244 }
245
246
247
248 /*************************************************
249 *   Copy named captured string to given buffer   *
250 *************************************************/
251
252 /* This function copies a single captured substring into a given buffer,
253 identifying it by name. If the regex permits duplicate names, the first
254 substring that is set is chosen.
255
256 Arguments:
257   code           the compiled regex
258   subject        the subject string that was matched
259   ovector        pointer to the offsets table
260   stringcount    the number of substrings that were captured
261                    (i.e. the yield of the pcre_exec call, unless
262                    that was zero, in which case it should be 1/3
263                    of the offset table size)
264   stringname     the name of the required substring
265   buffer         where to put the substring
266   size           the size of the buffer
267
268 Returns:         if successful:
269                    the length of the copied string, not including the zero
270                    that is put on the end; can be zero
271                  if not successful:
272                    PCRE_ERROR_NOMEMORY (-6) buffer too small
273                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
274 */
275
276 int
277 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
278   int stringcount, const char *stringname, char *buffer, int size)
279 {
280 int n = get_first_set(code, stringname, ovector);
281 if (n <= 0) return n;
282 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
283 }
284
285
286
287 /*************************************************
288 *      Copy all captured strings to new store    *
289 *************************************************/
290
291 /* This function gets one chunk of store and builds a list of pointers and all
292 of the captured substrings in it. A NULL pointer is put on the end of the list.
293
294 Arguments:
295   subject        the subject string that was matched
296   ovector        pointer to the offsets table
297   stringcount    the number of substrings that were captured
298                    (i.e. the yield of the pcre_exec call, unless
299                    that was zero, in which case it should be 1/3
300                    of the offset table size)
301   listptr        set to point to the list of pointers
302
303 Returns:         if successful: 0
304                  if not successful:
305                    PCRE_ERROR_NOMEMORY (-6) failed to get store
306 */
307
308 int
309 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
310   const char ***listptr)
311 {
312 int i;
313 int size = sizeof(char *);
314 int double_count = stringcount * 2;
315 char **stringlist;
316 char *p;
317
318 for (i = 0; i < double_count; i += 2)
319   size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
320
321 stringlist = (char **)(pcre_malloc)(size);
322 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
323
324 *listptr = (const char **)stringlist;
325 p = (char *)(stringlist + stringcount + 1);
326
327 for (i = 0; i < double_count; i += 2)
328   {
329   int len = ovector[i+1] - ovector[i];
330   memcpy(p, subject + ovector[i], len);
331   *stringlist++ = p;
332   p += len;
333   *p++ = 0;
334   }
335
336 *stringlist = NULL;
337 return 0;
338 }
339
340
341
342 /*************************************************
343 *   Free store obtained by get_substring_list    *
344 *************************************************/
345
346 /* This function exists for the benefit of people calling PCRE from non-C
347 programs that can call its functions, but not free() or (pcre_free)() directly.
348
349 Argument:   the result of a previous pcre_get_substring_list()
350 Returns:    nothing
351 */
352
353 void
354 pcre_free_substring_list(const char **pointer)
355 {
356 (pcre_free)((void *)pointer);
357 }
358
359
360
361 /*************************************************
362 *      Copy captured string to new store         *
363 *************************************************/
364
365 /* This function copies a single captured substring into a piece of new
366 store
367
368 Arguments:
369   subject        the subject string that was matched
370   ovector        pointer to the offsets table
371   stringcount    the number of substrings that were captured
372                    (i.e. the yield of the pcre_exec call, unless
373                    that was zero, in which case it should be 1/3
374                    of the offset table size)
375   stringnumber   the number of the required substring
376   stringptr      where to put a pointer to the substring
377
378 Returns:         if successful:
379                    the length of the string, not including the zero that
380                    is put on the end; can be zero
381                  if not successful:
382                    PCRE_ERROR_NOMEMORY (-6) failed to get store
383                    PCRE_ERROR_NOSUBSTRING (-7) substring not present
384 */
385
386 int
387 pcre_get_substring(const char *subject, int *ovector, int stringcount,
388   int stringnumber, const char **stringptr)
389 {
390 int yield;
391 char *substring;
392 if (stringnumber < 0 || stringnumber >= stringcount)
393   return PCRE_ERROR_NOSUBSTRING;
394 stringnumber *= 2;
395 yield = ovector[stringnumber+1] - ovector[stringnumber];
396 substring = (char *)(pcre_malloc)(yield + 1);
397 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
398 memcpy(substring, subject + ovector[stringnumber], yield);
399 substring[yield] = 0;
400 *stringptr = substring;
401 return yield;
402 }
403
404
405
406 /*************************************************
407 *   Copy named captured string to new store      *
408 *************************************************/
409
410 /* This function copies a single captured substring, identified by name, into
411 new store. If the regex permits duplicate names, the first substring that is
412 set is chosen.
413
414 Arguments:
415   code           the compiled regex
416   subject        the subject string that was matched
417   ovector        pointer to the offsets table
418   stringcount    the number of substrings that were captured
419                    (i.e. the yield of the pcre_exec call, unless
420                    that was zero, in which case it should be 1/3
421                    of the offset table size)
422   stringname     the name of the required substring
423   stringptr      where to put the pointer
424
425 Returns:         if successful:
426                    the length of the copied string, not including the zero
427                    that is put on the end; can be zero
428                  if not successful:
429                    PCRE_ERROR_NOMEMORY (-6) couldn't get memory
430                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
431 */
432
433 int
434 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
435   int stringcount, const char *stringname, const char **stringptr)
436 {
437 int n = get_first_set(code, stringname, ovector);
438 if (n <= 0) return n;
439 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
440 }
441
442
443
444
445 /*************************************************
446 *       Free store obtained by get_substring     *
447 *************************************************/
448
449 /* This function exists for the benefit of people calling PCRE from non-C
450 programs that can call its functions, but not free() or (pcre_free)() directly.
451
452 Argument:   the result of a previous pcre_get_substring()
453 Returns:    nothing
454 */
455
456 void
457 pcre_free_substring(const char *pointer)
458 {
459 (pcre_free)((void *)pointer);
460 }
461
462 /* End of pcre_get.c */
Note: See TracBrowser for help on using the browser.