| 1 |
/************************************************* |
|---|
| 2 |
* Perl-Compatible Regular Expressions * |
|---|
| 3 |
*************************************************/ |
|---|
| 4 |
|
|---|
| 5 |
/* PCRE is a library of functions to support regular expressions whose syntax |
|---|
| 6 |
and semantics are as close as possible to those of the Perl 5 language. |
|---|
| 7 |
|
|---|
| 8 |
Written by Philip Hazel |
|---|
| 9 |
Copyright (c) 1997-2007 University of Cambridge |
|---|
| 10 |
|
|---|
| 11 |
----------------------------------------------------------------------------- |
|---|
| 12 |
Redistribution and use in source and binary forms, with or without |
|---|
| 13 |
modification, are permitted provided that the following conditions are met: |
|---|
| 14 |
|
|---|
| 15 |
* Redistributions of source code must retain the above copyright notice, |
|---|
| 16 |
this list of conditions and the following disclaimer. |
|---|
| 17 |
|
|---|
| 18 |
* Redistributions in binary form must reproduce the above copyright |
|---|
| 19 |
notice, this list of conditions and the following disclaimer in the |
|---|
| 20 |
documentation and/or other materials provided with the distribution. |
|---|
| 21 |
|
|---|
| 22 |
* Neither the name of the University of Cambridge nor the names of its |
|---|
| 23 |
contributors may be used to endorse or promote products derived from |
|---|
| 24 |
this software without specific prior written permission. |
|---|
| 25 |
|
|---|
| 26 |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|---|
| 27 |
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|---|
| 28 |
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|---|
| 29 |
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
|---|
| 30 |
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|---|
| 31 |
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|---|
| 32 |
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|---|
| 33 |
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|---|
| 34 |
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|---|
| 35 |
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|---|
| 36 |
POSSIBILITY OF SUCH DAMAGE. |
|---|
| 37 |
----------------------------------------------------------------------------- |
|---|
| 38 |
*/ |
|---|
| 39 |
|
|---|
| 40 |
|
|---|
| 41 |
/* This module contains the external function pcre_compile(), along with |
|---|
| 42 |
supporting internal functions that are not used by other modules. */ |
|---|
| 43 |
|
|---|
| 44 |
|
|---|
| 45 |
#include "local_config.h" |
|---|
| 46 |
|
|---|
| 47 |
#define NLBLOCK cd /* Block containing newline information */ |
|---|
| 48 |
#define PSSTART start_pattern /* Field containing processed string start */ |
|---|
| 49 |
#define PSEND end_pattern /* Field containing processed string end */ |
|---|
| 50 |
|
|---|
| 51 |
#include "pcre_internal.h" |
|---|
| 52 |
|
|---|
| 53 |
|
|---|
| 54 |
|
|---|
| 55 |
/* Macro for setting individual bits in class bitmaps. */ |
|---|
| 56 |
|
|---|
| 57 |
#define SETBIT(a,b) a[b/8] |= (1 << (b%8)) |
|---|
| 58 |
|
|---|
| 59 |
/* Maximum length value to check against when making sure that the integer that |
|---|
| 60 |
holds the compiled pattern length does not overflow. We make it a bit less than |
|---|
| 61 |
INT_MAX to allow for adding in group terminating bytes, so that we don't have |
|---|
| 62 |
to check them every time. */ |
|---|
| 63 |
|
|---|
| 64 |
#define OFLOW_MAX (INT_MAX - 20) |
|---|
| 65 |
|
|---|
| 66 |
|
|---|
| 67 |
/************************************************* |
|---|
| 68 |
* Code parameters and static tables * |
|---|
| 69 |
*************************************************/ |
|---|
| 70 |
|
|---|
| 71 |
/* This value specifies the size of stack workspace that is used during the |
|---|
| 72 |
first pre-compile phase that determines how much memory is required. The regex |
|---|
| 73 |
is partly compiled into this space, but the compiled parts are discarded as |
|---|
| 74 |
soon as they can be, so that hopefully there will never be an overrun. The code |
|---|
| 75 |
does, however, check for an overrun. The largest amount I've seen used is 218, |
|---|
| 76 |
so this number is very generous. |
|---|
| 77 |
|
|---|
| 78 |
The same workspace is used during the second, actual compile phase for |
|---|
| 79 |
remembering forward references to groups so that they can be filled in at the |
|---|
| 80 |
end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE |
|---|
| 81 |
is 4 there is plenty of room. */ |
|---|
| 82 |
|
|---|
| 83 |
#define COMPILE_WORK_SIZE (4096) |
|---|
| 84 |
|
|---|
| 85 |
|
|---|
| 86 |
/* Table for handling escaped characters in the range '0'-'z'. Positive returns |
|---|
| 87 |
are simple data values; negative values are for special things like \d and so |
|---|
| 88 |
on. Zero means further processing is needed (for things like \x), or the escape |
|---|
| 89 |
is invalid. */ |
|---|
| 90 |
|
|---|
| 91 |
#ifndef EBCDIC /* This is the "normal" table for ASCII systems */ |
|---|
| 92 |
static const short int escapes[] = { |
|---|
| 93 |
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ |
|---|
| 94 |
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */ |
|---|
| 95 |
'@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G, /* @ - G */ |
|---|
| 96 |
-ESC_H, 0, 0, -ESC_K, 0, 0, 0, 0, /* H - O */ |
|---|
| 97 |
-ESC_P, -ESC_Q, -ESC_R, -ESC_S, 0, 0, -ESC_V, -ESC_W, /* P - W */ |
|---|
| 98 |
-ESC_X, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */ |
|---|
| 99 |
'`', 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* ` - g */ |
|---|
| 100 |
-ESC_h, 0, 0, -ESC_k, 0, 0, ESC_n, 0, /* h - o */ |
|---|
| 101 |
-ESC_p, 0, ESC_r, -ESC_s, ESC_tee, 0, -ESC_v, -ESC_w, /* p - w */ |
|---|
| 102 |
0, 0, -ESC_z /* x - z */ |
|---|
| 103 |
}; |
|---|
| 104 |
|
|---|
| 105 |
#else /* This is the "abnormal" table for EBCDIC systems */ |
|---|
| 106 |
static const short int escapes[] = { |
|---|
| 107 |
/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|', |
|---|
| 108 |
/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0, |
|---|
| 109 |
/* 58 */ 0, 0, '!', '$', '*', ')', ';', '~', |
|---|
| 110 |
/* 60 */ '-', '/', 0, 0, 0, 0, 0, 0, |
|---|
| 111 |
/* 68 */ 0, 0, '|', ',', '%', '_', '>', '?', |
|---|
| 112 |
/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 113 |
/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"', |
|---|
| 114 |
/* 80 */ 0, 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, |
|---|
| 115 |
/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0, |
|---|
| 116 |
/* 90 */ 0, 0, -ESC_k, 'l', 0, ESC_n, 0, -ESC_p, |
|---|
| 117 |
/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0, |
|---|
| 118 |
/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0, |
|---|
| 119 |
/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0, |
|---|
| 120 |
/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 121 |
/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-', |
|---|
| 122 |
/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G, |
|---|
| 123 |
/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 124 |
/* D0 */ '}', 0, -ESC_K, 0, 0, 0, 0, -ESC_P, |
|---|
| 125 |
/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0, |
|---|
| 126 |
/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X, |
|---|
| 127 |
/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0, |
|---|
| 128 |
/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 129 |
/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0 |
|---|
| 130 |
}; |
|---|
| 131 |
#endif |
|---|
| 132 |
|
|---|
| 133 |
|
|---|
| 134 |
/* Table of special "verbs" like (*PRUNE) */ |
|---|
| 135 |
|
|---|
| 136 |
typedef struct verbitem { |
|---|
| 137 |
const char *name; |
|---|
| 138 |
int len; |
|---|
| 139 |
int op; |
|---|
| 140 |
} verbitem; |
|---|
| 141 |
|
|---|
| 142 |
static verbitem verbs[] = { |
|---|
| 143 |
{ "ACCEPT", 6, OP_ACCEPT }, |
|---|
| 144 |
{ "COMMIT", 6, OP_COMMIT }, |
|---|
| 145 |
{ "F", 1, OP_FAIL }, |
|---|
| 146 |
{ "FAIL", 4, OP_FAIL }, |
|---|
| 147 |
{ "PRUNE", 5, OP_PRUNE }, |
|---|
| 148 |
{ "SKIP", 4, OP_SKIP }, |
|---|
| 149 |
{ "THEN", 4, OP_THEN } |
|---|
| 150 |
}; |
|---|
| 151 |
|
|---|
| 152 |
static int verbcount = sizeof(verbs)/sizeof(verbitem); |
|---|
| 153 |
|
|---|
| 154 |
|
|---|
| 155 |
/* Tables of names of POSIX character classes and their lengths. The list is |
|---|
| 156 |
terminated by a zero length entry. The first three must be alpha, lower, upper, |
|---|
| 157 |
as this is assumed for handling case independence. */ |
|---|
| 158 |
|
|---|
| 159 |
static const char *const posix_names[] = { |
|---|
| 160 |
"alpha", "lower", "upper", |
|---|
| 161 |
"alnum", "ascii", "blank", "cntrl", "digit", "graph", |
|---|
| 162 |
"print", "punct", "space", "word", "xdigit" }; |
|---|
| 163 |
|
|---|
| 164 |
static const uschar posix_name_lengths[] = { |
|---|
| 165 |
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; |
|---|
| 166 |
|
|---|
| 167 |
/* Table of class bit maps for each POSIX class. Each class is formed from a |
|---|
| 168 |
base map, with an optional addition or removal of another map. Then, for some |
|---|
| 169 |
classes, there is some additional tweaking: for [:blank:] the vertical space |
|---|
| 170 |
characters are removed, and for [:alpha:] and [:alnum:] the underscore |
|---|
| 171 |
character is removed. The triples in the table consist of the base map offset, |
|---|
| 172 |
second map offset or -1 if no second map, and a non-negative value for map |
|---|
| 173 |
addition or a negative value for map subtraction (if there are two maps). The |
|---|
| 174 |
absolute value of the third field has these meanings: 0 => no tweaking, 1 => |
|---|
| 175 |
remove vertical space characters, 2 => remove underscore. */ |
|---|
| 176 |
|
|---|
| 177 |
static const int posix_class_maps[] = { |
|---|
| 178 |
cbit_word, cbit_digit, -2, /* alpha */ |
|---|
| 179 |
cbit_lower, -1, 0, /* lower */ |
|---|
| 180 |
cbit_upper, -1, 0, /* upper */ |
|---|
| 181 |
cbit_word, -1, 2, /* alnum - word without underscore */ |
|---|
| 182 |
cbit_print, cbit_cntrl, 0, /* ascii */ |
|---|
| 183 |
cbit_space, -1, 1, /* blank - a GNU extension */ |
|---|
| 184 |
cbit_cntrl, -1, 0, /* cntrl */ |
|---|
| 185 |
cbit_digit, -1, 0, /* digit */ |
|---|
| 186 |
cbit_graph, -1, 0, /* graph */ |
|---|
| 187 |
cbit_print, -1, 0, /* print */ |
|---|
| 188 |
cbit_punct, -1, 0, /* punct */ |
|---|
| 189 |
cbit_space, -1, 0, /* space */ |
|---|
| 190 |
cbit_word, -1, 0, /* word - a Perl extension */ |
|---|
| 191 |
cbit_xdigit,-1, 0 /* xdigit */ |
|---|
| 192 |
}; |
|---|
| 193 |
|
|---|
| 194 |
|
|---|
| 195 |
#define STRING(a) # a |
|---|
| 196 |
#define XSTRING(s) STRING(s) |
|---|
| 197 |
|
|---|
| 198 |
/* The texts of compile-time error messages. These are "char *" because they |
|---|
| 199 |
are passed to the outside world. Do not ever re-use any error number, because |
|---|
| 200 |
they are documented. Always add a new error instead. Messages marked DEAD below |
|---|
| 201 |
are no longer used. */ |
|---|
| 202 |
|
|---|
| 203 |
static const char *error_texts[] = { |
|---|
| 204 |
"no error", |
|---|
| 205 |
"\\ at end of pattern", |
|---|
| 206 |
"\\c at end of pattern", |
|---|
| 207 |
"unrecognized character follows \\", |
|---|
| 208 |
"numbers out of order in {} quantifier", |
|---|
| 209 |
/* 5 */ |
|---|
| 210 |
"number too big in {} quantifier", |
|---|
| 211 |
"missing terminating ] for character class", |
|---|
| 212 |
"invalid escape sequence in character class", |
|---|
| 213 |
"range out of order in character class", |
|---|
| 214 |
"nothing to repeat", |
|---|
| 215 |
/* 10 */ |
|---|
| 216 |
"operand of unlimited repeat could match the empty string", /** DEAD **/ |
|---|
| 217 |
"internal error: unexpected repeat", |
|---|
| 218 |
"unrecognized character after (?", |
|---|
| 219 |
"POSIX named classes are supported only within a class", |
|---|
| 220 |
"missing )", |
|---|
| 221 |
/* 15 */ |
|---|
| 222 |
"reference to non-existent subpattern", |
|---|
| 223 |
"erroffset passed as NULL", |
|---|
| 224 |
"unknown option bit(s) set", |
|---|
| 225 |
"missing ) after comment", |
|---|
| 226 |
"parentheses nested too deeply", /** DEAD **/ |
|---|
| 227 |
/* 20 */ |
|---|
| 228 |
"regular expression is too large", |
|---|
| 229 |
"failed to get memory", |
|---|
| 230 |
"unmatched parentheses", |
|---|
| 231 |
"internal error: code overflow", |
|---|
| 232 |
"unrecognized character after (?<", |
|---|
| 233 |
/* 25 */ |
|---|
| 234 |
"lookbehind assertion is not fixed length", |
|---|
| 235 |
"malformed number or name after (?(", |
|---|
| 236 |
"conditional group contains more than two branches", |
|---|
| 237 |
"assertion expected after (?(", |
|---|
| 238 |
"(?R or (?[+-]digits must be followed by )", |
|---|
| 239 |
/* 30 */ |
|---|
| 240 |
"unknown POSIX class name", |
|---|
| 241 |
"POSIX collating elements are not supported", |
|---|
| 242 |
"this version of PCRE is not compiled with PCRE_UTF8 support", |
|---|
| 243 |
"spare error", /** DEAD **/ |
|---|
| 244 |
"character value in \\x{...} sequence is too large", |
|---|
| 245 |
/* 35 */ |
|---|
| 246 |
"invalid condition (?(0)", |
|---|
| 247 |
"\\C not allowed in lookbehind assertion", |
|---|
| 248 |
"PCRE does not support \\L, \\l, \\N, \\U, or \\u", |
|---|
| 249 |
"number after (?C is > 255", |
|---|
| 250 |
"closing ) for (?C expected", |
|---|
| 251 |
/* 40 */ |
|---|
| 252 |
"recursive call could loop indefinitely", |
|---|
| 253 |
"unrecognized character after (?P", |
|---|
| 254 |
"syntax error in subpattern name (missing terminator)", |
|---|
| 255 |
"two named subpatterns have the same name", |
|---|
| 256 |
"invalid UTF-8 string", |
|---|
| 257 |
/* 45 */ |
|---|
| 258 |
"support for \\P, \\p, and \\X has not been compiled", |
|---|
| 259 |
"malformed \\P or \\p sequence", |
|---|
| 260 |
"unknown property name after \\P or \\p", |
|---|
| 261 |
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)", |
|---|
| 262 |
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")", |
|---|
| 263 |
/* 50 */ |
|---|
| 264 |
"repeated subpattern is too long", /** DEAD **/ |
|---|
| 265 |
"octal value is greater than \\377 (not in UTF-8 mode)", |
|---|
| 266 |
"internal error: overran compiling workspace", |
|---|
| 267 |
"internal error: previously-checked referenced subpattern not found", |
|---|
| 268 |
"DEFINE group contains more than one branch", |
|---|
| 269 |
/* 55 */ |
|---|
| 270 |
"repeating a DEFINE group is not allowed", |
|---|
| 271 |
"inconsistent NEWLINE options", |
|---|
| 272 |
"\\g is not followed by a braced name or an optionally braced non-zero number", |
|---|
| 273 |
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number", |
|---|
| 274 |
"(*VERB) with an argument is not supported", |
|---|
| 275 |
/* 60 */ |
|---|
| 276 |
"(*VERB) not recognized", |
|---|
| 277 |
"number is too big" |
|---|
| 278 |
}; |
|---|
| 279 |
|
|---|
| 280 |
|
|---|
| 281 |
/* Table to identify digits and hex digits. This is used when compiling |
|---|
| 282 |
patterns. Note that the tables in chartables are dependent on the locale, and |
|---|
| 283 |
may mark arbitrary characters as digits - but the PCRE compiling code expects |
|---|
| 284 |
to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have |
|---|
| 285 |
a private table here. It costs 256 bytes, but it is a lot faster than doing |
|---|
| 286 |
character value tests (at least in some simple cases I timed), and in some |
|---|
| 287 |
applications one wants PCRE to compile efficiently as well as match |
|---|
| 288 |
efficiently. |
|---|
| 289 |
|
|---|
| 290 |
For convenience, we use the same bit definitions as in chartables: |
|---|
| 291 |
|
|---|
| 292 |
0x04 decimal digit |
|---|
| 293 |
0x08 hexadecimal digit |
|---|
| 294 |
|
|---|
| 295 |
Then we can use ctype_digit and ctype_xdigit in the code. */ |
|---|
| 296 |
|
|---|
| 297 |
#ifndef EBCDIC /* This is the "normal" case, for ASCII systems */ |
|---|
| 298 |
static const unsigned char digitab[] = |
|---|
| 299 |
{ |
|---|
| 300 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ |
|---|
| 301 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ |
|---|
| 302 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ |
|---|
| 303 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ |
|---|
| 304 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */ |
|---|
| 305 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */ |
|---|
| 306 |
0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */ |
|---|
| 307 |
0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */ |
|---|
| 308 |
0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */ |
|---|
| 309 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H - O */ |
|---|
| 310 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* P - W */ |
|---|
| 311 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* X - _ */ |
|---|
| 312 |
0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */ |
|---|
| 313 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h - o */ |
|---|
| 314 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p - w */ |
|---|
| 315 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* x -127 */ |
|---|
| 316 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ |
|---|
| 317 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ |
|---|
| 318 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ |
|---|
| 319 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ |
|---|
| 320 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ |
|---|
| 321 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ |
|---|
| 322 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ |
|---|
| 323 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ |
|---|
| 324 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ |
|---|
| 325 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ |
|---|
| 326 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ |
|---|
| 327 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ |
|---|
| 328 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ |
|---|
| 329 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ |
|---|
| 330 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ |
|---|
| 331 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ |
|---|
| 332 |
|
|---|
| 333 |
#else /* This is the "abnormal" case, for EBCDIC systems */ |
|---|
| 334 |
static const unsigned char digitab[] = |
|---|
| 335 |
{ |
|---|
| 336 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */ |
|---|
| 337 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ |
|---|
| 338 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 10 */ |
|---|
| 339 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ |
|---|
| 340 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 32- 39 20 */ |
|---|
| 341 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ |
|---|
| 342 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 30 */ |
|---|
| 343 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ |
|---|
| 344 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */ |
|---|
| 345 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */ |
|---|
| 346 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */ |
|---|
| 347 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- 95 */ |
|---|
| 348 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */ |
|---|
| 349 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? */ |
|---|
| 350 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */ |
|---|
| 351 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ |
|---|
| 352 |
0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g 80 */ |
|---|
| 353 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ |
|---|
| 354 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p 90 */ |
|---|
| 355 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ |
|---|
| 356 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x A0 */ |
|---|
| 357 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ |
|---|
| 358 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 B0 */ |
|---|
| 359 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ |
|---|
| 360 |
0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* { - G C0 */ |
|---|
| 361 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ |
|---|
| 362 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* } - P D0 */ |
|---|
| 363 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ |
|---|
| 364 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* \ - X E0 */ |
|---|
| 365 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ |
|---|
| 366 |
0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */ |
|---|
| 367 |
0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ |
|---|
| 368 |
|
|---|
| 369 |
static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */ |
|---|
| 370 |
0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */ |
|---|
| 371 |
0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */ |
|---|
| 372 |
0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */ |
|---|
| 373 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ |
|---|
| 374 |
0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 32- 39 */ |
|---|
| 375 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ |
|---|
| 376 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 */ |
|---|
| 377 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ |
|---|
| 378 |
0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */ |
|---|
| 379 |
0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */ |
|---|
| 380 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */ |
|---|
| 381 |
0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- 95 */ |
|---|
| 382 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */ |
|---|
| 383 |
0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? */ |
|---|
| 384 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */ |
|---|
| 385 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ |
|---|
| 386 |
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g */ |
|---|
| 387 |
0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ |
|---|
| 388 |
0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p */ |
|---|
| 389 |
0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ |
|---|
| 390 |
0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x */ |
|---|
| 391 |
0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ |
|---|
| 392 |
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 */ |
|---|
| 393 |
0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ |
|---|
| 394 |
0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* { - G */ |
|---|
| 395 |
0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ |
|---|
| 396 |
0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* } - P */ |
|---|
| 397 |
0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ |
|---|
| 398 |
0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* \ - X */ |
|---|
| 399 |
0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ |
|---|
| 400 |
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ |
|---|
| 401 |
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ |
|---|
| 402 |
#endif |
|---|
| 403 |
|
|---|
| 404 |
|
|---|
| 405 |
/* Definition to allow mutual recursion */ |
|---|
| 406 |
|
|---|
| 407 |
static BOOL |
|---|
| 408 |
compile_regex(int, int, uschar **, const uschar **, int *, BOOL, BOOL, int, |
|---|
| 409 |
int *, int *, branch_chain *, compile_data *, int *); |
|---|
| 410 |
|
|---|
| 411 |
|
|---|
| 412 |
|
|---|
| 413 |
/************************************************* |
|---|
| 414 |
* Handle escapes * |
|---|
| 415 |
*************************************************/ |
|---|
| 416 |
|
|---|
| 417 |
/* This function is called when a \ has been encountered. It either returns a |
|---|
| 418 |
positive value for a simple escape such as \n, or a negative value which |
|---|
| 419 |
encodes one of the more complicated things such as \d. A backreference to group |
|---|
| 420 |
n is returned as -(ESC_REF + n); ESC_REF is the highest ESC_xxx macro. When |
|---|
| 421 |
UTF-8 is enabled, a positive value greater than 255 may be returned. On entry, |
|---|
| 422 |
ptr is pointing at the \. On exit, it is on the final character of the escape |
|---|
| 423 |
sequence. |
|---|
| 424 |
|
|---|
| 425 |
Arguments: |
|---|
| 426 |
ptrptr points to the pattern position pointer |
|---|
| 427 |
errorcodeptr points to the errorcode variable |
|---|
| 428 |
bracount number of previous extracting brackets |
|---|
| 429 |
options the options bits |
|---|
| 430 |
isclass TRUE if inside a character class |
|---|
| 431 |
|
|---|
| 432 |
Returns: zero or positive => a data character |
|---|
| 433 |
negative => a special escape sequence |
|---|
| 434 |
on error, errorcodeptr is set |
|---|
| 435 |
*/ |
|---|
| 436 |
|
|---|
| 437 |
static int |
|---|
| 438 |
check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount, |
|---|
| 439 |
int options, BOOL isclass) |
|---|
| 440 |
{ |
|---|
| 441 |
BOOL utf8 = (options & PCRE_UTF8) != 0; |
|---|
| 442 |
const uschar *ptr = *ptrptr + 1; |
|---|
| 443 |
int c, i; |
|---|
| 444 |
|
|---|
| 445 |
GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */ |
|---|
| 446 |
ptr--; /* Set pointer back to the last byte */ |
|---|
| 447 |
|
|---|
| 448 |
/* If backslash is at the end of the pattern, it's an error. */ |
|---|
| 449 |
|
|---|
| 450 |
if (c == 0) *errorcodeptr = ERR1; |
|---|
| 451 |
|
|---|
| 452 |
/* Non-alphamerics are literals. For digits or letters, do an initial lookup in |
|---|
| 453 |
a table. A non-zero result is something that can be returned immediately. |
|---|
| 454 |
Otherwise further processing may be required. */ |
|---|
| 455 |
|
|---|
| 456 |
#ifndef EBCDIC /* ASCII coding */ |
|---|
| 457 |
else if (c < '0' || c > 'z') {} /* Not alphameric */ |
|---|
| 458 |
else if ((i = escapes[c - '0']) != 0) c = i; |
|---|
| 459 |
|
|---|
| 460 |
#else /* EBCDIC coding */ |
|---|
| 461 |
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */ |
|---|
| 462 |
else if ((i = escapes[c - 0x48]) != 0) c = i; |
|---|
| 463 |
#endif |
|---|
| 464 |
|
|---|
| 465 |
/* Escapes that need further processing, or are illegal. */ |
|---|
| 466 |
|
|---|
| 467 |
else |
|---|
| 468 |
{ |
|---|
| 469 |
const uschar *oldptr; |
|---|
| 470 |
BOOL braced, negated; |
|---|
| 471 |
|
|---|
| 472 |
switch (c) |
|---|
| 473 |
{ |
|---|
| 474 |
/* A number of Perl escapes are not handled by PCRE. We give an explicit |
|---|
| 475 |
error. */ |
|---|
| 476 |
|
|---|
| 477 |
case 'l': |
|---|
| 478 |
case 'L': |
|---|
| 479 |
case 'N': |
|---|
| 480 |
case 'u': |
|---|
| 481 |
case 'U': |
|---|
| 482 |
*errorcodeptr = ERR37; |
|---|
| 483 |
break; |
|---|
| 484 |
|
|---|
| 485 |
/* \g must be followed by a number, either plain or braced. If positive, it |
|---|
| 486 |
is an absolute backreference. If negative, it is a relative backreference. |
|---|
| 487 |
This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a |
|---|
| 488 |
reference to a named group. This is part of Perl's movement towards a |
|---|
| 489 |
unified syntax for back references. As this is synonymous with \k{name}, we |
|---|
| 490 |
fudge it up by pretending it really was \k. */ |
|---|
| 491 |
|
|---|
| 492 |
case 'g': |
|---|
| 493 |
if (ptr[1] == '{') |
|---|
| 494 |
{ |
|---|
| 495 |
const uschar *p; |
|---|
| 496 |
for (p = ptr+2; *p != 0 && *p != '}'; p++) |
|---|
| 497 |
if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break; |
|---|
| 498 |
if (*p != 0 && *p != '}') |
|---|
| 499 |
{ |
|---|
| 500 |
c = -ESC_k; |
|---|
| 501 |
break; |
|---|
| 502 |
} |
|---|
| 503 |
braced = TRUE; |
|---|
| 504 |
ptr++; |
|---|
| 505 |
} |
|---|
| 506 |
else braced = FALSE; |
|---|
| 507 |
|
|---|
| 508 |
if (ptr[1] == '-') |
|---|
| 509 |
{ |
|---|
| 510 |
negated = TRUE; |
|---|
| 511 |
ptr++; |
|---|
| 512 |
} |
|---|
| 513 |
else negated = FALSE; |
|---|
| 514 |
|
|---|
| 515 |
c = 0; |
|---|
| 516 |
while ((digitab[ptr[1]] & ctype_digit) != 0) |
|---|
| 517 |
c = c * 10 + *(++ptr) - '0'; |
|---|
| 518 |
|
|---|
| 519 |
if (c < 0) |
|---|
| 520 |
{ |
|---|
| 521 |
*errorcodeptr = ERR61; |
|---|
| 522 |
break; |
|---|
| 523 |
} |
|---|
| 524 |
|
|---|
| 525 |
if (c == 0 || (braced && *(++ptr) != '}')) |
|---|
| 526 |
{ |
|---|
| 527 |
*errorcodeptr = ERR57; |
|---|
| 528 |
break; |
|---|
| 529 |
} |
|---|
| 530 |
|
|---|
| 531 |
if (negated) |
|---|
| 532 |
{ |
|---|
| 533 |
if (c > bracount) |
|---|
| 534 |
{ |
|---|
| 535 |
*errorcodeptr = ERR15; |
|---|
| 536 |
break; |
|---|
| 537 |
} |
|---|
| 538 |
c = bracount - (c - 1); |
|---|
| 539 |
} |
|---|
| 540 |
|
|---|
| 541 |
c = -(ESC_REF + c); |
|---|
| 542 |
break; |
|---|
| 543 |
|
|---|
| 544 |
/* The handling of escape sequences consisting of a string of digits |
|---|
| 545 |
starting with one that is not zero is not straightforward. By experiment, |
|---|
| 546 |
the way Perl works seems to be as follows: |
|---|
| 547 |
|
|---|
| 548 |
Outside a character class, the digits are read as a decimal number. If the |
|---|
| 549 |
number is less than 10, or if there are that many previous extracting |
|---|
| 550 |
left brackets, then it is a back reference. Otherwise, up to three octal |
|---|
| 551 |
digits are read to form an escaped byte. Thus \123 is likely to be octal |
|---|
| 552 |
123 (cf \0123, which is octal 012 followed by the literal 3). If the octal |
|---|
| 553 |
value is greater than 377, the least significant 8 bits are taken. Inside a |
|---|
| 554 |
character class, \ followed by a digit is always an octal number. */ |
|---|
| 555 |
|
|---|
| 556 |
case '1': case '2': case '3': case '4': case '5': |
|---|
| 557 |
case '6': case '7': case '8': case '9': |
|---|
| 558 |
|
|---|
| 559 |
if (!isclass) |
|---|
| 560 |
{ |
|---|
| 561 |
oldptr = ptr; |
|---|
| 562 |
c -= '0'; |
|---|
| 563 |
while ((digitab[ptr[1]] & ctype_digit) != 0) |
|---|
| 564 |
c = c * 10 + *(++ptr) - '0'; |
|---|
| 565 |
if (c < 0) |
|---|
| 566 |
{ |
|---|
| 567 |
*errorcodeptr = ERR61; |
|---|
| 568 |
break; |
|---|
| 569 |
} |
|---|
| 570 |
if (c < 10 || c <= bracount) |
|---|
| 571 |
{ |
|---|
| 572 |
c = -(ESC_REF + c); |
|---|
| 573 |
break; |
|---|
| 574 |
} |
|---|
| 575 |
ptr = oldptr; /* Put the pointer back and fall through */ |
|---|
| 576 |
} |
|---|
| 577 |
|
|---|
| 578 |
/* Handle an octal number following \. If the first digit is 8 or 9, Perl |
|---|
| 579 |
generates a binary zero byte and treats the digit as a following literal. |
|---|
| 580 |
Thus we have to pull back the pointer by one. */ |
|---|
| 581 |
|
|---|
| 582 |
if ((c = *ptr) >= '8') |
|---|
| 583 |
{ |
|---|
| 584 |
ptr--; |
|---|
| 585 |
c = 0; |
|---|
| 586 |
break; |
|---|
| 587 |
} |
|---|
| 588 |
|
|---|
| 589 |
/* \0 always starts an octal number, but we may drop through to here with a |
|---|
| 590 |
larger first octal digit. The original code used just to take the least |
|---|
| 591 |
significant 8 bits of octal numbers (I think this is what early Perls used |
|---|
| 592 |
to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more |
|---|
| 593 |
than 3 octal digits. */ |
|---|
| 594 |
|
|---|
| 595 |
case '0': |
|---|
| 596 |
c -= '0'; |
|---|
| 597 |
while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7') |
|---|
| 598 |
c = c * 8 + *(++ptr) - '0'; |
|---|
| 599 |
if (!utf8 && c > 255) *errorcodeptr = ERR51; |
|---|
| 600 |
break; |
|---|
| 601 |
|
|---|
| 602 |
/* \x is complicated. \x{ddd} is a character number which can be greater |
|---|
| 603 |
than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is |
|---|
| 604 |
treated as a data character. */ |
|---|
| 605 |
|
|---|
| 606 |
case 'x': |
|---|
| 607 |
if (ptr[1] == '{') |
|---|
| 608 |
{ |
|---|
| 609 |
const uschar *pt = ptr + 2; |
|---|
| 610 |
int count = 0; |
|---|
| 611 |
|
|---|
| 612 |
c = 0; |
|---|
| 613 |
while ((digitab[*pt] & ctype_xdigit) != 0) |
|---|
| 614 |
{ |
|---|
| 615 |
register int cc = *pt++; |
|---|
| 616 |
if (c == 0 && cc == '0') continue; /* Leading zeroes */ |
|---|
| 617 |
count++; |
|---|
| 618 |
|
|---|
| 619 |
#ifndef EBCDIC /* ASCII coding */ |
|---|
| 620 |
if (cc >= 'a') cc -= 32; /* Convert to upper case */ |
|---|
| 621 |
c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10)); |
|---|
| 622 |
#else /* EBCDIC coding */ |
|---|
| 623 |
if (cc >= 'a' && cc <= 'z') cc += 64; /* Convert to upper case */ |
|---|
| 624 |
c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10)); |
|---|
| 625 |
#endif |
|---|
| 626 |
} |
|---|
| 627 |
|
|---|
| 628 |
if (*pt == '}') |
|---|
| 629 |
{ |
|---|
| 630 |
if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34; |
|---|
| 631 |
ptr = pt; |
|---|
| 632 |
break; |
|---|
| 633 |
} |
|---|
| 634 |
|
|---|
| 635 |
/* If the sequence of hex digits does not end with '}', then we don't |
|---|
| 636 |
recognize this construct; fall through to the normal \x handling. */ |
|---|
| 637 |
} |
|---|
| 638 |
|
|---|
| 639 |
/* Read just a single-byte hex-defined char */ |
|---|
| 640 |
|
|---|
| 641 |
c = 0; |
|---|
| 642 |
while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0) |
|---|
| 643 |
{ |
|---|
| 644 |
int cc; /* Some compilers don't like ++ */ |
|---|
| 645 |
cc = *(++ptr); /* in initializers */ |
|---|
| 646 |
#ifndef EBCDIC /* ASCII coding */ |
|---|
| 647 |
if (cc >= 'a') cc -= 32; /* Convert to upper case */ |
|---|
| 648 |
c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10)); |
|---|
| 649 |
#else /* EBCDIC coding */ |
|---|
| 650 |
if (cc <= 'z') cc += 64; /* Convert to upper case */ |
|---|
| 651 |
c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10)); |
|---|
| 652 |
#endif |
|---|
| 653 |
} |
|---|
| 654 |
break; |
|---|
| 655 |
|
|---|
| 656 |
/* For \c, a following letter is upper-cased; then the 0x40 bit is flipped. |
|---|
| 657 |
This coding is ASCII-specific, but then the whole concept of \cx is |
|---|
| 658 |
ASCII-specific. (However, an EBCDIC equivalent has now been added.) */ |
|---|
| 659 |
|
|---|
| 660 |
case 'c': |
|---|
| 661 |
c = *(++ptr); |
|---|
| 662 |
if (c == 0) |
|---|
| 663 |
{ |
|---|
| 664 |
*errorcodeptr = ERR2; |
|---|
| 665 |
break; |
|---|
| 666 |
} |
|---|
| 667 |
|
|---|
| 668 |
#ifndef EBCDIC /* ASCII coding */ |
|---|
| 669 |
if (c >= 'a' && c <= 'z') c -= 32; |
|---|
| 670 |
c ^= 0x40; |
|---|
| 671 |
#else /* EBCDIC coding */ |
|---|
| 672 |
if (c >= 'a' && c <= 'z') c += 64; |
|---|
| 673 |
c ^= 0xC0; |
|---|
| 674 |
#endif |
|---|
| 675 |
break; |
|---|
| 676 |
|
|---|
| 677 |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
|---|
| 678 |
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, |
|---|
| 679 |
for Perl compatibility, it is a literal. This code looks a bit odd, but |
|---|
| 680 |
there used to be some cases other than the default, and there may be again |
|---|
| 681 |
in future, so I haven't "optimized" it. */ |
|---|
| 682 |
|
|---|
| 683 |
default: |
|---|
| 684 |
if ((options & PCRE_EXTRA) != 0) switch(c) |
|---|
| 685 |
{ |
|---|
| 686 |
default: |
|---|
| 687 |
*errorcodeptr = ERR3; |
|---|
| 688 |
break; |
|---|
| 689 |
} |
|---|
| 690 |
break; |
|---|
| 691 |
} |
|---|
| 692 |
} |
|---|
| 693 |
|
|---|
| 694 |
*ptrptr = ptr; |
|---|
| 695 |
return c; |
|---|
| 696 |
} |
|---|
| 697 |
|
|---|
| 698 |
|
|---|
| 699 |
|
|---|
| 700 |
#ifdef SUPPORT_UCP |
|---|
| 701 |
/************************************************* |
|---|
| 702 |
* Handle \P and \p * |
|---|
| 703 |
*************************************************/ |
|---|
| 704 |
|
|---|
| 705 |
/* This function is called after \P or \p has been encountered, provided that |
|---|
| 706 |
PCRE is compiled with support for Unicode properties. On entry, ptrptr is |
|---|
| 707 |
pointing at the P or p. On exit, it is pointing at the final character of the |
|---|
| 708 |
escape sequence. |
|---|
| 709 |
|
|---|
| 710 |
Argument: |
|---|
| 711 |
ptrptr points to the pattern position pointer |
|---|
| 712 |
negptr points to a boolean that is set TRUE for negation else FALSE |
|---|
| 713 |
dptr points to an int that is set to the detailed property value |
|---|
| 714 |
errorcodeptr points to the error code variable |
|---|
| 715 |
|
|---|
| 716 |
Returns: type value from ucp_type_table, or -1 for an invalid type |
|---|
| 717 |
*/ |
|---|
| 718 |
|
|---|
| 719 |
static int |
|---|
| 720 |
get_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr) |
|---|
| 721 |
{ |
|---|
| 722 |
int c, i, bot, top; |
|---|
| 723 |
const uschar *ptr = *ptrptr; |
|---|
| 724 |
char name[32]; |
|---|
| 725 |
|
|---|
| 726 |
c = *(++ptr); |
|---|
| 727 |
if (c == 0) goto ERROR_RETURN; |
|---|
| 728 |
|
|---|
| 729 |
*negptr = FALSE; |
|---|
| 730 |
|
|---|
| 731 |
/* \P or \p can be followed by a name in {}, optionally preceded by ^ for |
|---|
| 732 |
negation. */ |
|---|
| 733 |
|
|---|
| 734 |
if (c == '{') |
|---|
| 735 |
{ |
|---|
| 736 |
if (ptr[1] == '^') |
|---|
| 737 |
{ |
|---|
| 738 |
*negptr = TRUE; |
|---|
| 739 |
ptr++; |
|---|
| 740 |
} |
|---|
| 741 |
for (i = 0; i < (int)sizeof(name) - 1; i++) |
|---|
| 742 |
{ |
|---|
| 743 |
c = *(++ptr); |
|---|
| 744 |
if (c == 0) goto ERROR_RETURN; |
|---|
| 745 |
if (c == '}') break; |
|---|
| 746 |
name[i] = c; |
|---|
| 747 |
} |
|---|
| 748 |
if (c !='}') goto ERROR_RETURN; |
|---|
| 749 |
name[i] = 0; |
|---|
| 750 |
} |
|---|
| 751 |
|
|---|
| 752 |
/* Otherwise there is just one following character */ |
|---|
| 753 |
|
|---|
| 754 |
else |
|---|
| 755 |
{ |
|---|
| 756 |
name[0] = c; |
|---|
| 757 |
name[1] = 0; |
|---|
| 758 |
} |
|---|
| 759 |
|
|---|
| 760 |
*ptrptr = ptr; |
|---|
| 761 |
|
|---|
| 762 |
/* Search for a recognized property name using binary chop */ |
|---|
| 763 |
|
|---|
| 764 |
bot = 0; |
|---|
| 765 |
top = _pcre_utt_size; |
|---|
| 766 |
|
|---|
| 767 |
while (bot < top) |
|---|
| 768 |
{ |
|---|
| 769 |
i = (bot + top) >> 1; |
|---|
| 770 |
c = strcmp(name, _pcre_utt[i].name); |
|---|
| 771 |
if (c == 0) |
|---|
| 772 |
{ |
|---|
| 773 |
*dptr = _pcre_utt[i].value; |
|---|
| 774 |
return _pcre_utt[i].type; |
|---|
| 775 |
} |
|---|
| 776 |
if (c > 0) bot = i + 1; else top = i; |
|---|
| 777 |
} |
|---|
| 778 |
|
|---|
| 779 |
*errorcodeptr = ERR47; |
|---|
| 780 |
*ptrptr = ptr; |
|---|
| 781 |
return -1; |
|---|
| 782 |
|
|---|
| 783 |
ERROR_RETURN: |
|---|
| 784 |
*errorcodeptr = ERR46; |
|---|
| 785 |
*ptrptr = ptr; |
|---|
| 786 |
return -1; |
|---|
| 787 |
} |
|---|
| 788 |
#endif |
|---|
| 789 |
|
|---|
| 790 |
|
|---|
| 791 |
|
|---|
| 792 |
|
|---|
| 793 |
/************************************************* |
|---|
| 794 |
* Check for counted repeat * |
|---|
| 795 |
*************************************************/ |
|---|
| 796 |
|
|---|
| 797 |
/* This function is called when a '{' is encountered in a place where it might |
|---|
| 798 |
start a quantifier. It looks ahead to see if it really is a quantifier or not. |
|---|
| 799 |
It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd} |
|---|
| 800 |
where the ddds are digits. |
|---|
| 801 |
|
|---|
| 802 |
Arguments: |
|---|
| 803 |
p pointer to the first char after '{' |
|---|
| 804 |
|
|---|
| 805 |
Returns: TRUE or FALSE |
|---|
| 806 |
*/ |
|---|
| 807 |
|
|---|
| 808 |
static BOOL |
|---|
| 809 |
is_counted_repeat(const uschar *p) |
|---|
| 810 |
{ |
|---|
| 811 |
if ((digitab[*p++] & ctype_digit) == 0) return FALSE; |
|---|
| 812 |
while ((digitab[*p] & ctype_digit) != 0) p++; |
|---|
| 813 |
if (*p == '}') return TRUE; |
|---|
| 814 |
|
|---|
| 815 |
if (*p++ != ',') return FALSE; |
|---|
| 816 |
if (*p == '}') return TRUE; |
|---|
| 817 |
|
|---|
| 818 |
if ((digitab[*p++] & ctype_digit) == 0) return FALSE; |
|---|
| 819 |
while ((digitab[*p] & ctype_digit) != 0) p++; |
|---|
| 820 |
|
|---|
| 821 |
return (*p == '}'); |
|---|
| 822 |
} |
|---|
| 823 |
|
|---|
| 824 |
|
|---|
| 825 |
|
|---|
| 826 |
/************************************************* |
|---|
| 827 |
* Read repeat counts * |
|---|
| 828 |
*************************************************/ |
|---|
| 829 |
|
|---|
| 830 |
/* Read an item of the form {n,m} and return the values. This is called only |
|---|
| 831 |
after is_counted_repeat() has confirmed that a repeat-count quantifier exists, |
|---|
| 832 |
so the syntax is guaranteed to be correct, but we need to check the values. |
|---|
| 833 |
|
|---|
| 834 |
Arguments: |
|---|
| 835 |
p pointer to first char after '{' |
|---|
| 836 |
minp pointer to int for min |
|---|
| 837 |
maxp pointer to int for max |
|---|
| 838 |
returned as -1 if no max |
|---|
| 839 |
errorcodeptr points to error code variable |
|---|
| 840 |
|
|---|
| 841 |
Returns: pointer to '}' on success; |
|---|
| 842 |
current ptr on error, with errorcodeptr set non-zero |
|---|
| 843 |
*/ |
|---|
| 844 |
|
|---|
| 845 |
static const uschar * |
|---|
| 846 |
read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr) |
|---|
| 847 |
{ |
|---|
| 848 |
int min = 0; |
|---|
| 849 |
int max = -1; |
|---|
| 850 |
|
|---|
| 851 |
/* Read the minimum value and do a paranoid check: a negative value indicates |
|---|
| 852 |
an integer overflow. */ |
|---|
| 853 |
|
|---|
| 854 |
while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; |
|---|
| 855 |
if (min < 0 || min > 65535) |
|---|
| 856 |
{ |
|---|
| 857 |
*errorcodeptr = ERR5; |
|---|
| 858 |
return p; |
|---|
| 859 |
} |
|---|
| 860 |
|
|---|
| 861 |
/* Read the maximum value if there is one, and again do a paranoid on its size. |
|---|
| 862 |
Also, max must not be less than min. */ |
|---|
| 863 |
|
|---|
| 864 |
if (*p == '}') max = min; else |
|---|
| 865 |
{ |
|---|
| 866 |
if (*(++p) != '}') |
|---|
| 867 |
{ |
|---|
| 868 |
max = 0; |
|---|
| 869 |
while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; |
|---|
| 870 |
if (max < 0 || max > 65535) |
|---|
| 871 |
{ |
|---|
| 872 |
*errorcodeptr = ERR5; |
|---|
| 873 |
return p; |
|---|
| 874 |
} |
|---|
| 875 |
if (max < min) |
|---|
| 876 |
{ |
|---|
| 877 |
*errorcodeptr = ERR4; |
|---|
| 878 |
return p; |
|---|
| 879 |
} |
|---|
| 880 |
} |
|---|
| 881 |
} |
|---|
| 882 |
|
|---|
| 883 |
/* Fill in the required variables, and pass back the pointer to the terminating |
|---|
| 884 |
'}'. */ |
|---|
| 885 |
|
|---|
| 886 |
*minp = min; |
|---|
| 887 |
*maxp = max; |
|---|
| 888 |
return p; |
|---|
| 889 |
} |
|---|
| 890 |
|
|---|
| 891 |
|
|---|
| 892 |
|
|---|
| 893 |
/************************************************* |
|---|
| 894 |
* Find forward referenced subpattern * |
|---|
| 895 |
*************************************************/ |
|---|
| 896 |
|
|---|
| 897 |
/* This function scans along a pattern's text looking for capturing |
|---|
| 898 |
subpatterns, and counting them. If it finds a named pattern that matches the |
|---|
| 899 |
name it is given, it returns its number. Alternatively, if the name is NULL, it |
|---|
| 900 |
returns when it reaches a given numbered subpattern. This is used for forward |
|---|
| 901 |
references to subpatterns. We know that if (?P< is encountered, the name will |
|---|
| 902 |
be terminated by '>' because that is checked in the first pass. |
|---|
| 903 |
|
|---|
| 904 |
Arguments: |
|---|
| 905 |
ptr current position in the pattern |
|---|
| 906 |
count current count of capturing parens so far encountered |
|---|
| 907 |
name name to seek, or NULL if seeking a numbered subpattern |
|---|
| 908 |
lorn name length, or subpattern number if name is NULL |
|---|
| 909 |
xmode TRUE if we are in /x mode |
|---|
| 910 |
|
|---|
| 911 |
Returns: the number of the named subpattern, or -1 if not found |
|---|
| 912 |
*/ |
|---|
| 913 |
|
|---|
| 914 |
static int |
|---|
| 915 |
find_parens(const uschar *ptr, int count, const uschar *name, int lorn, |
|---|
| 916 |
BOOL xmode) |
|---|
| 917 |
{ |
|---|
| 918 |
const uschar *thisname; |
|---|
| 919 |
|
|---|
| 920 |
for (; *ptr != 0; ptr++) |
|---|
| 921 |
{ |
|---|
| 922 |
int term; |
|---|
| 923 |
|
|---|
| 924 |
/* Skip over backslashed characters and also entire \Q...\E */ |
|---|
| 925 |
|
|---|
| 926 |
if (*ptr == '\\') |
|---|
| 927 |
{ |
|---|
| 928 |
if (*(++ptr) == 0) return -1; |
|---|
| 929 |
if (*ptr == 'Q') for (;;) |
|---|
| 930 |
{ |
|---|
| 931 |
while (*(++ptr) != 0 && *ptr != '\\'); |
|---|
| 932 |
if (*ptr == 0) return -1; |
|---|
| 933 |
if (*(++ptr) == 'E') break; |
|---|
| 934 |
} |
|---|
| 935 |
continue; |
|---|
| 936 |
} |
|---|
| 937 |
|
|---|
| 938 |
/* Skip over character classes */ |
|---|
| 939 |
|
|---|
| 940 |
if (*ptr == '[') |
|---|
| 941 |
{ |
|---|
| 942 |
while (*(++ptr) != ']') |
|---|
| 943 |
{ |
|---|
| 944 |
if (*ptr == 0) return -1; |
|---|
| 945 |
if (*ptr == '\\') |
|---|
| 946 |
{ |
|---|
| 947 |
if (*(++ptr) == 0) return -1; |
|---|
| 948 |
if (*ptr == 'Q') for (;;) |
|---|
| 949 |
{ |
|---|
| 950 |
while (*(++ptr) != 0 && *ptr != '\\'); |
|---|
| 951 |
if (*ptr == 0) return -1; |
|---|
| 952 |
if (*(++ptr) == 'E') break; |
|---|
| 953 |
} |
|---|
| 954 |
continue; |
|---|
| 955 |
} |
|---|
| 956 |
} |
|---|
| 957 |
continue; |
|---|
| 958 |
} |
|---|
| 959 |
|
|---|
| 960 |
/* Skip comments in /x mode */ |
|---|
| 961 |
|
|---|
| 962 |
if (xmode && *ptr == '#') |
|---|
| 963 |
{ |
|---|
| 964 |
while (*(++ptr) != 0 && *ptr != '\n'); |
|---|
| 965 |
if (*ptr == 0) return -1; |
|---|
| 966 |
continue; |
|---|
| 967 |
} |
|---|
| 968 |
|
|---|
| 969 |
/* An opening parens must now be a real metacharacter */ |
|---|
| 970 |
|
|---|
| 971 |
if (*ptr != '(') continue; |
|---|
| 972 |
if (ptr[1] != '?' && ptr[1] != '*') |
|---|
| 973 |
{ |
|---|
| 974 |
count++; |
|---|
| 975 |
if (name == NULL && count == lorn) return count; |
|---|
| 976 |
continue; |
|---|
| 977 |
} |
|---|
| 978 |
|
|---|
| 979 |
ptr += 2; |
|---|
| 980 |
if (*ptr == 'P') ptr++; /* Allow optional P */ |
|---|
| 981 |
|
|---|
| 982 |
/* We have to disambiguate (?<! and (?<= from (?<name> */ |
|---|
| 983 |
|
|---|
| 984 |
if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') && |
|---|
| 985 |
*ptr != '\'') |
|---|
| 986 |
continue; |
|---|
| 987 |
|
|---|
| 988 |
count++; |
|---|
| 989 |
|
|---|
| 990 |
if (name == NULL && count == lorn) return count; |
|---|
| 991 |
term = *ptr++; |
|---|
| 992 |
if (term == '<') term = '>'; |
|---|
| 993 |
thisname = ptr; |
|---|
| 994 |
while (*ptr != term) ptr++; |
|---|
| 995 |
if (name != NULL && lorn == ptr - thisname && |
|---|
| 996 |
strncmp((const char *)name, (const char *)thisname, lorn) == 0) |
|---|
| 997 |
return count; |
|---|
| 998 |
} |
|---|
| 999 |
|
|---|
| 1000 |
return -1; |
|---|
| 1001 |
} |
|---|
| 1002 |
|
|---|
| 1003 |
|
|---|
| 1004 |
|
|---|
| 1005 |
/************************************************* |
|---|
| 1006 |
* Find first significant op code * |
|---|
| 1007 |
*************************************************/ |
|---|
| 1008 |
|
|---|
| 1009 |
/* This is called by several functions that scan a compiled expression looking |
|---|
| 1010 |
for a fixed first character, or an anchoring op code etc. It skips over things |
|---|
| 1011 |
that do not influence this. For some calls, a change of option is important. |
|---|
| 1012 |
For some calls, it makes sense to skip negative forward and all backward |
|---|
| 1013 |
assertions, and also the \b assertion; for others it does not. |
|---|
| 1014 |
|
|---|
| 1015 |
Arguments: |
|---|
| 1016 |
code pointer to the start of the group |
|---|
| 1017 |
options pointer to external options |
|---|
| 1018 |
optbit the option bit whose changing is significant, or |
|---|
| 1019 |
zero if none are |
|---|
| 1020 |
skipassert TRUE if certain assertions are to be skipped |
|---|
| 1021 |
|
|---|
| 1022 |
Returns: pointer to the first significant opcode |
|---|
| 1023 |
*/ |
|---|
| 1024 |
|
|---|
| 1025 |
static const uschar* |
|---|
| 1026 |
first_significant_code(const uschar *code, int *options, int optbit, |
|---|
| 1027 |
BOOL skipassert) |
|---|
| 1028 |
{ |
|---|
| 1029 |
for (;;) |
|---|
| 1030 |
{ |
|---|
| 1031 |
switch ((int)*code) |
|---|
| 1032 |
{ |
|---|
| 1033 |
case OP_OPT: |
|---|
| 1034 |
if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit)) |
|---|
| 1035 |
*options = (int)code[1]; |
|---|
| 1036 |
code += 2; |
|---|
| 1037 |
break; |
|---|
| 1038 |
|
|---|
| 1039 |
case OP_ASSERT_NOT: |
|---|
| 1040 |
case OP_ASSERTBACK: |
|---|
| 1041 |
case OP_ASSERTBACK_NOT: |
|---|
| 1042 |
if (!skipassert) return code; |
|---|
| 1043 |
do code += GET(code, 1); while (*code == OP_ALT); |
|---|
| 1044 |
code += _pcre_OP_lengths[*code]; |
|---|
| 1045 |
break; |
|---|
| 1046 |
|
|---|
| 1047 |
case OP_WORD_BOUNDARY: |
|---|
| 1048 |
case OP_NOT_WORD_BOUNDARY: |
|---|
| 1049 |
if (!skipassert) return code; |
|---|
| 1050 |
/* Fall through */ |
|---|
| 1051 |
|
|---|
| 1052 |
case OP_CALLOUT: |
|---|
| 1053 |
case OP_CREF: |
|---|
| 1054 |
case OP_RREF: |
|---|
| 1055 |
case OP_DEF: |
|---|
| 1056 |
code += _pcre_OP_lengths[*code]; |
|---|
| 1057 |
break; |
|---|
| 1058 |
|
|---|
| 1059 |
default: |
|---|
| 1060 |
return code; |
|---|
| 1061 |
} |
|---|
| 1062 |
} |
|---|
| 1063 |
/* Control never reaches here */ |
|---|
| 1064 |
} |
|---|
| 1065 |
|
|---|
| 1066 |
|
|---|
| 1067 |
|
|---|
| 1068 |
|
|---|
| 1069 |
/************************************************* |
|---|
| 1070 |
* Find the fixed length of a pattern * |
|---|
| 1071 |
*************************************************/ |
|---|
| 1072 |
|
|---|
| 1073 |
/* Scan a pattern and compute the fixed length of subject that will match it, |
|---|
| 1074 |
if the length is fixed. This is needed for dealing with backward assertions. |
|---|
| 1075 |
In UTF8 mode, the result is in characters rather than bytes. |
|---|
| 1076 |
|
|---|
| 1077 |
Arguments: |
|---|
| 1078 |
code points to the start of the pattern (the bracket) |
|---|
| 1079 |
options the compiling options |
|---|
| 1080 |
|
|---|
| 1081 |
Returns: the fixed length, or -1 if there is no fixed length, |
|---|
| 1082 |
or -2 if \C was encountered |
|---|
| 1083 |
*/ |
|---|
| 1084 |
|
|---|
| 1085 |
static int |
|---|
| 1086 |
find_fixedlength(uschar *code, int options) |
|---|
| 1087 |
{ |
|---|
| 1088 |
int length = -1; |
|---|
| 1089 |
|
|---|
| 1090 |
register int branchlength = 0; |
|---|
| 1091 |
register uschar *cc = code + 1 + LINK_SIZE; |
|---|
| 1092 |
|
|---|
| 1093 |
/* Scan along the opcodes for this branch. If we get to the end of the |
|---|
| 1094 |
branch, check the length against that of the other branches. */ |
|---|
| 1095 |
|
|---|
| 1096 |
for (;;) |
|---|
| 1097 |
{ |
|---|
| 1098 |
int d; |
|---|
| 1099 |
register int op = *cc; |
|---|
| 1100 |
switch (op) |
|---|
| 1101 |
{ |
|---|
| 1102 |
case OP_CBRA: |
|---|
| 1103 |
case OP_BRA: |
|---|
| 1104 |
case OP_ONCE: |
|---|
| 1105 |
case OP_COND: |
|---|
| 1106 |
d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options); |
|---|
| 1107 |
if (d < 0) return d; |
|---|
| 1108 |
branchlength += d; |
|---|
| 1109 |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
|---|
| 1110 |
cc += 1 + LINK_SIZE; |
|---|
| 1111 |
break; |
|---|
| 1112 |
|
|---|
| 1113 |
/* Reached end of a branch; if it's a ket it is the end of a nested |
|---|
| 1114 |
call. If it's ALT it is an alternation in a nested call. If it is |
|---|
| 1115 |
END it's the end of the outer call. All can be handled by the same code. */ |
|---|
| 1116 |
|
|---|
| 1117 |
case OP_ALT: |
|---|
| 1118 |
case OP_KET: |
|---|
| 1119 |
case OP_KETRMAX: |
|---|
| 1120 |
case OP_KETRMIN: |
|---|
| 1121 |
case OP_END: |
|---|
| 1122 |
if (length < 0) length = branchlength; |
|---|
| 1123 |
else if (length != branchlength) return -1; |
|---|
| 1124 |
if (*cc != OP_ALT) return length; |
|---|
| 1125 |
cc += 1 + LINK_SIZE; |
|---|
| 1126 |
branchlength = 0; |
|---|
| 1127 |
break; |
|---|
| 1128 |
|
|---|
| 1129 |
/* Skip over assertive subpatterns */ |
|---|
| 1130 |
|
|---|
| 1131 |
case OP_ASSERT: |
|---|
| 1132 |
case OP_ASSERT_NOT: |
|---|
| 1133 |
case OP_ASSERTBACK: |
|---|
| 1134 |
case OP_ASSERTBACK_NOT: |
|---|
| 1135 |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
|---|
| 1136 |
/* Fall through */ |
|---|
| 1137 |
|
|---|
| 1138 |
/* Skip over things that don't match chars */ |
|---|
| 1139 |
|
|---|
| 1140 |
case OP_REVERSE: |
|---|
| 1141 |
case OP_CREF: |
|---|
| 1142 |
case OP_RREF: |
|---|
| 1143 |
case OP_DEF: |
|---|
| 1144 |
case OP_OPT: |
|---|
| 1145 |
case OP_CALLOUT: |
|---|
| 1146 |
case OP_SOD: |
|---|
| 1147 |
case OP_SOM: |
|---|
| 1148 |
case OP_EOD: |
|---|
| 1149 |
case OP_EODN: |
|---|
| 1150 |
case OP_CIRC: |
|---|
| 1151 |
case OP_DOLL: |
|---|
| 1152 |
case OP_NOT_WORD_BOUNDARY: |
|---|
| 1153 |
case OP_WORD_BOUNDARY: |
|---|
| 1154 |
cc += _pcre_OP_lengths[*cc]; |
|---|
| 1155 |
break; |
|---|
| 1156 |
|
|---|
| 1157 |
/* Handle literal characters */ |
|---|
| 1158 |
|
|---|
| 1159 |
case OP_CHAR: |
|---|
| 1160 |
case OP_CHARNC: |
|---|
| 1161 |
case OP_NOT: |
|---|
| 1162 |
branchlength++; |
|---|
| 1163 |
cc += 2; |
|---|
| 1164 |
#ifdef SUPPORT_UTF8 |
|---|
| 1165 |
if ((options & PCRE_UTF8) != 0) |
|---|
| 1166 |
{ |
|---|
| 1167 |
while ((*cc & 0xc0) == 0x80) cc++; |
|---|
| 1168 |
} |
|---|
| 1169 |
#endif |
|---|
| 1170 |
break; |
<