Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(185)

Side by Side Diff: src/third_party/jscre/pcre_compile.cpp

Issue 21504: Remove JSCRE (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 11 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /* This is JavaScriptCore's variant of the PCRE library. While this library
2 started out as a copy of PCRE, many of the features of PCRE have been
3 removed. This library now supports only the regular expression features
4 required by the JavaScript language specification, and has only the functions
5 needed by JavaScriptCore and the rest of WebKit.
6
7 Originally written by Philip Hazel
8 Copyright (c) 1997-2006 University of Cambridge
9 Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
10 Copyright (C) 2007 Eric Seidel <eric@webkit.org>
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 /* This module contains the external function jsRegExpExecute(), along with
42 supporting internal functions that are not used by other modules. */
43
44 #include "config.h"
45
46 #include "pcre_internal.h"
47
48 #include <string.h>
49 #include "ASCIICType.h"
50
51 /* Negative values for the firstchar and reqchar variables */
52
53 #define REQ_UNSET (-2)
54 #define REQ_NONE (-1)
55
56 /*************************************************
57 * Code parameters and static tables *
58 *************************************************/
59
60 /* Maximum number of items on the nested bracket stacks at compile time. This
61 applies to the nesting of all kinds of parentheses. It does not limit
62 un-nested, non-capturing parentheses. This number can be made bigger if
63 necessary - it is used to dimension one int and one unsigned char vector at
64 compile time. */
65
66 #define BRASTACK_SIZE 200
67
68 namespace v8 { namespace jscre {
69
70 /* Table for handling escaped characters in the range '0'-'z'. Positive returns
71 are simple data values; negative values are for special things like \d and so
72 on. Zero means further processing is needed (for things like \x), or the escape
73 is invalid. */
74
75 static const short escapes[] = {
76 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
77 0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
78 '@', 0, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */
79 0, 0, 0, 0, 0, 0, 0, 0, /* H - O */
80 0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */
81 0, 0, 0, '[', '\\', ']', '^', '_', /* X - _ */
82 '`', 7, -ESC_b, 0, -ESC_d, 0, '\f', 0, /* ` - g */
83 0, 0, 0, 0, 0, 0, '\n', 0, /* h - o */
84 0, 0, '\r', -ESC_s, '\t', 0, '\v', -ESC_w, /* p - w */
85 0, 0, 0 /* x - z */
86 };
87
/* Error code numbers. They are given names so that they can more easily be
   tracked. ERR1 through ERR17 each select one message in errorText(). */

enum ErrorCode {
    ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
    ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17
};

/* Returns the text of a compile-time error message. The messages are
   "char *" because they are passed to the outside world.

   All messages live in one NUL-separated character array; the message for
   code N is found by skipping N - 1 terminators from the start.

Arguments:
  code        the error code to describe

Returns:      pointer to the NUL-terminated message for code. ERR0 yields the
              same text as ERR1. A code beyond the last message yields an
              empty string instead of reading past the end of the table.
*/

static const char* errorText(ErrorCode code)
{
    static const char errorTexts[] =
      /* 1 */
      "\\ at end of pattern\0"
      "\\c at end of pattern\0"
      "character value in \\x{...} sequence is too large\0"
      "numbers out of order in {} quantifier\0"
      /* 5 */
      "number too big in {} quantifier\0"
      "missing terminating ] for character class\0"
      "internal error: code overflow\0"
      "range out of order in character class\0"
      "nothing to repeat\0"
      /* 10 */
      "unmatched parentheses\0"
      "internal error: unexpected repeat\0"
      "unrecognized character after (?\0"
      "failed to get memory\0"
      "missing )\0"
      /* 15 */
      "reference to non-existent subpattern\0"
      "regular expression too large\0"
      "parentheses nested too deeply"
    ;

    /* Address of the NUL that terminates the final message; the scan must
       never move past it. */
    const char* const tableEnd = errorTexts + sizeof(errorTexts) - 1;

    int remaining = code;
    const char* text = errorTexts;
    while (remaining > 1 && text < tableEnd)
        remaining -= !*text++; /* count down once per terminator passed */
    return text;
}
131
/* Bundle of "static" state that the compiling functions hand to one another.
   Every field starts out zero / false. */

struct CompileData {
    CompileData()
        : top_backref(0)
        , backrefMap(0)
        , req_varyopt(0)
        , needOuterBracket(false)
        , numCapturingBrackets(0)
    {
    }

    int top_backref;            /* Maximum back reference */
    unsigned backrefMap;        /* Bitmap of low back refs */
    int req_varyopt;            /* "After variable item" flag for reqbyte */
    bool needOuterBracket;
    int numCapturingBrackets;
};
149
150 /* Definitions to allow mutual recursion */
151
152 static bool compileBracket(int, int*, unsigned char**, const UChar**, const UCha r*, ErrorCode*, int, int*, int*, CompileData&);
153 static bool bracketIsAnchored(const unsigned char* code);
154 static bool bracketNeedsLineStart(const unsigned char* code, unsigned captureMap , unsigned backrefMap);
155 static int bracketFindFirstAssertedCharacter(const unsigned char* code, bool ina ssert);
156
157 /*************************************************
158 * Handle escapes *
159 *************************************************/
160
161 /* This function is called when a \ has been encountered. It either returns a
162 positive value for a simple escape such as \n, or a negative value which
163 encodes one of the more complicated things such as \d. When UTF-8 is enabled,
164 a positive value greater than 255 may be returned. On entry, ptr is pointing at
165 the \. On exit, it is on the final character of the escape sequence.
166
167 Arguments:
168 ptrptr points to the pattern position pointer
169 errorcodeptr points to the errorcode variable
170 bracount number of previous extracting brackets
171 options the options bits
172 isclass true if inside a character class
173
174 Returns: zero or positive => a data character
175 negative => a special escape sequence
176 on error, errorptr is set
177 */
178
179 static int checkEscape(const UChar** ptrptr, const UChar* patternEnd, ErrorCode* errorcodeptr, int bracount, bool isclass)
180 {
181 const UChar* ptr = *ptrptr + 1;
182
183 /* If backslash is at the end of the pattern, it's an error. */
184 if (ptr == patternEnd) {
185 *errorcodeptr = ERR1;
186 *ptrptr = ptr;
187 return 0;
188 }
189
190 int c = *ptr;
191
192 /* Non-alphamerics are literals. For digits or letters, do an initial lookup in
193 a table. A non-zero result is something that can be returned immediately.
194 Otherwise further processing may be required. */
195
196 if (c < '0' || c > 'z') { /* Not alphameric */
197 } else if (int escapeValue = escapes[c - '0']) {
198 c = escapeValue;
199 if (isclass) {
200 if (-c == ESC_b)
201 c = '\b'; /* \b is backslash in a class */
202 else if (-c == ESC_B)
203 c = 'B'; /* and \B is a capital B in a class (in browsers event though ECMAScript 15.10.2.19 says it raises an error) */
204 }
205 /* Escapes that need further processing, or are illegal. */
206
207 } else {
208 switch (c) {
209 case '1':
210 case '2':
211 case '3':
212 case '4':
213 case '5':
214 case '6':
215 case '7':
216 case '8':
217 case '9':
218 /* Escape sequences starting with a non-zero digit are backrefer ences,
219 unless there are insufficient brackets, in which case they are octal
220 escape sequences. Those sequences end on the first non-octal ch aracter
221 or when we overflow 0-255, whichever comes first. */
222
223 if (!isclass) {
224 const UChar* oldptr = ptr;
225 c -= '0';
226 while ((ptr + 1 < patternEnd) && isASCIIDigit(ptr[1]) && c < = bracount)
227 c = c * 10 + *(++ptr) - '0';
228 if (c <= bracount) {
229 c = -(ESC_REF + c);
230 break;
231 }
232 ptr = oldptr; /* Put the pointer back and fall through */
233 }
234
235 /* Handle an octal number following \. If the first digit is 8 o r 9,
236 this is not octal. */
237
238 if ((c = *ptr) >= '8')
239 break;
240
241 /* \0 always starts an octal number, but we may drop through to here with a
242 larger first octal digit. */
243
244 case '0': {
245 c -= '0';
246 int i;
247 for (i = 1; i <= 2; ++i) {
248 if (ptr + i >= patternEnd || ptr[i] < '0' || ptr[i] > '7')
249 break;
250 int cc = c * 8 + ptr[i] - '0';
251 if (cc > 255)
252 break;
253 c = cc;
254 }
255 ptr += i - 1;
256 break;
257 }
258
259 case 'x': {
260 c = 0;
261 int i;
262 for (i = 1; i <= 2; ++i) {
263 if (ptr + i >= patternEnd || !isASCIIHexDigit(ptr[i])) {
264 c = 'x';
265 i = 1;
266 break;
267 }
268 int cc = ptr[i];
269 if (cc >= 'a')
270 cc -= 32; /* Convert to upper case */
271 c = c * 16 + cc - ((cc < 'A') ? '0' : ('A' - 10));
272 }
273 ptr += i - 1;
274 break;
275 }
276
277 case 'u': {
278 c = 0;
279 int i;
280 for (i = 1; i <= 4; ++i) {
281 if (ptr + i >= patternEnd || !isASCIIHexDigit(ptr[i])) {
282 c = 'u';
283 i = 1;
284 break;
285 }
286 int cc = ptr[i];
287 if (cc >= 'a')
288 cc -= 32; /* Convert to upper case */
289 c = c * 16 + cc - ((cc < 'A') ? '0' : ('A' - 10));
290 }
291 ptr += i - 1;
292 break;
293 }
294
295 case 'c':
296 if (++ptr == patternEnd) {
297 *errorcodeptr = ERR2;
298 return 0;
299 }
300 c = *ptr;
301
302 /* A letter is upper-cased; then the 0x40 bit is flipped. This c oding
303 is ASCII-specific, but then the whole concept of \cx is ASCII-s pecific. */
304 c = toASCIIUpper(c) ^ 0x40;
305 break;
306 }
307 }
308
309 *ptrptr = ptr;
310 return c;
311 }
312
313 /*************************************************
314 * Check for counted repeat *
315 *************************************************/
316
317 /* This function is called when a '{' is encountered in a place where it might
318 start a quantifier. It looks ahead to see if it really is a quantifier or not.
319 It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
320 where the ddds are digits.
321
322 Arguments:
323 p pointer to the first char after '{'
324
325 Returns: true or false
326 */
327
328 static bool isCountedRepeat(const UChar* p, const UChar* patternEnd)
329 {
330 if (p >= patternEnd || !isASCIIDigit(*p))
331 return false;
332 p++;
333 while (p < patternEnd && isASCIIDigit(*p))
334 p++;
335 if (p < patternEnd && *p == '}')
336 return true;
337
338 if (p >= patternEnd || *p++ != ',')
339 return false;
340 if (p < patternEnd && *p == '}')
341 return true;
342
343 if (p >= patternEnd || !isASCIIDigit(*p))
344 return false;
345 p++;
346 while (p < patternEnd && isASCIIDigit(*p))
347 p++;
348
349 return (p < patternEnd && *p == '}');
350 }
351
352 /*************************************************
353 * Read repeat counts *
354 *************************************************/
355
356 /* Read an item of the form {n,m} and return the values. This is called only
357 after isCountedRepeat() has confirmed that a repeat-count quantifier exists,
358 so the syntax is guaranteed to be correct, but we need to check the values.
359
360 Arguments:
361 p pointer to first char after '{'
362 minp pointer to int for min
363 maxp pointer to int for max
364 returned as -1 if no max
365 errorcodeptr points to error code variable
366
367 Returns: pointer to '}' on success;
368 current ptr on error, with errorcodeptr set non-zero
369 */
370
371 static const UChar* readRepeatCounts(const UChar* p, int* minp, int* maxp, Error Code* errorcodeptr)
372 {
373 int min = 0;
374 int max = -1;
375
376 /* Read the minimum value and do a paranoid check: a negative value indicate s
377 an integer overflow. */
378
379 while (isASCIIDigit(*p))
380 min = min * 10 + *p++ - '0';
381 if (min < 0 || min > 65535) {
382 *errorcodeptr = ERR5;
383 return p;
384 }
385
386 /* Read the maximum value if there is one, and again do a paranoid on its si ze.
387 Also, max must not be less than min. */
388
389 if (*p == '}')
390 max = min;
391 else {
392 if (*(++p) != '}') {
393 max = 0;
394 while (isASCIIDigit(*p))
395 max = max * 10 + *p++ - '0';
396 if (max < 0 || max > 65535) {
397 *errorcodeptr = ERR5;
398 return p;
399 }
400 if (max < min) {
401 *errorcodeptr = ERR4;
402 return p;
403 }
404 }
405 }
406
407 /* Fill in the required variables, and pass back the pointer to the terminat ing
408 '}'. */
409
410 *minp = min;
411 *maxp = max;
412 return p;
413 }
414
415 /*************************************************
416 * Find first significant op code *
417 *************************************************/
418
419 /* This is called by several functions that scan a compiled expression looking
420 for a fixed first character, or an anchoring op code etc. It skips over things
421 that do not influence this.
422
423 Arguments:
424 code pointer to the start of the group
425 Returns: pointer to the first significant opcode
426 */
427
428 static const unsigned char* firstSignificantOpcode(const unsigned char* code)
429 {
430 while (*code == OP_BRANUMBER)
431 code += 3;
432 return code;
433 }
434
435 static const unsigned char* firstSignificantOpcodeSkippingAssertions(const unsig ned char* code)
436 {
437 while (true) {
438 switch (*code) {
439 case OP_ASSERT_NOT:
440 advanceToEndOfBracket(code);
441 code += 1 + LINK_SIZE;
442 break;
443 case OP_WORD_BOUNDARY:
444 case OP_NOT_WORD_BOUNDARY:
445 ++code;
446 break;
447 case OP_BRANUMBER:
448 code += 3;
449 break;
450 default:
451 return code;
452 }
453 }
454 }
455
456 /*************************************************
457 * Get othercase range *
458 *************************************************/
459
460 /* This function is passed the start and end of a class range, in UTF-8 mode
461 with UCP support. It searches up the characters, looking for internal ranges of
462 characters in the "other" case. Each call returns the next one, updating the
463 start address.
464
465 Arguments:
466 cptr points to starting character value; updated
467 d end value
468 ocptr where to put start of othercase range
469 odptr where to put end of othercase range
470
471 Yield: true when range returned; false when no more
472 */
473
474 static bool getOthercaseRange(int* cptr, int d, int* ocptr, int* odptr)
475 {
476 int c, othercase = 0;
477
478 for (c = *cptr; c <= d; c++) {
479 if ((othercase = kjs_pcre_ucp_othercase(c)) >= 0)
480 break;
481 }
482
483 if (c > d)
484 return false;
485
486 *ocptr = othercase;
487 int next = othercase + 1;
488
489 for (++c; c <= d; c++) {
490 if (kjs_pcre_ucp_othercase(c) != next)
491 break;
492 next++;
493 }
494
495 *odptr = next - 1;
496 *cptr = c;
497
498 return true;
499 }
500
501 /*************************************************
502 * Convert character value to UTF-8 *
503 *************************************************/
504
505 /* This function takes an integer value in the range 0 - 0x7fffffff
506 and encodes it as a UTF-8 character in 0 to 6 bytes.
507
508 Arguments:
509 cvalue the character value
510 buffer pointer to buffer for result - at least 6 bytes long
511
512 Returns: number of characters placed in the buffer
513 */
514
515 static int encodeUTF8(int cvalue, unsigned char *buffer)
516 {
517 int i;
518 for (i = 0; i < kjs_pcre_utf8_table1_size; i++)
519 if (cvalue <= kjs_pcre_utf8_table1[i])
520 break;
521 buffer += i;
522 for (int j = i; j > 0; j--) {
523 *buffer-- = 0x80 | (cvalue & 0x3f);
524 cvalue >>= 6;
525 }
526 *buffer = kjs_pcre_utf8_table2[i] | cvalue;
527 return i + 1;
528 }
529
530 /*************************************************
531 * Compile one branch *
532 *************************************************/
533
534 /* Scan the pattern, compiling it into the code vector.
535
536 Arguments:
537 options the option bits
538 brackets points to number of extracting brackets used
539 codeptr points to the pointer to the current code point
540 ptrptr points to the current pattern pointer
541 errorcodeptr points to error code variable
542 firstbyteptr set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)
543 reqbyteptr set to the last literal character required, else < 0
544 cd contains pointers to tables etc.
545
546 Returns: true on success
547 false, with *errorcodeptr set non-zero on error
548 */
549
550 static inline bool safelyCheckNextChar(const UChar* ptr, const UChar* patternEnd , UChar expected)
551 {
552 return ((ptr + 1 < patternEnd) && ptr[1] == expected);
553 }
554
555 static bool
556 compileBranch(int options, int* brackets, unsigned char** codeptr,
557 const UChar** ptrptr, const UChar* patternEnd, ErrorCode* errorco deptr, int *firstbyteptr,
558 int* reqbyteptr, CompileData& cd)
559 {
560 int repeat_type, op_type;
561 int repeat_min = 0, repeat_max = 0; /* To please picky compilers */
562 int bravalue = 0;
563 int reqvary, tempreqvary;
564 int c;
565 unsigned char* code = *codeptr;
566 unsigned char* tempcode;
567 bool groupsetfirstbyte = false;
568 const UChar* ptr = *ptrptr;
569 const UChar* tempptr;
570 unsigned char* previous = NULL;
571 unsigned char classbits[32];
572
573 bool class_utf8;
574 unsigned char* class_utf8data;
575 unsigned char utf8_char[6];
576
577 /* Initialize no first byte, no required byte. REQ_UNSET means "no char
578 matching encountered yet". It gets changed to REQ_NONE if we hit something that
579 matches a non-fixed char first char; reqbyte just remains unset if we never
580 find one.
581
582 When we hit a repeat whose minimum is zero, we may have to adjust these val ues
583 to take the zero repeat into account. This is implemented by setting them t o
584 zerofirstbyte and zeroreqbyte when such a repeat is encountered. The indivi dual
585 item types that can be repeated set these backoff variables appropriately. */
586
587 int firstbyte = REQ_UNSET;
588 int reqbyte = REQ_UNSET;
589 int zeroreqbyte = REQ_UNSET;
590 int zerofirstbyte = REQ_UNSET;
591
592 /* The variable req_caseopt contains either the REQ_IGNORE_CASE value or zer o,
593 according to the current setting of the ignores-case flag. REQ_IGNORE_CASE is a bit
594 value > 255. It is added into the firstbyte or reqbyte variables to record the
595 case status of the value. This is used only for ASCII characters. */
596
597 int req_caseopt = (options & IgnoreCaseOption) ? REQ_IGNORE_CASE : 0;
598
599 /* Switch on next character until the end of the branch */
600
601 for (;; ptr++) {
602 bool negate_class;
603 bool should_flip_negation; /* If a negative special such as \S is used, we should negate the whole class to properly support Unicode. */
604 int class_charcount;
605 int class_lastchar;
606 int skipbytes;
607 int subreqbyte;
608 int subfirstbyte;
609 int mclength;
610 unsigned char mcbuffer[8];
611
612 /* Next byte in the pattern */
613
614 c = ptr < patternEnd ? *ptr : 0;
615
616 /* Fill in length of a previous callout, except when the next thing is
617 a quantifier. */
618
619 bool is_quantifier = c == '*' || c == '+' || c == '?' || (c == '{' && is CountedRepeat(ptr + 1, patternEnd));
620
621 switch (c) {
622 /* The branch terminates at end of string, |, or ). */
623
624 case 0:
625 if (ptr < patternEnd)
626 goto NORMAL_CHAR;
627 // End of string; fall through
628 case '|':
629 case ')':
630 *firstbyteptr = firstbyte;
631 *reqbyteptr = reqbyte;
632 *codeptr = code;
633 *ptrptr = ptr;
634 return true;
635
636 /* Handle single-character metacharacters. In multiline mode, ^ disa bles
637 the setting of any following char as a first character. */
638
639 case '^':
640 if (options & MatchAcrossMultipleLinesOption) {
641 if (firstbyte == REQ_UNSET)
642 firstbyte = REQ_NONE;
643 *code++ = OP_BOL;
644 } else
645 *code++ = OP_CIRC;
646 previous = NULL;
647 break;
648
649 case '$':
650 previous = NULL;
651 if (options & MatchAcrossMultipleLinesOption)
652 *code++ = OP_EOL;
653 else
654 *code++ = OP_DOLL;
655 break;
656
657 /* There can never be a first char if '.' is first, whatever happens about
658 repeats. The value of reqbyte doesn't change either. */
659
660 case '.':
661 if (firstbyte == REQ_UNSET)
662 firstbyte = REQ_NONE;
663 zerofirstbyte = firstbyte;
664 zeroreqbyte = reqbyte;
665 previous = code;
666 *code++ = OP_NOT_NEWLINE;
667 break;
668
669 /* Character classes. If the included characters are all < 256, we b uild a
670 32-byte bitmap of the permitted characters, except in the special c ase
671 where there is only one such character. For negated classes, we bui ld the
672 map as usual, then invert it at the end. However, we use a differen t opcode
673 so that data characters > 255 can be handled correctly.
674
675 If the class contains characters outside the 0-255 range, a differe nt
676 opcode is compiled. It may optionally have a bit map for characters < 256,
677 but those above are are explicitly listed afterwards. A flag byte t ells
678 whether the bitmap is present, and whether this is a negated class or not.
679 */
680
681 case '[': {
682 previous = code;
683 should_flip_negation = false;
684
685 /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
686 they are encountered at the top level, so we'll do that too. */
687
688 /* If the first character is '^', set the negation flag and skip it. */
689
690 if (ptr + 1 >= patternEnd) {
691 *errorcodeptr = ERR6;
692 return false;
693 }
694
695 if (ptr[1] == '^') {
696 negate_class = true;
697 ++ptr;
698 } else
699 negate_class = false;
700
701 /* Keep a count of chars with values < 256 so that we can optimi ze the case
702 of just a single character (as long as it's < 256). For higher valued UTF-8
703 characters, we don't yet do any optimization. */
704
705 class_charcount = 0;
706 class_lastchar = -1;
707
708 class_utf8 = false; /* No chars >= 256 */
709 class_utf8data = code + LINK_SIZE + 34; /* For UTF-8 items */
710
711 /* Initialize the 32-char bit map to all zeros. We have to build the
712 map in a temporary bit of store, in case the class contains onl y 1
713 character (< 256), because in that case the compiled code doesn 't use the
714 bit map. */
715
716 memset(classbits, 0, 32 * sizeof(unsigned char));
717
718 /* Process characters until ] is reached. The first pass
719 through the regex checked the overall syntax, so we don't need to be very
720 strict here. At the start of the loop, c contains the first byt e of the
721 character. */
722
723 while ((++ptr < patternEnd) && (c = *ptr) != ']') {
724 /* Backslash may introduce a single character, or it may int roduce one
725 of the specials, which just set a flag. Escaped items are c hecked for
726 validity in the pre-compiling pass. The sequence \b is a sp ecial case.
727 Inside a class (and only there) it is treated as backspace. Elsewhere
728 it marks a word boundary. Other escapes have preset maps re ady to
729 or into the one we are building. We assume they have more t han one
730 character in them, so set class_charcount bigger than one. */
731
732 if (c == '\\') {
733 c = checkEscape(&ptr, patternEnd, errorcodeptr, cd.numCa pturingBrackets, true);
734 if (c < 0) {
735 class_charcount += 2; /* Greater than 1 is what matters */
736 switch (-c) {
737 case ESC_d:
738 for (c = 0; c < 32; c++)
739 classbits[c] |= classBitmapForChar(c + c bit_digit);
740 continue;
741
742 case ESC_D:
743 should_flip_negation = true;
744 for (c = 0; c < 32; c++)
745 classbits[c] |= ~classBitmapForChar(c + cbit_digit);
746 continue;
747
748 case ESC_w:
749 for (c = 0; c < 32; c++)
750 classbits[c] |= classBitmapForChar(c + c bit_word);
751 continue;
752
753 case ESC_W:
754 should_flip_negation = true;
755 for (c = 0; c < 32; c++)
756 classbits[c] |= ~classBitmapForChar(c + cbit_word);
757 continue;
758
759 case ESC_s:
760 for (c = 0; c < 32; c++)
761 classbits[c] |= classBitmapForChar(c + cbit_space);
762 continue;
763
764 case ESC_S:
765 should_flip_negation = true;
766 for (c = 0; c < 32; c++)
767 classbits[c] |= ~classBitmapForChar(c + cbit_space);
768 continue;
769
770 /* Unrecognized escapes are faulted if PCRE is running in its
771 strict mode. By default, for compatibility with Perl, they are
772 treated as literals. */
773
774 default:
775 c = *ptr; /* The final characte r */
776 class_charcount -= 2; /* Undo the default c ount from above */
777 }
778 }
779
780 /* Fall through if we have a single character (c >= 0). This may be
781 > 256 in UTF-8 mode. */
782
783 } /* End of backslash handling */
784
785 /* A single character may be followed by '-' to form a range . However,
786 Perl does not permit ']' to be the end of the range. A '-' character
787 here is treated as a literal. */
788
789 if ((ptr + 2 < patternEnd) && ptr[1] == '-' && ptr[2] != ']' ) {
790 ptr += 2;
791
792 int d = *ptr;
793
794 /* The second part of a range can be a single-character escape, but
795 not any of the other escapes. Perl 5.6 treats a hyphen as a literal
796 in such circumstances. */
797
798 if (d == '\\') {
799 const UChar* oldptr = ptr;
800 d = checkEscape(&ptr, patternEnd, errorcodeptr, cd.n umCapturingBrackets, true);
801
802 /* \X is literal X; any other special means the '-' was literal */
803 if (d < 0) {
804 ptr = oldptr - 2;
805 goto LONE_SINGLE_CHARACTER; /* A few lines belo w */
806 }
807 }
808
809 /* The check that the two values are in the correct orde r happens in
810 the pre-pass. Optimize one-character ranges */
811
812 if (d == c)
813 goto LONE_SINGLE_CHARACTER; /* A few lines below */
814
815 /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
816 matching, we have to use an XCLASS with extra data item s. Caseless
817 matching for characters > 127 is available only if UCP support is
818 available. */
819
820 if ((d > 255 || ((options & IgnoreCaseOption) && d > 127 ))) {
821 class_utf8 = true;
822
823 /* With UCP support, we can find the other case equi valents of
824 the relevant characters. There may be several range s. Optimize how
825 they fit with the basic range. */
826
827 if (options & IgnoreCaseOption) {
828 int occ, ocd;
829 int cc = c;
830 int origd = d;
831 while (getOthercaseRange(&cc, origd, &occ, &ocd) ) {
832 if (occ >= c && ocd <= d)
833 continue; /* Skip embedded ranges */
834
835 if (occ < c && ocd >= c - 1) /* Exte nd the basic range */
836 { /* if the re is overlap, */
837 c = occ; /* no ting that if occ < c */
838 continue; /* we can't have ocd > d */
839 } /* becaus e a subrange is */
840 if (ocd > d && occ <= d + 1) /* alwa ys shorter than */
841 { /* the ba sic range. */
842 d = ocd;
843 continue;
844 }
845
846 if (occ == ocd)
847 *class_utf8data++ = XCL_SINGLE;
848 else {
849 *class_utf8data++ = XCL_RANGE;
850 class_utf8data += encodeUTF8(occ, class_ utf8data);
851 }
852 class_utf8data += encodeUTF8(ocd, class_utf8 data);
853 }
854 }
855
856 /* Now record the original range, possibly modified for UCP caseless
857 overlapping ranges. */
858
859 *class_utf8data++ = XCL_RANGE;
860 class_utf8data += encodeUTF8(c, class_utf8data);
861 class_utf8data += encodeUTF8(d, class_utf8data);
862
863 /* With UCP support, we are done. Without UCP suppor t, there is no
864 caseless matching for UTF-8 characters > 127; we ca n use the bit map
865 for the smaller ones. */
866
867 continue; /* With next character in the class */
868 }
869
870 /* We use the bit map for all cases when not in UTF-8 mo de; else
871 ranges that lie entirely within 0-127 when there is UCP support; else
872 for partial ranges without UCP support. */
873
874 for (; c <= d; c++) {
875 classbits[c/8] |= (1 << (c&7));
876 if (options & IgnoreCaseOption) {
877 int uc = flipCase(c);
878 classbits[uc/8] |= (1 << (uc&7));
879 }
880 class_charcount++; /* in case a one-c har range */
881 class_lastchar = c;
882 }
883
884 continue; /* Go get the next char in the class */
885 }
886
887 /* Handle a lone single character - we can get here for a no rmal
888 non-escape char, or after \ that introduces a single charac ter or for an
889 apparent range that isn't. */
890
891 LONE_SINGLE_CHARACTER:
892
893 /* Handle a character that cannot go in the bit map */
894
895 if ((c > 255 || ((options & IgnoreCaseOption) && c > 127))) {
896 class_utf8 = true;
897 *class_utf8data++ = XCL_SINGLE;
898 class_utf8data += encodeUTF8(c, class_utf8data);
899
900 if (options & IgnoreCaseOption) {
901 int othercase;
902 if ((othercase = kjs_pcre_ucp_othercase(c)) >= 0) {
903 *class_utf8data++ = XCL_SINGLE;
904 class_utf8data += encodeUTF8(othercase, class_ut f8data);
905 }
906 }
907 } else {
908 /* Handle a single-byte character */
909 classbits[c/8] |= (1 << (c&7));
910 if (options & IgnoreCaseOption) {
911 c = flipCase(c);
912 classbits[c/8] |= (1 << (c&7));
913 }
914 class_charcount++;
915 class_lastchar = c;
916 }
917 }
918
919 /* If class_charcount is 1, we saw precisely one character whose value is
920 less than 256. In non-UTF-8 mode we can always optimize. In UTF -8 mode, we
921 can optimize the negative case only if there were no characters >= 128
922 because OP_NOT and the related opcodes like OP_NOTSTAR operate on
923 single-bytes only. This is an historical hangover. Maybe one da y we can
924 tidy these opcodes to handle multi-byte characters.
925
926 The optimization throws away the bit map. We turn the item into a
927 1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's neg ative. Note
928 that OP_NOT does not support multibyte characters. In the posit ive case, it
929 can cause firstbyte to be set. Otherwise, there can be no first char if
930 this item is first, whatever repeat count may follow. In the ca se of
931 reqbyte, save the previous value for reinstating. */
932
933 if (class_charcount == 1 && (!class_utf8 && (!negate_class || cl ass_lastchar < 128))) {
934 zeroreqbyte = reqbyte;
935
936 /* The OP_NOT opcode works on one-byte characters only. */
937
938 if (negate_class) {
939 if (firstbyte == REQ_UNSET)
940 firstbyte = REQ_NONE;
941 zerofirstbyte = firstbyte;
942 *code++ = OP_NOT;
943 *code++ = class_lastchar;
944 break;
945 }
946
947 /* For a single, positive character, get the value into c, a nd
948 then we can handle this with the normal one-character code. */
949
950 c = class_lastchar;
951 goto NORMAL_CHAR;
952 } /* End of 1-char optimization */
953
954 /* The general case - not the one-char optimization. If this is the first
955 thing in the branch, there can be no first char setting, whatev er the
956 repeat count. Any reqbyte setting must remain unchanged after a ny kind of
957 repeat. */
958
959 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
960 zerofirstbyte = firstbyte;
961 zeroreqbyte = reqbyte;
962
963 /* If there are characters with values > 255, we have to compile an
964 extended class, with its own opcode. If there are no characters < 256,
965 we can omit the bitmap. */
966
967 if (class_utf8 && !should_flip_negation) {
968 *class_utf8data++ = XCL_END; /* Marks the end of extra da ta */
969 *code++ = OP_XCLASS;
970 code += LINK_SIZE;
971 *code = negate_class? XCL_NOT : 0;
972
973 /* If the map is required, install it, and move on to the en d of
974 the extra data */
975
976 if (class_charcount > 0) {
977 *code++ |= XCL_MAP;
978 memcpy(code, classbits, 32);
979 code = class_utf8data;
980 }
981
982 /* If the map is not required, slide down the extra data. */
983
984 else {
985 int len = class_utf8data - (code + 33);
986 memmove(code + 1, code + 33, len);
987 code += len + 1;
988 }
989
990 /* Now fill in the complete length of the item */
991
992 putLinkValue(previous + 1, code - previous);
993 break; /* End of class handling */
994 }
995
996 /* If there are no characters > 255, negate the 32-byte map if n ecessary,
997 and copy it into the code vector. If this is the first thing in the branch,
998 there can be no first char setting, whatever the repeat count. Any reqbyte
999 setting must remain unchanged after any kind of repeat. */
1000
1001 *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP _NCLASS;
1002 if (negate_class)
1003 for (c = 0; c < 32; c++)
1004 code[c] = ~classbits[c];
1005 else
1006 memcpy(code, classbits, 32);
1007 code += 32;
1008 break;
1009 }
1010
1011 /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
1012 has been tested above. */
1013
1014 case '{':
1015 if (!is_quantifier)
1016 goto NORMAL_CHAR;
1017 ptr = readRepeatCounts(ptr + 1, &repeat_min, &repeat_max, errorc odeptr);
1018 if (*errorcodeptr)
1019 goto FAILED;
1020 goto REPEAT;
1021
1022 case '*':
1023 repeat_min = 0;
1024 repeat_max = -1;
1025 goto REPEAT;
1026
1027 case '+':
1028 repeat_min = 1;
1029 repeat_max = -1;
1030 goto REPEAT;
1031
1032 case '?':
1033 repeat_min = 0;
1034 repeat_max = 1;
1035
1036 REPEAT:
1037 if (!previous) {
1038 *errorcodeptr = ERR9;
1039 goto FAILED;
1040 }
1041
1042 if (repeat_min == 0) {
1043 firstbyte = zerofirstbyte; /* Adjust for zero repeat */
1044 reqbyte = zeroreqbyte; /* Ditto */
1045 }
1046
1047 /* Remember whether this is a variable length repeat */
1048
1049 reqvary = (repeat_min == repeat_max) ? 0 : REQ_VARY;
1050
1051 op_type = 0; /* Default single-char op codes */
1052
1053 /* Save start of previous item, in case we have to move it up to make space
1054 for an inserted OP_ONCE for the additional '+' extension. */
1055 /* FIXME: Probably don't need this because we don't use OP_ONCE. */
1056
1057 tempcode = previous;
1058
1059 /* If the next character is '+', we have a possessive quantifier . This
1060 implies greediness, whatever the setting of the PCRE_UNGREEDY o ption.
1061 If the next character is '?' this is a minimizing repeat, by de fault,
1062 but if PCRE_UNGREEDY is set, it works the other way round. We c hange the
1063 repeat type to the non-default. */
1064
1065 if (safelyCheckNextChar(ptr, patternEnd, '?')) {
1066 repeat_type = 1;
1067 ptr++;
1068 } else
1069 repeat_type = 0;
1070
1071 /* If previous was a character match, abolish the item and generate a
1072 repeat item instead. If a char item has a minimum of more than one, ensure
1073 that it is set in reqbyte - it might not be if a sequence such as x{3} is
1074 the first thing in a branch because the x will have gone into firstbyte
1075 instead. */
1076
1077 if (*previous == OP_CHAR || *previous == OP_CHAR_IGNORING_CASE) {
1078 /* Deal with UTF-8 characters that take up more than one byt e. It's
1079 easier to write this out separately than try to macrify it. Use c to
1080 hold the length of the character in bytes, plus 0x80 to fla g that it's a
1081 length rather than a small character. */
1082
1083 if (code[-1] & 0x80) {
1084 unsigned char *lastchar = code - 1;
1085 while((*lastchar & 0xc0) == 0x80)
1086 lastchar--;
1087 c = code - lastchar; /* Length of UTF-8 chara cter */
1088 memcpy(utf8_char, lastchar, c); /* Save the char */
1089 c |= 0x80; /* Flag c as a length */
1090 }
1091 else {
1092 c = code[-1];
1093 if (repeat_min > 1)
1094 reqbyte = c | req_caseopt | cd.req_varyopt;
1095 }
1096
1097 goto OUTPUT_SINGLE_REPEAT; /* Code shared with single char acter types */
1098 }
1099
1100 else if (*previous == OP_ASCII_CHAR || *previous == OP_ASCII_LET TER_IGNORING_CASE) {
1101 c = previous[1];
1102 if (repeat_min > 1)
1103 reqbyte = c | req_caseopt | cd.req_varyopt;
1104 goto OUTPUT_SINGLE_REPEAT;
1105 }
1106
1107 /* If previous was a single negated character ([^a] or similar), we use
1108 one of the special opcodes, replacing it. The code is shared with single-
1109 character repeats by setting op_type to add a suitable offset into
1110 repeat_type. OP_NOT is currently used only for single-byte chars. */
1111
1112 else if (*previous == OP_NOT) {
1113 op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */
1114 c = previous[1];
1115 goto OUTPUT_SINGLE_REPEAT;
1116 }
1117
1118 /* If previous was a character type match (\d or similar), aboli sh it and
1119 create a suitable repeat item. The code is shared with single-c haracter
1120 repeats by setting op_type to add a suitable offset into repeat _type. */
1121
1122 else if (*previous <= OP_NOT_NEWLINE) {
1123 op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
1124 c = *previous;
1125
1126 OUTPUT_SINGLE_REPEAT:
1127 int prop_type = -1;
1128 int prop_value = -1;
1129
1130 unsigned char* oldcode = code;
1131 code = previous; /* Usually overwrite previ ous item */
1132
1133 /* If the maximum is zero then the minimum must also be zero ; Perl allows
1134 this case, so we do too - by simply omitting the item altog ether. */
1135
1136 if (repeat_max == 0)
1137 goto END_REPEAT;
1138
1139 /* Combine the op_type with the repeat_type */
1140
1141 repeat_type += op_type;
1142
1143 /* A minimum of zero is handled either as the special case * or ?, or as
1144 an UPTO, with the maximum given. */
1145
1146 if (repeat_min == 0) {
1147 if (repeat_max == -1)
1148 *code++ = OP_STAR + repeat_type;
1149 else if (repeat_max == 1)
1150 *code++ = OP_QUERY + repeat_type;
1151 else {
1152 *code++ = OP_UPTO + repeat_type;
1153 put2ByteValueAndAdvance(code, repeat_max);
1154 }
1155 }
1156
1157 /* A repeat minimum of 1 is optimized into some special cases. If the
1158 maximum is unlimited, we use OP_PLUS. Otherwise, the original item is
1159 left in place and, if the maximum is greater than 1, we use OP_UPTO with
1160 one less than the maximum. */
1161
1162 else if (repeat_min == 1) {
1163 if (repeat_max == -1)
1164 *code++ = OP_PLUS + repeat_type;
1165 else {
1166 code = oldcode; /* leave previous it em in place */
1167 if (repeat_max == 1)
1168 goto END_REPEAT;
1169 *code++ = OP_UPTO + repeat_type;
1170 put2ByteValueAndAdvance(code, repeat_max - 1);
1171 }
1172 }
1173
1174 /* The case {n,n} is just an EXACT, while the general case { n,m} is
1175 handled as an EXACT followed by an UPTO. */
1176
1177 else {
1178 *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */
1179 put2ByteValueAndAdvance(code, repeat_min);
1180
1181 /* If the maximum is unlimited, insert an OP_STAR. Befor e doing so,
1182 we have to insert the character for the previous code. For a repeated
1183 Unicode property match, there are two extra bytes that define the
1184 required property. In UTF-8 mode, long characters have their length in
1185 c, with the 0x80 bit as a flag. */
1186
1187 if (repeat_max < 0) {
1188 if (c >= 128) {
1189 memcpy(code, utf8_char, c & 7);
1190 code += c & 7;
1191 } else {
1192 *code++ = c;
1193 if (prop_type >= 0) {
1194 *code++ = prop_type;
1195 *code++ = prop_value;
1196 }
1197 }
1198 *code++ = OP_STAR + repeat_type;
1199 }
1200
1201 /* Else insert an UPTO if the max is greater than the mi n, again
1202 preceded by the character, for the previously inserted code. */
1203
1204 else if (repeat_max != repeat_min) {
1205 if (c >= 128) {
1206 memcpy(code, utf8_char, c & 7);
1207 code += c & 7;
1208 } else
1209 *code++ = c;
1210 if (prop_type >= 0) {
1211 *code++ = prop_type;
1212 *code++ = prop_value;
1213 }
1214 repeat_max -= repeat_min;
1215 *code++ = OP_UPTO + repeat_type;
1216 put2ByteValueAndAdvance(code, repeat_max);
1217 }
1218 }
1219
1220 /* The character or character type itself comes last in all cases. */
1221
1222 if (c >= 128) {
1223 memcpy(code, utf8_char, c & 7);
1224 code += c & 7;
1225 } else
1226 *code++ = c;
1227
1228 /* For a repeated Unicode property match, there are two extr a bytes that
1229 define the required property. */
1230
1231 if (prop_type >= 0) {
1232 *code++ = prop_type;
1233 *code++ = prop_value;
1234 }
1235 }
1236
1237 /* If previous was a character class or a back reference, we put the repeat
1238 stuff after it, but just skip the item if the repeat was {0,0}. */
1239
1240 else if (*previous == OP_CLASS ||
1241 *previous == OP_NCLASS ||
1242 *previous == OP_XCLASS ||
1243 *previous == OP_REF)
1244 {
1245 if (repeat_max == 0) {
1246 code = previous;
1247 goto END_REPEAT;
1248 }
1249
1250 if (repeat_min == 0 && repeat_max == -1)
1251 *code++ = OP_CRSTAR + repeat_type;
1252 else if (repeat_min == 1 && repeat_max == -1)
1253 *code++ = OP_CRPLUS + repeat_type;
1254 else if (repeat_min == 0 && repeat_max == 1)
1255 *code++ = OP_CRQUERY + repeat_type;
1256 else {
1257 *code++ = OP_CRRANGE + repeat_type;
1258 put2ByteValueAndAdvance(code, repeat_min);
1259 if (repeat_max == -1)
1260 repeat_max = 0; /* 2-byte encoding for max */
1261 put2ByteValueAndAdvance(code, repeat_max);
1262 }
1263 }
1264
1265 /* If previous was a bracket group, we may have to replicate it in certain
1266 cases. */
1267
1268 else if (*previous >= OP_BRA) {
1269 int ketoffset = 0;
1270 int len = code - previous;
1271 unsigned char* bralink = NULL;
1272
1273 /* If the maximum repeat count is unlimited, find the end of the bracket
1274 by scanning through from the start, and compute the offset back to it
1275 from the current code pointer. There may be an OP_OPT setti ng following
1276 the final KET, so we can't find the end just by going back from the code
1277 pointer. */
1278
1279 if (repeat_max == -1) {
1280 const unsigned char* ket = previous;
1281 advanceToEndOfBracket(ket);
1282 ketoffset = code - ket;
1283 }
1284
1285 /* The case of a zero minimum is special because of the need to stick
1286 OP_BRAZERO in front of it, and because the group appears on ce in the
1287 data, whereas in other cases it appears the minimum number of times. For
1288 this reason, it is simplest to treat this case separately, as otherwise
1289 the code gets far too messy. There are several special subc ases when the
1290 minimum is zero. */
1291
1292 if (repeat_min == 0) {
1293 /* If the maximum is also zero, we just omit the group f rom the output
1294 altogether. */
1295
1296 if (repeat_max == 0) {
1297 code = previous;
1298 goto END_REPEAT;
1299 }
1300
1301 /* If the maximum is 1 or unlimited, we just have to sti ck in the
1302 BRAZERO and do no more at this point. However, we do ne ed to adjust
1303 any OP_RECURSE calls inside the group that refer to the group itself or
1304 any internal group, because the offset is from the star t of the whole
1305 regex. Temporarily terminate the pattern while doing th is. */
1306
1307 if (repeat_max <= 1) {
1308 *code = OP_END;
1309 memmove(previous+1, previous, len);
1310 code++;
1311 *previous++ = OP_BRAZERO + repeat_type;
1312 }
1313
1314 /* If the maximum is greater than 1 and limited, we have to replicate
1315 in a nested fashion, sticking OP_BRAZERO before each se t of brackets.
1316 The first one has to be handled carefully because it's the original
1317 copy, which has to be moved up. The remainder can be ha ndled by code
1318 that is common with the non-zero minimum case below. We have to
1319 adjust the value of repeat_max, since one less copy is required. */
1320
1321 else {
1322 *code = OP_END;
1323 memmove(previous + 2 + LINK_SIZE, previous, len);
1324 code += 2 + LINK_SIZE;
1325 *previous++ = OP_BRAZERO + repeat_type;
1326 *previous++ = OP_BRA;
1327
1328 /* We chain together the bracket offset fields that have to be
1329 filled in later when the ends of the brackets are r eached. */
1330
1331 int offset = (!bralink) ? 0 : previous - bralink;
1332 bralink = previous;
1333 putLinkValueAllowZeroAndAdvance(previous, offset);
1334 }
1335
1336 repeat_max--;
1337 }
1338
1339 /* If the minimum is greater than zero, replicate the group as many
1340 times as necessary, and adjust the maximum to the number of subsequent
1341 copies that we need. If we set a first char from the group, and didn't
1342 set a required char, copy the latter from the former. */
1343
1344 else {
1345 if (repeat_min > 1) {
1346 if (groupsetfirstbyte && reqbyte < 0)
1347 reqbyte = firstbyte;
1348 for (int i = 1; i < repeat_min; i++) {
1349 memcpy(code, previous, len);
1350 code += len;
1351 }
1352 }
1353 if (repeat_max > 0)
1354 repeat_max -= repeat_min;
1355 }
1356
1357 /* This code is common to both the zero and non-zero minimum cases. If
1358 the maximum is limited, it replicates the group in a nested fashion,
1359 remembering the bracket starts on a stack. In the case of a zero minimum,
1360 the first one was set up above. In all cases the repeat_max now specifies
1361 the number of additional copies needed. */
1362
1363 if (repeat_max >= 0) {
1364 for (int i = repeat_max - 1; i >= 0; i--) {
1365 *code++ = OP_BRAZERO + repeat_type;
1366
1367 /* All but the final copy start a new nesting, maint aining the
1368 chain of brackets outstanding. */
1369
1370 if (i != 0) {
1371 *code++ = OP_BRA;
1372 int offset = (!bralink) ? 0 : code - bralink;
1373 bralink = code;
1374 putLinkValueAllowZeroAndAdvance(code, offset);
1375 }
1376
1377 memcpy(code, previous, len);
1378 code += len;
1379 }
1380
1381 /* Now chain through the pending brackets, and fill in t heir length
1382 fields (which are holding the chain links pro tem). */
1383
1384 while (bralink) {
1385 int offset = code - bralink + 1;
1386 unsigned char* bra = code - offset;
1387 int oldlinkoffset = getLinkValueAllowZero(bra + 1);
1388 bralink = (!oldlinkoffset) ? 0 : bralink - oldlinkof fset;
1389 *code++ = OP_KET;
1390 putLinkValueAndAdvance(code, offset);
1391 putLinkValue(bra + 1, offset);
1392 }
1393 }
1394
1395 /* If the maximum is unlimited, set a repeater in the final copy. We
1396 can't just offset backwards from the current code point, be cause we
1397 don't know if there's been an options resetting after the k et. The
1398 correct offset was computed above. */
1399
1400 else
1401 code[-ketoffset] = OP_KETRMAX + repeat_type;
1402 }
1403
1404 /* Else there's some kind of shambles */
1405
1406 else {
1407 *errorcodeptr = ERR11;
1408 goto FAILED;
1409 }
1410
1411 /* In all cases we no longer have a previous item. We also set the
1412 "follows varying string" flag for subsequently encountered reqbytes if
1413 it isn't already set and we have just passed a varying length item. */
1414
1415 END_REPEAT:
1416 previous = NULL;
1417 cd.req_varyopt |= reqvary;
1418 break;
1419
1420 /* Start of nested bracket sub-expression, or comment or lookahead o r
1421 lookbehind or option setting or condition. First deal with special things
1422 that can come after a bracket; all are introduced by ?, and the app earance
1423 of any of them means that this is not a referencing group. They wer e
1424 checked for validity in the first pass over the string, so we don't have to
1425 check for syntax errors here. */
1426
1427 case '(':
1428 skipbytes = 0;
1429
1430 if (*(++ptr) == '?') {
1431 switch (*(++ptr)) {
1432 case ':': /* Non-extracting bracket */
1433 bravalue = OP_BRA;
1434 ptr++;
1435 break;
1436
1437 case '=': /* Positive lookahead */
1438 bravalue = OP_ASSERT;
1439 ptr++;
1440 break;
1441
1442 case '!': /* Negative lookahead */
1443 bravalue = OP_ASSERT_NOT;
1444 ptr++;
1445 break;
1446
1447 /* Character after (? not specially recognized */
1448
1449 default:
1450 *errorcodeptr = ERR12;
1451 goto FAILED;
1452 }
1453 }
1454
1455 /* Else we have a referencing group; adjust the opcode. If the b racket
1456 number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and
1457 arrange for the true number to follow later, in an OP_BRANUMBER item. */
1458
1459 else {
1460 if (++(*brackets) > EXTRACT_BASIC_MAX) {
1461 bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1;
1462 code[1 + LINK_SIZE] = OP_BRANUMBER;
1463 put2ByteValue(code + 2 + LINK_SIZE, *brackets);
1464 skipbytes = 3;
1465 }
1466 else
1467 bravalue = OP_BRA + *brackets;
1468 }
1469
1470 /* Process nested bracketed re. Assertions may not be repeated, but other
1471 kinds can be. We copy code into a non-variable in order to be a ble
1472 to pass its address because some compilers complain otherwise. Pass in a
1473 new setting for the ims options if they have changed. */
1474
1475 previous = (bravalue >= OP_BRAZERO) ? code : 0;
1476 *code = bravalue;
1477 tempcode = code;
1478 tempreqvary = cd.req_varyopt; /* Save value before bracket * /
1479
1480 if (!compileBracket(
1481 options,
1482 brackets, /* Extracting b racket count */
1483 &tempcode, /* Where to put code (updated) */
1484 &ptr, /* Input pointe r (updated) */
1485 patternEnd,
1486 errorcodeptr, /* Where to put an error message */
1487 skipbytes, /* Skip over OP _BRANUMBER */
1488 &subfirstbyte, /* For possible first char */
1489 &subreqbyte, /* For possible last char */
1490 cd)) /* Tables block */
1491 goto FAILED;
1492
1493 /* At the end of compiling, code is still pointing to the start of the
1494 group, while tempcode has been updated to point past the end of the group
1495 and any option resetting that may follow it. The pattern pointe r (ptr)
1496 is on the bracket. */
1497
1498 /* Handle updating of the required and first characters. Update for normal
1499 brackets of all kinds, and conditions with two branches (see co de above).
1500 If the bracket is followed by a quantifier with zero repeat, we have to
1501 back off. Hence the definition of zeroreqbyte and zerofirstbyte outside the
1502 main loop so that they can be accessed for the back off. */
1503
1504 zeroreqbyte = reqbyte;
1505 zerofirstbyte = firstbyte;
1506 groupsetfirstbyte = false;
1507
1508 if (bravalue >= OP_BRA) {
1509 /* If we have not yet set a firstbyte in this branch, take i t from the
1510 subpattern, remembering that it was set here so that a repe at of more
1511 than one can replicate it as reqbyte if necessary. If the s ubpattern has
1512 no firstbyte, set "none" for the whole branch. In both case s, a zero
1513 repeat forces firstbyte to "none". */
1514
1515 if (firstbyte == REQ_UNSET) {
1516 if (subfirstbyte >= 0) {
1517 firstbyte = subfirstbyte;
1518 groupsetfirstbyte = true;
1519 }
1520 else
1521 firstbyte = REQ_NONE;
1522 zerofirstbyte = REQ_NONE;
1523 }
1524
1525 /* If firstbyte was previously set, convert the subpattern's firstbyte
1526 into reqbyte if there wasn't one, using the vary flag that was in
1527 existence beforehand. */
1528
1529 else if (subfirstbyte >= 0 && subreqbyte < 0)
1530 subreqbyte = subfirstbyte | tempreqvary;
1531
1532 /* If the subpattern set a required byte (or set a first byt e that isn't
1533 really the first byte - see above), set it. */
1534
1535 if (subreqbyte >= 0)
1536 reqbyte = subreqbyte;
1537 }
1538
1539 /* For a forward assertion, we take the reqbyte, if set. This ca n be
1540 helpful if the pattern that follows the assertion doesn't set a different
1541 char. For example, it's useful for /(?=abcde).+/. We can't set firstbyte
1542 for an assertion, however because it leads to incorrect effect for patterns
1543 such as /(?=a)a.+/ when the "real" "a" would then become a reqb yte instead
1544 of a firstbyte. This is overcome by a scan at the end if there' s no
1545 firstbyte, looking for an asserted first char. */
1546
1547 else if (bravalue == OP_ASSERT && subreqbyte >= 0)
1548 reqbyte = subreqbyte;
1549
1550 /* Now update the main code pointer to the end of the group. */
1551
1552 code = tempcode;
1553
1554 /* Error if hit end of pattern */
1555
1556 if (ptr >= patternEnd || *ptr != ')') {
1557 *errorcodeptr = ERR14;
1558 goto FAILED;
1559 }
1560 break;
1561
1562 /* Check \ for being a real metacharacter; if not, fall through and handle
1563 it as a data character at the start of a string. Escape items are c hecked
1564 for validity in the pre-compiling pass. */
1565
1566 case '\\':
1567 tempptr = ptr;
1568 c = checkEscape(&ptr, patternEnd, errorcodeptr, cd.numCapturingB rackets, false);
1569
1570 /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1571 are arranged to be the negation of the corresponding OP_values. For the
1572 back references, the values are ESC_REF plus the reference numb er. Only
1573 back references and those types that consume a character may be repeated.
1574 We can test for values between ESC_b and ESC_w for the latter; this may
1575 have to change if any new ones are ever created. */
1576
1577 if (c < 0) {
1578 /* For metasequences that actually match a character, we dis able the
1579 setting of a first character if it hasn't already been set. */
1580
1581 if (firstbyte == REQ_UNSET && -c > ESC_b && -c <= ESC_w)
1582 firstbyte = REQ_NONE;
1583
1584 /* Set values to reset to if this is followed by a zero repe at. */
1585
1586 zerofirstbyte = firstbyte;
1587 zeroreqbyte = reqbyte;
1588
1589 /* Back references are handled specially */
1590
1591 if (-c >= ESC_REF) {
1592 int number = -c - ESC_REF;
1593 previous = code;
1594 *code++ = OP_REF;
1595 put2ByteValueAndAdvance(code, number);
1596 }
1597
1598 /* For the rest, we can obtain the OP value by negating the escape
1599 value */
1600
1601 else {
1602 previous = (-c > ESC_b && -c <= ESC_w) ? code : NULL;
1603 *code++ = -c;
1604 }
1605 continue;
1606 }
1607
1608 /* Fall through. */
1609
1610 /* Handle a literal character. It is guaranteed not to be whites pace or #
1611 when the extended flag is set. If we are in UTF-8 mode, it may be a
1612 multi-byte literal character. */
1613
1614 default:
1615 NORMAL_CHAR:
1616
1617 previous = code;
1618
1619 if (c < 128) {
1620 mclength = 1;
1621 mcbuffer[0] = c;
1622
1623 if ((options & IgnoreCaseOption) && (c | 0x20) >= 'a' && (c | 0x20) <= 'z') {
1624 *code++ = OP_ASCII_LETTER_IGNORING_CASE;
1625 *code++ = c | 0x20;
1626 } else {
1627 *code++ = OP_ASCII_CHAR;
1628 *code++ = c;
1629 }
1630 } else {
1631 mclength = encodeUTF8(c, mcbuffer);
1632
1633 *code++ = (options & IgnoreCaseOption) ? OP_CHAR_IGNORING_CA SE : OP_CHAR;
1634 for (c = 0; c < mclength; c++)
1635 *code++ = mcbuffer[c];
1636 }
1637
1638 /* Set the first and required bytes appropriately. If no previou s first
1639 byte, set it from this character, but revert to none on a zero repeat.
1640 Otherwise, leave the firstbyte value alone, and don't change it on a zero
1641 repeat. */
1642
1643 if (firstbyte == REQ_UNSET) {
1644 zerofirstbyte = REQ_NONE;
1645 zeroreqbyte = reqbyte;
1646
1647 /* If the character is more than one byte long, we can set f irstbyte
1648 only if it is not to be matched caselessly. */
1649
1650 if (mclength == 1 || req_caseopt == 0) {
1651 firstbyte = mcbuffer[0] | req_caseopt;
1652 if (mclength != 1)
1653 reqbyte = code[-1] | cd.req_varyopt;
1654 }
1655 else
1656 firstbyte = reqbyte = REQ_NONE;
1657 }
1658
1659 /* firstbyte was previously set; we can set reqbyte only if the length is
1660 1 or the matching is caseful. */
1661
1662 else {
1663 zerofirstbyte = firstbyte;
1664 zeroreqbyte = reqbyte;
1665 if (mclength == 1 || req_caseopt == 0)
1666 reqbyte = code[-1] | req_caseopt | cd.req_varyopt;
1667 }
1668
1669 break; /* End of literal character handling */
1670 }
1671 } /* end of big loop */
1672
1673 /* Control never reaches here by falling through, only by a goto for all the
1674 error states. Pass back the position in the pattern so that it can be displayed
1675 to the user for diagnosing the error. */
1676
1677 FAILED:
1678 *ptrptr = ptr;
1679 return false;
1680 }
1681
1682 /*************************************************
1683 * Compile sequence of alternatives *
1684 *************************************************/
1685
1686 /* On entry, ptr is pointing past the bracket character, but on return
1687 it points to the closing bracket, or vertical bar, or end of string.
1688 The code variable is pointing at the byte into which the BRA operator has been
1689 stored. If the ims options are changed at the start (for a (?ims: group) or
1690 during any branch, we need to insert an OP_OPT item at the start of every
1691 following branch to ensure they get set correctly at run time, and also pass
1692 the new options into every subsequent branch compile.
1693
1694 Argument:
1695 options option bits, including any changes for this subpattern
1696 brackets -> int containing the number of extracting brackets used
1697 codeptr -> the address of the current code pointer
1698 ptrptr -> the address of the current pattern pointer
1699 errorcodeptr -> pointer to error code variable
1700 skipbytes skip this many bytes at start (for OP_BRANUMBER)
1701 firstbyteptr place to put the first required character, or a negative number
1702 reqbyteptr place to put the last required character, or a negative number
1703 cd points to the data block with tables pointers etc.
1704
1705 Returns: true on success
1706 */
1707
1708 static bool
1709 compileBracket(int options, int* brackets, unsigned char** codeptr,
1710 const UChar** ptrptr, const UChar* patternEnd, ErrorCode* errorcodeptr, int skipbytes,
1711 int* firstbyteptr, int* reqbyteptr, CompileData& cd)
1712 {
1713 const UChar* ptr = *ptrptr;
1714 unsigned char* code = *codeptr;
1715 unsigned char* last_branch = code;
1716 unsigned char* start_bracket = code;
1717 int firstbyte = REQ_UNSET;
1718 int reqbyte = REQ_UNSET;
1719
1720 /* Offset is set zero to mark that this bracket is still open */
1721
1722 putLinkValueAllowZero(code + 1, 0);
1723 code += 1 + LINK_SIZE + skipbytes;
1724
1725 /* Loop for each alternative branch */
1726
1727 while (true) {
1728 /* Now compile the branch */
1729
1730 int branchfirstbyte;
1731 int branchreqbyte;
1732 if (!compileBranch(options, brackets, &code, &ptr, patternEnd, errorcode ptr,
1733 &branchfirstbyte, &branchreqbyte, cd)) {
1734 *ptrptr = ptr;
1735 return false;
1736 }
1737
1738 /* If this is the first branch, the firstbyte and reqbyte values for the
1739 branch become the values for the regex. */
1740
1741 if (*last_branch != OP_ALT) {
1742 firstbyte = branchfirstbyte;
1743 reqbyte = branchreqbyte;
1744 }
1745
1746 /* If this is not the first branch, the first char and reqbyte have to
1747 match the values from all the previous branches, except that if the pre vious
1748 value for reqbyte didn't have REQ_VARY set, it can still match, and we set
1749 REQ_VARY for the regex. */
1750
1751 else {
1752 /* If we previously had a firstbyte, but it doesn't match the new br anch,
1753 we have to abandon the firstbyte for the regex, but if there was pr eviously
1754 no reqbyte, it takes on the value of the old firstbyte. */
1755
1756 if (firstbyte >= 0 && firstbyte != branchfirstbyte) {
1757 if (reqbyte < 0)
1758 reqbyte = firstbyte;
1759 firstbyte = REQ_NONE;
1760 }
1761
1762 /* If we (now or from before) have no firstbyte, a firstbyte from th e
1763 branch becomes a reqbyte if there isn't a branch reqbyte. */
1764
1765 if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)
1766 branchreqbyte = branchfirstbyte;
1767
1768 /* Now ensure that the reqbytes match */
1769
1770 if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY))
1771 reqbyte = REQ_NONE;
1772 else
1773 reqbyte |= branchreqbyte; /* To "or" REQ_VARY */
1774 }
1775
1776 /* Reached end of expression, either ')' or end of pattern. Go back thro ugh
1777 the alternative branches and reverse the chain of offsets, with the fie ld in
1778 the BRA item now becoming an offset to the first alternative. If there are
1779 no alternatives, it points to the end of the group. The length in the
1780 terminating ket is always the length of the whole bracketed item. If an y of
1781 the ims options were changed inside the group, compile a resetting op-c ode
1782 following, except at the very end of the pattern. Return leaving the po inter
1783 at the terminating char. */
1784
1785 if (ptr >= patternEnd || *ptr != '|') {
1786 int length = code - last_branch;
1787 do {
1788 int prev_length = getLinkValueAllowZero(last_branch + 1);
1789 putLinkValue(last_branch + 1, length);
1790 length = prev_length;
1791 last_branch -= length;
1792 } while (length > 0);
1793
1794 /* Fill in the ket */
1795
1796 *code = OP_KET;
1797 putLinkValue(code + 1, code - start_bracket);
1798 code += 1 + LINK_SIZE;
1799
1800 /* Set values to pass back */
1801
1802 *codeptr = code;
1803 *ptrptr = ptr;
1804 *firstbyteptr = firstbyte;
1805 *reqbyteptr = reqbyte;
1806 return true;
1807 }
1808
1809 /* Another branch follows; insert an "or" node. Its length field points back
1810 to the previous branch while the bracket remains open. At the end the c hain
1811 is reversed. It's done like this so that the start of the bracket has a
1812 zero offset until it is closed, making it possible to detect recursion. */
1813
1814 *code = OP_ALT;
1815 putLinkValue(code + 1, code - last_branch);
1816 last_branch = code;
1817 code += 1 + LINK_SIZE;
1818 ptr++;
1819 }
1820 ASSERT_NOT_REACHED();
1821 }
1822
1823 /*************************************************
1824 * Check for anchored expression *
1825 *************************************************/
1826
1827 /* Try to find out if this is an anchored regular expression. Consider each
1828 alternative branch. If they all start OP_CIRC, or with a bracket
1829 all of whose alternatives start OP_CIRC (recurse ad lib), then
1830 it's anchored.
1831
1832 Arguments:
1833 code points to start of expression (the bracket)
1834 captureMap a bitmap of which brackets we are inside while testing; this
1835 handles up to substring 31; all brackets after that share
1836 the zero bit
1837 backrefMap the back reference bitmap
1838 */
1839
1840 static bool branchIsAnchored(const unsigned char* code)
1841 {
1842 const unsigned char* scode = firstSignificantOpcode(code);
1843 int op = *scode;
1844
1845 /* Brackets */
1846 if (op >= OP_BRA || op == OP_ASSERT)
1847 return bracketIsAnchored(scode);
1848
1849 /* Check for explicit anchoring */
1850 return op == OP_CIRC;
1851 }
1852
1853 static bool bracketIsAnchored(const unsigned char* code)
1854 {
1855 do {
1856 if (!branchIsAnchored(code + 1 + LINK_SIZE))
1857 return false;
1858 code += getLinkValue(code + 1);
1859 } while (*code == OP_ALT); /* Loop for each alternative */
1860 return true;
1861 }
1862
1863 /*************************************************
1864 * Check for starting with ^ or .* *
1865 *************************************************/
1866
1867 /* This is called to find out if every branch starts with ^ or .* so that
1868 "first char" processing can be done to speed things up in multiline
1869 matching and for non-DOTALL patterns that start with .* (which must start at
1870 the beginning or after \n)
1871
1872 Except when the .* appears inside capturing parentheses, and there is a
1873 subsequent back reference to those parentheses. By keeping a bitmap of the
1874 first 31 back references, we can catch some of the more common cases more
1875 precisely; all the greater back references share a single bit.
1876
1877 Arguments:
1878 code points to start of expression (the bracket)
1879 captureMap a bitmap of which brackets we are inside while testing; this
1880 handles up to substring 31; all brackets after that share
1881 the zero bit
1882 backrefMap the back reference bitmap
1883 */
1884
1885 static bool branchNeedsLineStart(const unsigned char* code, unsigned captureMap, unsigned backrefMap)
1886 {
1887 const unsigned char* scode = firstSignificantOpcode(code);
1888 int op = *scode;
1889
1890 /* Capturing brackets */
1891 if (op > OP_BRA) {
1892 int captureNum = op - OP_BRA;
1893 if (captureNum > EXTRACT_BASIC_MAX)
1894 captureNum = get2ByteValue(scode + 2 + LINK_SIZE);
1895 int bracketMask = (captureNum < 32) ? (1 << captureNum) : 1;
1896 return bracketNeedsLineStart(scode, captureMap | bracketMask, backrefMap );
1897 }
1898
1899 /* Other brackets */
1900 if (op == OP_BRA || op == OP_ASSERT)
1901 return bracketNeedsLineStart(scode, captureMap, backrefMap);
1902
1903 /* .* means "start at start or after \n" if it isn't in brackets that
1904 may be referenced. */
1905
1906 if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1907 return scode[1] == OP_NOT_NEWLINE && !(captureMap & backrefMap);
1908
1909 /* Explicit ^ */
1910 return op == OP_CIRC || op == OP_BOL;
1911 }
1912
1913 static bool bracketNeedsLineStart(const unsigned char* code, unsigned captureMap , unsigned backrefMap)
1914 {
1915 do {
1916 if (!branchNeedsLineStart(code + 1 + LINK_SIZE, captureMap, backrefMap))
1917 return false;
1918 code += getLinkValue(code + 1);
1919 } while (*code == OP_ALT); /* Loop for each alternative */
1920 return true;
1921 }
1922
1923 /*************************************************
1924 * Check for asserted fixed first char *
1925 *************************************************/
1926
1927 /* During compilation, the "first char" settings from forward assertions are
1928 discarded, because they can cause conflicts with actual literals that follow.
1929 However, if we end up without a first char setting for an unanchored pattern,
1930 it is worth scanning the regex to see if there is an initial asserted first
1931 char. If all branches start with the same asserted char, or with a bracket all
1932 of whose alternatives start with the same asserted char (recurse ad lib), then
1933 we return that char, otherwise -1.
1934
Arguments:
  code       points to start of expression (the bracket)
  inassert   true if in an assertion
1939
1940 Returns: -1 or the fixed first char
1941 */
1942
1943 static int branchFindFirstAssertedCharacter(const unsigned char* code, bool inas sert)
1944 {
1945 const unsigned char* scode = firstSignificantOpcodeSkippingAssertions(code);
1946 int op = *scode;
1947
1948 if (op >= OP_BRA)
1949 op = OP_BRA;
1950
1951 switch (op) {
1952 default:
1953 return -1;
1954
1955 case OP_BRA:
1956 case OP_ASSERT:
1957 return bracketFindFirstAssertedCharacter(scode, op == OP_ASSERT);
1958
1959 case OP_EXACT:
1960 scode += 2;
1961 /* Fall through */
1962
1963 case OP_CHAR:
1964 case OP_CHAR_IGNORING_CASE:
1965 case OP_ASCII_CHAR:
1966 case OP_ASCII_LETTER_IGNORING_CASE:
1967 case OP_PLUS:
1968 case OP_MINPLUS:
1969 if (!inassert)
1970 return -1;
1971 return scode[1];
1972 }
1973 }
1974
1975 static int bracketFindFirstAssertedCharacter(const unsigned char* code, bool ina ssert)
1976 {
1977 int c = -1;
1978 do {
1979 int d = branchFindFirstAssertedCharacter(code + 1 + LINK_SIZE, inassert) ;
1980 if (d < 0)
1981 return -1;
1982 if (c < 0)
1983 c = d;
1984 else if (c != d)
1985 return -1;
1986 code += getLinkValue(code + 1);
1987 } while (*code == OP_ALT);
1988 return c;
1989 }
1990
1991 static inline int multiplyWithOverflowCheck(int a, int b)
1992 {
1993 if (!a || !b)
1994 return 0;
1995 if (a > MAX_PATTERN_SIZE / b)
1996 return -1;
1997 return a * b;
1998 }
1999
2000 static int calculateCompiledPatternLength(const UChar* pattern, int patternLengt h, JSRegExpIgnoreCaseOption ignoreCase,
2001 CompileData& cd, ErrorCode& errorcode)
2002 {
2003 /* Make a pass over the pattern to compute the
2004 amount of store required to hold the compiled code. This does not have to b e
2005 perfect as long as errors are overestimates. */
2006
2007 if (patternLength > MAX_PATTERN_SIZE) {
2008 errorcode = ERR16;
2009 return -1;
2010 }
2011
2012 int length = 1 + LINK_SIZE; /* For initial BRA plus length */
2013 int branch_extra = 0;
2014 int lastitemlength = 0;
2015 unsigned brastackptr = 0;
2016 int brastack[BRASTACK_SIZE];
2017 unsigned char bralenstack[BRASTACK_SIZE];
2018 int bracount = 0;
2019
2020 const UChar* ptr = (const UChar*)(pattern - 1);
2021 const UChar* patternEnd = (const UChar*)(pattern + patternLength);
2022
2023 while (++ptr < patternEnd) {
2024 int minRepeats = 0, maxRepeats = 0;
2025 int c = *ptr;
2026
2027 switch (c) {
2028 /* A backslashed item may be an escaped data character or it may be a
2029 character type. */
2030
2031 case '\\':
2032 c = checkEscape(&ptr, patternEnd, &errorcode, cd.numCapturingBra ckets, false);
2033 if (errorcode != 0)
2034 return -1;
2035
2036 lastitemlength = 1; /* Default length of last item for repea ts */
2037
2038 if (c >= 0) { /* Data character */
2039 length += 2; /* For a one-byte character */
2040
2041 if (c > 127) {
2042 int i;
2043 for (i = 0; i < kjs_pcre_utf8_table1_size; i++)
2044 if (c <= kjs_pcre_utf8_table1[i]) break;
2045 length += i;
2046 lastitemlength += i;
2047 }
2048
2049 continue;
2050 }
2051
2052 /* Other escapes need one byte */
2053
2054 length++;
2055
2056 /* A back reference needs an additional 2 bytes, plus either one or 5
2057 bytes for a repeat. We also need to keep the value of the highe st
2058 back reference. */
2059
2060 if (c <= -ESC_REF) {
2061 int refnum = -c - ESC_REF;
2062 cd.backrefMap |= (refnum < 32) ? (1 << refnum) : 1;
2063 if (refnum > cd.top_backref)
2064 cd.top_backref = refnum;
2065 length += 2; /* For single back reference */
2066 if (safelyCheckNextChar(ptr, patternEnd, '{') && isCountedRe peat(ptr + 2, patternEnd)) {
2067 ptr = readRepeatCounts(ptr + 2, &minRepeats, &maxRepeats , &errorcode);
2068 if (errorcode)
2069 return -1;
2070 if ((minRepeats == 0 && (maxRepeats == 1 || maxRepeats = = -1)) ||
2071 (minRepeats == 1 && maxRepeats == -1))
2072 length++;
2073 else
2074 length += 5;
2075 if (safelyCheckNextChar(ptr, patternEnd, '?'))
2076 ptr++;
2077 }
2078 }
2079 continue;
2080
2081 case '^': /* Single-byte metacharacters */
2082 case '.':
2083 case '$':
2084 length++;
2085 lastitemlength = 1;
2086 continue;
2087
2088 case '*': /* These repeats won't be after brackets; */
2089 case '+': /* those are handled separately */
2090 case '?':
2091 length++;
2092 goto POSSESSIVE;
2093
2094 /* This covers the cases of braced repeats after a single char, meta char,
2095 class, or back reference. */
2096
2097 case '{':
2098 if (!isCountedRepeat(ptr + 1, patternEnd))
2099 goto NORMAL_CHAR;
2100 ptr = readRepeatCounts(ptr + 1, &minRepeats, &maxRepeats, &error code);
2101 if (errorcode != 0)
2102 return -1;
2103
2104 /* These special cases just insert one extra opcode */
2105
2106 if ((minRepeats == 0 && (maxRepeats == 1 || maxRepeats == -1)) | |
2107 (minRepeats == 1 && maxRepeats == -1))
2108 length++;
2109
2110 /* These cases might insert additional copies of a preceding cha racter. */
2111
2112 else {
2113 if (minRepeats != 1) {
2114 length -= lastitemlength; /* Uncount the original char or metachar */
2115 if (minRepeats > 0)
2116 length += 3 + lastitemlength;
2117 }
2118 length += lastitemlength + ((maxRepeats > 0) ? 3 : 1);
2119 }
2120
2121 if (safelyCheckNextChar(ptr, patternEnd, '?'))
2122 ptr++; /* Needs no extra length */
2123
2124 POSSESSIVE: /* Test for possessive quantifier */
2125 if (safelyCheckNextChar(ptr, patternEnd, '+')) {
2126 ptr++;
2127 length += 2 + 2 * LINK_SIZE; /* Allow for atomic brackets */
2128 }
2129 continue;
2130
2131 /* An alternation contains an offset to the next branch or ket. If a ny ims
2132 options changed in the previous branch(es), and/or if we are in a
2133 lookbehind assertion, extra space will be needed at the start of th e
2134 branch. This is handled by branch_extra. */
2135
2136 case '|':
2137 if (brastackptr == 0)
2138 cd.needOuterBracket = true;
2139 length += 1 + LINK_SIZE + branch_extra;
2140 continue;
2141
2142 /* A character class uses 33 characters provided that all the charac ter
2143 values are less than 256. Otherwise, it uses a bit map for low valu ed
2144 characters, and individual items for others. Don't worry about char acter
2145 types that aren't allowed in classes - they'll get picked up during the
2146 compile. A character class that contains only one single-byte chara cter
2147 uses 2 or 3 bytes, depending on whether it is negated or not. Notic e this
2148 where we can. (In UTF-8 mode we can do this only for chars < 128.) */
2149
2150 case '[': {
2151 int class_optcount;
2152 if (*(++ptr) == '^') {
2153 class_optcount = 10; /* Greater than one */
2154 ptr++;
2155 }
2156 else
2157 class_optcount = 0;
2158
2159 bool class_utf8 = false;
2160
2161 for (; ptr < patternEnd && *ptr != ']'; ++ptr) {
2162 /* Check for escapes */
2163
2164 if (*ptr == '\\') {
2165 c = checkEscape(&ptr, patternEnd, &errorcode, cd.numCapt uringBrackets, true);
2166 if (errorcode != 0)
2167 return -1;
2168
2169 /* Handle escapes that turn into characters */
2170
2171 if (c >= 0)
2172 goto NON_SPECIAL_CHARACTER;
2173
2174 /* Escapes that are meta-things. The normal ones just af fect the
2175 bit map, but Unicode properties require an XCLASS exten ded item. */
2176
2177 else
2178 class_optcount = 10; /* \d, \s etc; make sur e > 1 */
2179 }
2180
2181 /* Anything else increments the possible optimization count. We have to
2182 detect ranges here so that we can compute the number of ext ra ranges for
2183 caseless wide characters when UCP support is available. If there are wide
2184 characters, we are going to have to use an XCLASS, even for single
2185 characters. */
2186
2187 else {
2188 c = *ptr;
2189
2190 /* Come here from handling \ above when it escapes to a char value */
2191
2192 NON_SPECIAL_CHARACTER:
2193 class_optcount++;
2194
2195 int d = -1;
2196 if (safelyCheckNextChar(ptr, patternEnd, '-')) {
2197 UChar const *hyptr = ptr++;
2198 if (safelyCheckNextChar(ptr, patternEnd, '\\')) {
2199 ptr++;
2200 d = checkEscape(&ptr, patternEnd, &errorcode, cd .numCapturingBrackets, true);
2201 if (errorcode != 0)
2202 return -1;
2203 }
2204 else if ((ptr + 1 < patternEnd) && ptr[1] != ']')
2205 d = *++ptr;
2206 if (d < 0)
2207 ptr = hyptr; /* go back to hyphen as data * /
2208 }
2209
2210 /* If d >= 0 we have a range. In UTF-8 mode, if the end is > 255, or >
2211 127 for caseless matching, we will need to use an XCLAS S. */
2212
2213 if (d >= 0) {
2214 class_optcount = 10; /* Ensure > 1 */
2215 if (d < c) {
2216 errorcode = ERR8;
2217 return -1;
2218 }
2219
2220 if ((d > 255 || (ignoreCase && d > 127))) {
2221 unsigned char buffer[6];
2222 if (!class_utf8) /* Allow for XCLASS ove rhead */
2223 {
2224 class_utf8 = true;
2225 length += LINK_SIZE + 2;
2226 }
2227
2228 /* If we have UCP support, find out how many ext ra ranges are
2229 needed to map the other case of characters with in this range. We
2230 have to mimic the range optimization here, beca use extending the
2231 range upwards might push d over a boundary that makes it use
2232 another byte in the UTF-8 representation. */
2233
2234 if (ignoreCase) {
2235 int occ, ocd;
2236 int cc = c;
2237 int origd = d;
2238 while (getOthercaseRange(&cc, origd, &occ, & ocd)) {
2239 if (occ >= c && ocd <= d)
2240 continue; /* Skip embedded */
2241
2242 if (occ < c && ocd >= c - 1) /* Extend the basic range */
2243 { /* if there is overlap, */
2244 c = occ; /* noti ng that if occ < c */
2245 continue; /* we c an't have ocd > d */
2246 } /* because a subrange is */
2247 if (ocd > d && occ <= d + 1) /* always shorter than */
2248 { /* the basi c range. */
2249 d = ocd;
2250 continue;
2251 }
2252
2253 /* An extra item is needed */
2254
2255 length += 1 + encodeUTF8(occ, buffer) +
2256 ((occ == ocd) ? 0 : encodeUTF8(ocd, buff er));
2257 }
2258 }
2259
2260 /* The length of the (possibly extended) range * /
2261
2262 length += 1 + encodeUTF8(c, buffer) + encodeUTF8 (d, buffer);
2263 }
2264
2265 }
2266
2267 /* We have a single character. There is nothing to be do ne unless we
2268 are in UTF-8 mode. If the char is > 255, or 127 when ca seless, we must
2269 allow for an XCL_SINGLE item, doubled for caselessness if there is UCP
2270 support. */
2271
2272 else {
2273 if ((c > 255 || (ignoreCase && c > 127))) {
2274 unsigned char buffer[6];
2275 class_optcount = 10; /* Ensure > 1 */
2276 if (!class_utf8) /* Allow for XCLASS ove rhead */
2277 {
2278 class_utf8 = true;
2279 length += LINK_SIZE + 2;
2280 }
2281 length += (ignoreCase ? 2 : 1) * (1 + encodeUTF8 (c, buffer));
2282 }
2283 }
2284 }
2285 }
2286
2287 if (ptr >= patternEnd) { /* Missing terminating ']' */
2288 errorcode = ERR6;
2289 return -1;
2290 }
2291
2292 /* We can optimize when there was only one optimizable character .
2293 Note that this does not detect the case of a negated single cha racter.
2294 In that case we do an incorrect length computation, but it's no t a serious
2295 problem because the computed length is too large rather than to o small. */
2296
2297 if (class_optcount == 1)
2298 goto NORMAL_CHAR;
2299
2300 /* Here, we handle repeats for the class opcodes. */
2301 {
2302 length += 33;
2303
2304 /* A repeat needs either 1 or 5 bytes. If it is a possessive quantifier,
2305 we also need extra for wrapping the whole thing in a sub-pa ttern. */
2306
2307 if (safelyCheckNextChar(ptr, patternEnd, '{') && isCountedRe peat(ptr + 2, patternEnd)) {
2308 ptr = readRepeatCounts(ptr + 2, &minRepeats, &maxRepeats , &errorcode);
2309 if (errorcode != 0)
2310 return -1;
2311 if ((minRepeats == 0 && (maxRepeats == 1 || maxRepeats = = -1)) ||
2312 (minRepeats == 1 && maxRepeats == -1))
2313 length++;
2314 else
2315 length += 5;
2316 if (safelyCheckNextChar(ptr, patternEnd, '+')) {
2317 ptr++;
2318 length += 2 + 2 * LINK_SIZE;
2319 } else if (safelyCheckNextChar(ptr, patternEnd, '?'))
2320 ptr++;
2321 }
2322 }
2323 continue;
2324 }
2325
2326 /* Brackets may be genuine groups or special things */
2327
2328 case '(': {
2329 int branch_newextra = 0;
2330 int bracket_length = 1 + LINK_SIZE;
2331 bool capturing = false;
2332
2333 /* Handle special forms of bracket, which all start (? */
2334
2335 if (safelyCheckNextChar(ptr, patternEnd, '?')) {
2336 switch (c = (ptr + 2 < patternEnd ? ptr[2] : 0)) {
2337 /* Non-referencing groups and lookaheads just move the p ointer on, and
2338 then behave like a non-special bracket, except that the y don't increment
2339 the count of extracting brackets. Ditto for the "once o nly" bracket,
2340 which is in Perl from version 5.005. */
2341
2342 case ':':
2343 case '=':
2344 case '!':
2345 ptr += 2;
2346 break;
2347
2348 /* Else loop checking valid options until ) is met. Anyt hing else is an
2349 error. If we are without any brackets, i.e. at top leve l, the settings
2350 act as if specified in the options, so massage the opti ons immediately.
2351 This is for backward compatibility with Perl 5.004. */
2352
2353 default:
2354 errorcode = ERR12;
2355 return -1;
2356 }
2357 } else
2358 capturing = 1;
2359
2360 /* Capturing brackets must be counted so we can process escapes in a
2361 Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are goi ng to need
2362 an additional 3 bytes of memory per capturing bracket. */
2363
2364 if (capturing) {
2365 bracount++;
2366 if (bracount > EXTRACT_BASIC_MAX)
2367 bracket_length += 3;
2368 }
2369
2370 /* Save length for computing whole length at end if there's a re peat that
2371 requires duplication of the group. Also save the current value of
2372 branch_extra, and start the new group with the new value. If no n-zero, this
2373 will either be 2 for a (?imsx: group, or 3 for a lookbehind ass ertion. */
2374
2375 if (brastackptr >= sizeof(brastack)/sizeof(int)) {
2376 errorcode = ERR17;
2377 return -1;
2378 }
2379
2380 bralenstack[brastackptr] = branch_extra;
2381 branch_extra = branch_newextra;
2382
2383 brastack[brastackptr++] = length;
2384 length += bracket_length;
2385 continue;
2386 }
2387
2388 /* Handle ket. Look for subsequent maxRepeats/minRepeats; for certai n sets of values we
2389 have to replicate this bracket up to that many times. If brastackpt r is
2390 0 this is an unmatched bracket which will generate an error, but ta ke care
2391 not to try to access brastack[-1] when computing the length and res toring
2392 the branch_extra value. */
2393
2394 case ')': {
2395 int duplength;
2396 length += 1 + LINK_SIZE;
2397 if (brastackptr > 0) {
2398 duplength = length - brastack[--brastackptr];
2399 branch_extra = bralenstack[brastackptr];
2400 }
2401 else
2402 duplength = 0;
2403
2404 /* Leave ptr at the final char; for readRepeatCounts this happen s
2405 automatically; for the others we need an increment. */
2406
2407 if ((ptr + 1 < patternEnd) && (c = ptr[1]) == '{' && isCountedRe peat(ptr + 2, patternEnd)) {
2408 ptr = readRepeatCounts(ptr + 2, &minRepeats, &maxRepeats, &e rrorcode);
2409 if (errorcode)
2410 return -1;
2411 } else if (c == '*') {
2412 minRepeats = 0;
2413 maxRepeats = -1;
2414 ptr++;
2415 } else if (c == '+') {
2416 minRepeats = 1;
2417 maxRepeats = -1;
2418 ptr++;
2419 } else if (c == '?') {
2420 minRepeats = 0;
2421 maxRepeats = 1;
2422 ptr++;
2423 } else {
2424 minRepeats = 1;
2425 maxRepeats = 1;
2426 }
2427
2428 /* If the minimum is zero, we have to allow for an OP_BRAZERO be fore the
2429 group, and if the maximum is greater than zero, we have to repl icate
2430 maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
2431 bracket set. */
2432
2433 int repeatsLength;
2434 if (minRepeats == 0) {
2435 length++;
2436 if (maxRepeats > 0) {
2437 repeatsLength = multiplyWithOverflowCheck(maxRepeats - 1 , duplength + 3 + 2 * LINK_SIZE);
2438 if (repeatsLength < 0) {
2439 errorcode = ERR16;
2440 return -1;
2441 }
2442 length += repeatsLength;
2443 if (length > MAX_PATTERN_SIZE) {
2444 errorcode = ERR16;
2445 return -1;
2446 }
2447 }
2448 }
2449
2450 /* When the minimum is greater than zero, we have to replicate u p to
2451 minval-1 times, with no additions required in the copies. Then, if there
2452 is a limited maximum we have to replicate up to maxval-1 times allowing
2453 for a BRAZERO item before each optional copy and nesting bracke ts for all
2454 but one of the optional copies. */
2455
2456 else {
2457 repeatsLength = multiplyWithOverflowCheck(minRepeats - 1, du plength);
2458 if (repeatsLength < 0) {
2459 errorcode = ERR16;
2460 return -1;
2461 }
2462 length += repeatsLength;
2463 if (maxRepeats > minRepeats) { /* Need this test as maxRepea ts=-1 means no limit */
2464 repeatsLength = multiplyWithOverflowCheck(maxRepeats - m inRepeats, duplength + 3 + 2 * LINK_SIZE);
2465 if (repeatsLength < 0) {
2466 errorcode = ERR16;
2467 return -1;
2468 }
2469 length += repeatsLength - (2 + 2 * LINK_SIZE);
2470 }
2471 if (length > MAX_PATTERN_SIZE) {
2472 errorcode = ERR16;
2473 return -1;
2474 }
2475 }
2476
2477 /* Allow space for once brackets for "possessive quantifier" */
2478
2479 if (safelyCheckNextChar(ptr, patternEnd, '+')) {
2480 ptr++;
2481 length += 2 + 2 * LINK_SIZE;
2482 }
2483 continue;
2484 }
2485
2486 /* Non-special character. It won't be space or # in extended mode, s o it is
2487 always a genuine character. If we are in a \Q...\E sequence, check for the
2488 end; if not, we have a literal. */
2489
2490 default:
2491 NORMAL_CHAR:
2492 length += 2; /* For a one-byte character */
2493 lastitemlength = 1; /* Default length of last item for repeats */
2494
2495 if (c > 127) {
2496 int i;
2497 for (i = 0; i < kjs_pcre_utf8_table1_size; i++)
2498 if (c <= kjs_pcre_utf8_table1[i])
2499 break;
2500 length += i;
2501 lastitemlength += i;
2502 }
2503
2504 continue;
2505 }
2506 }
2507
2508 length += 2 + LINK_SIZE; /* For final KET and END */
2509
2510 cd.numCapturingBrackets = bracount;
2511 return length;
2512 }
2513
2514 /*************************************************
2515 * Compile a Regular Expression *
2516 *************************************************/
2517
/* jsRegExpCompile takes a pattern and returns a pointer to a block of store
holding a compiled version of the expression: a JSRegExp header followed
by the compiled code.

Arguments:
  pattern            the regular expression
  patternLength      number of UChar units in pattern
  ignoreCase         non-zero to set IgnoreCaseOption
  multiline          non-zero to set MatchAcrossMultipleLinesOption
  numSubpatterns     if non-NULL, receives the number of capturing brackets
  errorptr           pointer to pointer to error text; set to NULL on entry.
                     If errorptr itself is NULL, nothing is compiled and
                     NULL is returned
  allocate_function  called to allocate the compiled data block
  free_function      called to release the block if compilation fails

Returns:        pointer to compiled data block, or NULL on error,
                with *errorptr set
*/
2535
2536 static inline JSRegExp* returnError(ErrorCode errorcode, const char** errorptr)
2537 {
2538 *errorptr = errorText(errorcode);
2539 return 0;
2540 }
2541
2542 JSRegExp* jsRegExpCompile(const UChar* pattern, int patternLength,
2543 JSRegExpIgnoreCaseOption ignoreCase, JSRegExpMultilineOption mul tiline,
2544 unsigned* numSubpatterns, const char** errorptr,
2545 malloc_t* allocate_function, free_t* free_function)
2546 {
2547 /* We can't pass back an error message if errorptr is NULL; I guess the best we
2548 can do is just return NULL, but we can set a code value if there is a code pointer. */
2549 if (!errorptr)
2550 return 0;
2551 *errorptr = NULL;
2552
2553 CompileData cd;
2554
2555 ErrorCode errorcode = ERR0;
2556 /* Call this once just to count the brackets. */
2557 calculateCompiledPatternLength(pattern, patternLength, ignoreCase, cd, error code);
2558 /* Call it again to compute the length. */
2559 int length = calculateCompiledPatternLength(pattern, patternLength, ignoreCa se, cd, errorcode);
2560 if (errorcode)
2561 return returnError(errorcode, errorptr);
2562
2563 if (length > MAX_PATTERN_SIZE)
2564 return returnError(ERR16, errorptr);
2565
2566 size_t size = length + sizeof(JSRegExp);
2567 JSRegExp* re = reinterpret_cast<JSRegExp*>((*allocate_function)(size));
2568
2569 if (!re)
2570 return returnError(ERR13, errorptr);
2571
2572 re->options = (ignoreCase ? IgnoreCaseOption : 0) | (multiline ? MatchAcross MultipleLinesOption : 0);
2573
2574 /* The starting points of the name/number translation table and of the code are
2575 passed around in the compile data block. */
2576
2577 const unsigned char* codeStart = (const unsigned char*)(re + 1);
2578
2579 /* Set up a starting, non-extracting bracket, then compile the expression. O n
2580 error, errorcode will be set non-zero, so we don't need to look at the resu lt
2581 of the function here. */
2582
2583 const UChar* ptr = (const UChar*)pattern;
2584 const UChar* patternEnd = pattern + patternLength;
2585 unsigned char* code = (unsigned char*)codeStart;
2586 int firstbyte, reqbyte;
2587 int bracketCount = 0;
2588 if (!cd.needOuterBracket)
2589 compileBranch(re->options, &bracketCount, &code, &ptr, patternEnd, &erro rcode, &firstbyte, &reqbyte, cd);
2590 else {
2591 *code = OP_BRA;
2592 compileBracket(re->options, &bracketCount, &code, &ptr, patternEnd, &err orcode, 0, &firstbyte, &reqbyte, cd);
2593 }
2594 re->top_bracket = bracketCount;
2595 re->top_backref = cd.top_backref;
2596
2597 /* If not reached end of pattern on success, there's an excess bracket. */
2598
2599 if (errorcode == 0 && ptr < patternEnd)
2600 errorcode = ERR10;
2601
2602 /* Fill in the terminating state and check for disastrous overflow, but
2603 if debugging, leave the test till after things are printed out. */
2604
2605 *code++ = OP_END;
2606
2607 ASSERT(code - codeStart <= length);
2608 if (code - codeStart > length)
2609 errorcode = ERR7;
2610
2611 /* Give an error if there's back reference to a non-existent capturing
2612 subpattern. */
2613
2614 if (re->top_backref > re->top_bracket)
2615 errorcode = ERR15;
2616
2617 /* Failed to compile, or error while post-processing */
2618
2619 if (errorcode != ERR0) {
2620 (*free_function)(reinterpret_cast<void*>(re));
2621 return returnError(errorcode, errorptr);
2622 }
2623
2624 /* If the anchored option was not passed, set the flag if we can determine t hat
2625 the pattern is anchored by virtue of ^ characters or \A or anything else (s uch
2626 as starting with .* when DOTALL is set).
2627
2628 Otherwise, if we know what the first character has to be, save it, because that
2629 speeds up unanchored matches no end. If not, see if we can set the
2630 UseMultiLineFirstByteOptimizationOption flag. This is helpful for multiline matches when all branches
2631 start with ^. and also when all branches start with .* for non-DOTALL match es.
2632 */
2633
2634 if (cd.needOuterBracket ? bracketIsAnchored(codeStart) : branchIsAnchored(co deStart))
2635 re->options |= IsAnchoredOption;
2636 else {
2637 if (firstbyte < 0) {
2638 firstbyte = (cd.needOuterBracket
2639 ? bracketFindFirstAssertedCharacter(codeStart, false)
2640 : branchFindFirstAssertedCharacter(codeStart, false))
2641 | ((re->options & IgnoreCaseOption) ? REQ_IGNORE_CASE : 0);
2642 }
2643 if (firstbyte >= 0) {
2644 int ch = firstbyte & 255;
2645 if (ch < 127) {
2646 re->first_byte = ((firstbyte & REQ_IGNORE_CASE) && flipCase(ch) == ch) ? ch : firstbyte;
2647 re->options |= UseFirstByteOptimizationOption;
2648 }
2649 } else {
2650 if (cd.needOuterBracket ? bracketNeedsLineStart(codeStart, 0, cd.bac krefMap) : branchNeedsLineStart(codeStart, 0, cd.backrefMap))
2651 re->options |= UseMultiLineFirstByteOptimizationOption;
2652 }
2653 }
2654
2655 /* For an anchored pattern, we use the "required byte" only if it follows a
2656 variable length item in the regex. Remove the caseless flag for non-caseabl e
2657 bytes. */
2658
2659 if (reqbyte >= 0 && (!(re->options & IsAnchoredOption) || (reqbyte & REQ_VAR Y))) {
2660 int ch = reqbyte & 255;
2661 if (ch < 127) {
2662 re->req_byte = ((reqbyte & REQ_IGNORE_CASE) && flipCase(ch) == ch) ? (reqbyte & ~REQ_IGNORE_CASE) : reqbyte;
2663 re->options |= UseRequiredByteOptimizationOption;
2664 }
2665 }
2666
2667 if (numSubpatterns)
2668 *numSubpatterns = re->top_bracket;
2669 return re;
2670 }
2671
2672 void jsRegExpFree(JSRegExp* re, free_t* free_function)
2673 {
2674 (*free_function)(reinterpret_cast<void*>(re));
2675 }
2676
2677 } } // namespace v8::jscre
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698