Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(474)

Side by Side Diff: patches/regex2.patch

Issue 822213003: ICU upgrade to 54.1 step 2 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: readme: better wrapping, declspec patch dropped Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « patches/regex.patch ('k') | patches/vscomp.patch » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 Index: source/common/unicode/utypes.h
2 ===================================================================
3 --- source/common/unicode/utypes.h (revision 292709)
4 +++ source/common/unicode/utypes.h (working copy)
5 @@ -647,6 +647,7 @@
6 U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
7 U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
8 U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
9 + U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. */
10 U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */
11
12 /*
13 Index: source/common/utypes.c
14 ===================================================================
15 --- source/common/utypes.c (revision 292709)
16 +++ source/common/utypes.c (working copy)
17 @@ -165,7 +165,8 @@
18 "U_REGEX_INVALID_RANGE",
19 "U_REGEX_STACK_OVERFLOW",
20 "U_REGEX_TIME_OUT",
21 - "U_REGEX_STOPPED_BY_CALLER"
22 + "U_REGEX_STOPPED_BY_CALLER",
23 + "U_REGEX_PATTERN_TOO_BIG"
24 };
25
26 static const char * const
27 Index: source/i18n/regexcmp.cpp
28 ===================================================================
29 --- source/i18n/regexcmp.cpp (revision 292943)
30 +++ source/i18n/regexcmp.cpp (working copy)
31 @@ -302,7 +302,7 @@
32 // present in the saved state: the input string position (int64_t) and
33 // the position in the compiled pattern.
34 //
35 - fRXPat->fFrameSize+=RESTACKFRAME_HDRCOUNT;
36 + allocateStackData(RESTACKFRAME_HDRCOUNT);
37
38 //
39 // Optimization pass 1: NOPs, back-references, and case-folding
40 @@ -368,9 +368,9 @@
41 // the start of an ( grouping.
42 //4 NOP Resreved, will be replaced by a save if there are
43 // OR | operators at the top level
44 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_STATE_SAVE, 2), *fStatus );
45 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_JMP, 3), *fStatus);
46 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_FAIL, 0), *fStatus);
47 + appendOp(URX_BUILD(URX_STATE_SAVE, 2));
48 + appendOp(URX_BUILD(URX_JMP, 3));
49 + appendOp(URX_BUILD(URX_FAIL, 0));
50
51 // Standard open nonCapture paren action emits the two NOPs and
52 // sets up the paren stack frame.
53 @@ -393,7 +393,7 @@
54 }
55
56 // add the END operation to the compiled pattern.
57 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_END, 0), *fStatus);
58 + appendOp(URX_BUILD(URX_END, 0));
59
60 // Terminate the pattern compilation state machine.
61 returnVal = FALSE;
62 @@ -422,7 +422,7 @@
63 // the JMP will eventually be the location following the ')' for the
64 // group. This will be patched in later, when the ')' is encountered.
65 op = URX_BUILD(URX_JMP, 0);
66 - fRXPat->fCompiledPat->addElement(op, *fStatus);
67 + appendOp(op);
68
69 // Push the position of the newly added JMP op onto the parentheses stack.
70 // This registers if for fixup when this block's close paren is encountered.
71 @@ -431,7 +431,7 @@
72 // Append a NOP to the compiled pattern. This is the slot reserved
73 // for a SAVE in the event that there is yet another '|' following
74 // this one.
75 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
76 + appendOp(URX_BUILD(URX_NOP, 0));
77 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);
78 }
79 break;
80 @@ -457,12 +457,11 @@
81 // END_CAPTURE is encountered.
82 {
83 fixLiterals();
84 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
85 - int32_t varsLoc = fRXPat->fFrameSize; // Reserve three slots in match stack frame.
86 - fRXPat->fFrameSize += 3;
87 - int32_t cop = URX_BUILD(URX_START_CAPTURE, varsLoc);
88 - fRXPat->fCompiledPat->addElement(cop, *fStatus);
89 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
90 + appendOp(URX_BUILD(URX_NOP, 0));
91 + int32_t varsLoc = allocateStackData(3); // Reserve three slots in match stack frame.
92 + int32_t cop = URX_BUILD(URX_START_CAPTURE, varsLoc);
93 + appendOp(cop);
94 + appendOp(URX_BUILD(URX_NOP, 0));
95
96 // On the Parentheses stack, start a new frame and add the postions
97 // of the two NOPs. Depending on what follows in the pattern, the
98 @@ -487,8 +486,8 @@
99 // is an '|' alternation within the parens.
100 {
101 fixLiterals();
102 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
103 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
104 + appendOp(URX_BUILD(URX_NOP, 0));
105 + appendOp(URX_BUILD(URX_NOP, 0));
106
107 // On the Parentheses stack, start a new frame and add the postions
108 // of the two NOPs.
109 @@ -510,12 +509,11 @@
110 // is an '|' alternation within the parens.
111 {
112 fixLiterals();
113 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
114 - int32_t varLoc = fRXPat->fDataSize; // Reserve a data locati on for saving the
115 - fRXPat->fDataSize += 1; // state stack ptr.
116 - int32_t stoOp = URX_BUILD(URX_STO_SP, varLoc);
117 - fRXPat->fCompiledPat->addElement(stoOp, *fStatus);
118 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
119 + appendOp(URX_BUILD(URX_NOP, 0));
120 + int32_t varLoc = allocateData(1); // Reserve a data location for saving the state stack ptr.
121 + int32_t stoOp = URX_BUILD(URX_STO_SP, varLoc);
122 + appendOp(stoOp);
123 + appendOp(URX_BUILD(URX_NOP, 0));
124
125 // On the Parentheses stack, start a new frame and add the postions
126 // of the two NOPs. Depending on what follows in the pattern, the
127 @@ -558,26 +556,25 @@
128 // Two data slots are reserved, for saving the stack ptr and the input position.
129 {
130 fixLiterals();
131 - int32_t dataLoc = fRXPat->fDataSize;
132 - fRXPat->fDataSize += 2;
133 + int32_t dataLoc = allocateData(2);
134 int32_t op = URX_BUILD(URX_LA_START, dataLoc);
135 - fRXPat->fCompiledPat->addElement(op, *fStatus);
136 + appendOp(op);
137
138 op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+ 2);
139 - fRXPat->fCompiledPat->addElement(op, *fStatus);
140 + appendOp(op);
141
142 op = URX_BUILD(URX_JMP, fRXPat->fCompiledPat->size()+ 3);
143 - fRXPat->fCompiledPat->addElement(op, *fStatus);
144 -
145 + appendOp(op);
146 +
147 op = URX_BUILD(URX_LA_END, dataLoc);
148 - fRXPat->fCompiledPat->addElement(op, *fStatus);
149 + appendOp(op);
150
151 op = URX_BUILD(URX_BACKTRACK, 0);
152 - fRXPat->fCompiledPat->addElement(op, *fStatus);
153 -
154 + appendOp(op);
155 +
156 op = URX_BUILD(URX_NOP, 0);
157 - fRXPat->fCompiledPat->addElement(op, *fStatus);
158 - fRXPat->fCompiledPat->addElement(op, *fStatus);
159 + appendOp(op);
160 + appendOp(op);
161
162 // On the Parentheses stack, start a new frame and add the postions
163 // of the NOPs.
164 @@ -602,16 +599,15 @@
165 // an alternate (transparent) region.
166 {
167 fixLiterals();
168 - int32_t dataLoc = fRXPat->fDataSize;
169 - fRXPat->fDataSize += 2;
170 + int32_t dataLoc = allocateData(2);
171 int32_t op = URX_BUILD(URX_LA_START, dataLoc);
172 - fRXPat->fCompiledPat->addElement(op, *fStatus);
173 + appendOp(op);
174
175 op = URX_BUILD(URX_STATE_SAVE, 0); // dest address will be patch ed later.
176 - fRXPat->fCompiledPat->addElement(op, *fStatus);
177 + appendOp(op);
178
179 op = URX_BUILD(URX_NOP, 0);
180 - fRXPat->fCompiledPat->addElement(op, *fStatus);
181 + appendOp(op);
182
183 // On the Parentheses stack, start a new frame and add the postions
184 // of the StateSave and NOP.
185 @@ -649,23 +645,22 @@
186 fixLiterals();
187
188 // Allocate data space
189 - int32_t dataLoc = fRXPat->fDataSize;
190 - fRXPat->fDataSize += 4;
191 + int32_t dataLoc = allocateData(4);
192
193 // Emit URX_LB_START
194 int32_t op = URX_BUILD(URX_LB_START, dataLoc);
195 - fRXPat->fCompiledPat->addElement(op, *fStatus);
196 + appendOp(op);
197
198 // Emit URX_LB_CONT
199 op = URX_BUILD(URX_LB_CONT, dataLoc);
200 - fRXPat->fCompiledPat->addElement(op, *fStatus);
201 - fRXPat->fCompiledPat->addElement(0, *fStatus); // MinMatchLengt h. To be filled later.
202 - fRXPat->fCompiledPat->addElement(0, *fStatus); // MaxMatchLengt h. To be filled later.
203 + appendOp(op);
204 + appendOp(0); // MinMatchLength. To be filled later.
205 + appendOp(0); // MaxMatchLength. To be filled later.
206
207 // Emit the NOP
208 op = URX_BUILD(URX_NOP, 0);
209 - fRXPat->fCompiledPat->addElement(op, *fStatus);
210 - fRXPat->fCompiledPat->addElement(op, *fStatus);
211 + appendOp(op);
212 + appendOp(op);
213
214 // On the Parentheses stack, start a new frame and add the postions
215 // of the URX_LB_CONT and the NOP.
216 @@ -705,24 +700,23 @@
217 fixLiterals();
218
219 // Allocate data space
220 - int32_t dataLoc = fRXPat->fDataSize;
221 - fRXPat->fDataSize += 4;
222 + int32_t dataLoc = allocateData(4);
223
224 // Emit URX_LB_START
225 int32_t op = URX_BUILD(URX_LB_START, dataLoc);
226 - fRXPat->fCompiledPat->addElement(op, *fStatus);
227 + appendOp(op);
228
229 // Emit URX_LBN_CONT
230 op = URX_BUILD(URX_LBN_CONT, dataLoc);
231 - fRXPat->fCompiledPat->addElement(op, *fStatus);
232 - fRXPat->fCompiledPat->addElement(0, *fStatus); // MinMatchLengt h. To be filled later.
233 - fRXPat->fCompiledPat->addElement(0, *fStatus); // MaxMatchLengt h. To be filled later.
234 - fRXPat->fCompiledPat->addElement(0, *fStatus); // Continue Loc. To be filled later.
235 + appendOp(op);
236 + appendOp(0); // MinMatchLength. To be filled later.
237 + appendOp(0); // MaxMatchLength. To be filled later.
238 + appendOp(0); // Continue Loc. To be filled later.
239
240 // Emit the NOP
241 op = URX_BUILD(URX_NOP, 0);
242 - fRXPat->fCompiledPat->addElement(op, *fStatus);
243 - fRXPat->fCompiledPat->addElement(op, *fStatus);
244 + appendOp(op);
245 + appendOp(op);
246
247 // On the Parentheses stack, start a new frame and add the postions
248 // of the URX_LB_CONT and the NOP.
249 @@ -793,11 +787,10 @@
250 if (URX_TYPE(repeatedOp) == URX_SETREF) {
251 // Emit optimized code for [char set]+
252 int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeated Op));
253 - fRXPat->fCompiledPat->addElement(loopOpI, *fStatus);
254 - frameLoc = fRXPat->fFrameSize;
255 - fRXPat->fFrameSize++;
256 + appendOp(loopOpI);
257 + frameLoc = allocateStackData(1);
258 int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc);
259 - fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
260 + appendOp(loopOpC);
261 break;
262 }
263
264 @@ -813,11 +806,10 @@
265 if (fModeFlags & UREGEX_UNIX_LINES) {
266 loopOpI |= 2;
267 }
268 - fRXPat->fCompiledPat->addElement(loopOpI, *fStatus);
269 - frameLoc = fRXPat->fFrameSize;
270 - fRXPat->fFrameSize++;
271 + appendOp(loopOpI);
272 + frameLoc = allocateStackData(1);
273 int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc);
274 - fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
275 + appendOp(loopOpC);
276 break;
277 }
278
279 @@ -831,18 +823,17 @@
280 // Zero length match is possible.
281 // Emit the code sequence that can handle it.
282 insertOp(topLoc);
283 - frameLoc = fRXPat->fFrameSize;
284 - fRXPat->fFrameSize++;
285 + frameLoc = allocateStackData(1);
286
287 int32_t op = URX_BUILD(URX_STO_INP_LOC, frameLoc);
288 fRXPat->fCompiledPat->setElementAt(op, topLoc);
289
290 op = URX_BUILD(URX_JMP_SAV_X, topLoc+1);
291 - fRXPat->fCompiledPat->addElement(op, *fStatus);
292 + appendOp(op);
293 } else {
294 // Simpler code when the repeated body must match something non-empty
295 int32_t jmpOp = URX_BUILD(URX_JMP_SAV, topLoc);
296 - fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
297 + appendOp(jmpOp);
298 }
299 }
300 break;
301 @@ -855,7 +846,7 @@
302 {
303 int32_t topLoc = blockTopLoc(FALSE);
304 int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, topLoc);
305 - fRXPat->fCompiledPat->addElement(saveStateOp, *fStatus);
306 + appendOp(saveStateOp);
307 }
308 break;
309
310 @@ -892,10 +883,10 @@
311 fRXPat->fCompiledPat->setElementAt(jmp1_op, jmp1_loc);
312
313 int32_t jmp2_op = URX_BUILD(URX_JMP, jmp2_loc+2);
314 - fRXPat->fCompiledPat->addElement(jmp2_op, *fStatus);
315 + appendOp(jmp2_op);
316
317 int32_t save_op = URX_BUILD(URX_STATE_SAVE, jmp1_loc+1);
318 - fRXPat->fCompiledPat->addElement(save_op, *fStatus);
319 + appendOp(save_op);
320 }
321 break;
322
323 @@ -937,10 +928,9 @@
324 // Emit optimized code for a [char set]*
325 int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeated Op));
326 fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc);
327 - dataLoc = fRXPat->fFrameSize;
328 - fRXPat->fFrameSize++;
329 + dataLoc = allocateStackData(1);
330 int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc);
331 - fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
332 + appendOp(loopOpC);
333 break;
334 }
335
336 @@ -957,10 +947,9 @@
337 loopOpI |= 2;
338 }
339 fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc);
340 - dataLoc = fRXPat->fFrameSize;
341 - fRXPat->fFrameSize++;
342 + dataLoc = allocateStackData(1);
343 int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc);
344 - fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
345 + appendOp(loopOpC);
346 break;
347 }
348 }
349 @@ -975,8 +964,7 @@
350 // extra loop-breaking code.
351 if (minMatchLength(saveStateLoc, fRXPat->fCompiledPat->size()-1) == 0) {
352 insertOp(saveStateLoc);
353 - dataLoc = fRXPat->fFrameSize;
354 - fRXPat->fFrameSize++;
355 + dataLoc = allocateStackData(1);
356
357 int32_t op = URX_BUILD(URX_STO_INP_LOC, dataLoc);
358 fRXPat->fCompiledPat->setElementAt(op, saveStateLoc+1);
359 @@ -992,7 +980,7 @@
360 fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
361
362 // Append the URX_JMP_SAV or URX_JMPX operation to the compiled pattern.
363 - fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
364 + appendOp(jmpOp);
365 }
366 break;
367
368 @@ -1009,7 +997,7 @@
369 int32_t jmpOp = URX_BUILD(URX_JMP, saveLoc);
370 int32_t stateSaveOp = URX_BUILD(URX_STATE_SAVE, jmpLoc+1);
371 fRXPat->fCompiledPat->setElementAt(jmpOp, jmpLoc);
372 - fRXPat->fCompiledPat->addElement(stateSaveOp, *fStatus);
373 + appendOp(stateSaveOp);
374 }
375 break;
376
377 @@ -1078,9 +1066,9 @@
378
379 // First the STO_SP before the start of the loop
380 insertOp(topLoc);
381 - int32_t varLoc = fRXPat->fDataSize; // Reserve a data locati on for saving the
382 - fRXPat->fDataSize += 1; // state stack ptr.
383 - int32_t op = URX_BUILD(URX_STO_SP, varLoc);
384 +
385 + int32_t varLoc = allocateData(1); // Reserve a data location for saving the state stack ptr.
386 + int32_t op = URX_BUILD(URX_STO_SP, varLoc);
387 fRXPat->fCompiledPat->setElementAt(op, topLoc);
388
389 int32_t loopOp = (int32_t)fRXPat->fCompiledPat->popi();
390 @@ -1090,7 +1078,7 @@
391
392 // Then the LD_SP after the end of the loop
393 op = URX_BUILD(URX_LD_SP, varLoc);
394 - fRXPat->fCompiledPat->addElement(op, *fStatus);
395 + appendOp(op);
396 }
397
398 break;
399 @@ -1134,7 +1122,7 @@
400 } else {
401 op = URX_BUILD(URX_DOTANY, 0);
402 }
403 - fRXPat->fCompiledPat->addElement(op, *fStatus);
404 + appendOp(op);
405 }
406 break;
407
408 @@ -1151,7 +1139,7 @@
409 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UR EGEX_UNIX_LINES) != 0) {
410 op = URX_CARET_M_UNIX;
411 }
412 - fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
413 + appendOp(URX_BUILD(op, 0));
414 }
415 break;
416
417 @@ -1168,13 +1156,13 @@
418 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UR EGEX_UNIX_LINES) != 0) {
419 op = URX_DOLLAR_MD;
420 }
421 - fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
422 + appendOp(URX_BUILD(op, 0));
423 }
424 break;
425
426 case doBackslashA:
427 fixLiterals(FALSE);
428 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_CARET, 0), *fStatus);
429 + appendOp(URX_BUILD(URX_CARET, 0));
430 break;
431
432 case doBackslashB:
433 @@ -1186,7 +1174,7 @@
434 #endif
435 fixLiterals(FALSE);
436 int32_t op = (fModeFlags & UREGEX_UWORD)? URX_BACKSLASH_BU : URX_BA CKSLASH_B;
437 - fRXPat->fCompiledPat->addElement(URX_BUILD(op, 1), *fStatus);
438 + appendOp(URX_BUILD(op, 1));
439 }
440 break;
441
442 @@ -1199,63 +1187,59 @@
443 #endif
444 fixLiterals(FALSE);
445 int32_t op = (fModeFlags & UREGEX_UWORD)? URX_BACKSLASH_BU : URX_BA CKSLASH_B;
446 - fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
447 + appendOp(URX_BUILD(op, 0));
448 }
449 break;
450
451 case doBackslashD:
452 fixLiterals(FALSE);
453 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_D, 1), *fStatu s);
454 + appendOp(URX_BUILD(URX_BACKSLASH_D, 1));
455 break;
456
457 case doBackslashd:
458 fixLiterals(FALSE);
459 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_D, 0), *fStatu s);
460 + appendOp(URX_BUILD(URX_BACKSLASH_D, 0));
461 break;
462
463 case doBackslashG:
464 fixLiterals(FALSE);
465 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_G, 0), *fStatu s);
466 + appendOp(URX_BUILD(URX_BACKSLASH_G, 0));
467 break;
468
469 case doBackslashS:
470 fixLiterals(FALSE);
471 - fRXPat->fCompiledPat->addElement(
472 - URX_BUILD(URX_STAT_SETREF_N, URX_ISSPACE_SET), *fStatus);
473 + appendOp(URX_BUILD(URX_STAT_SETREF_N, URX_ISSPACE_SET));
474 break;
475
476 case doBackslashs:
477 fixLiterals(FALSE);
478 - fRXPat->fCompiledPat->addElement(
479 - URX_BUILD(URX_STATIC_SETREF, URX_ISSPACE_SET), *fStatus);
480 + appendOp(URX_BUILD(URX_STATIC_SETREF, URX_ISSPACE_SET));
481 break;
482
483 case doBackslashW:
484 fixLiterals(FALSE);
485 - fRXPat->fCompiledPat->addElement(
486 - URX_BUILD(URX_STAT_SETREF_N, URX_ISWORD_SET), *fStatus);
487 + appendOp(URX_BUILD(URX_STAT_SETREF_N, URX_ISWORD_SET));
488 break;
489
490 case doBackslashw:
491 fixLiterals(FALSE);
492 - fRXPat->fCompiledPat->addElement(
493 - URX_BUILD(URX_STATIC_SETREF, URX_ISWORD_SET), *fStatus);
494 + appendOp(URX_BUILD(URX_STATIC_SETREF, URX_ISWORD_SET));
495 break;
496
497 case doBackslashX:
498 fixLiterals(FALSE);
499 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_X, 0), *fStatu s);
500 + appendOp(URX_BUILD(URX_BACKSLASH_X, 0));
501 break;
502
503
504 case doBackslashZ:
505 fixLiterals(FALSE);
506 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_DOLLAR, 0), *fStatus);
507 + appendOp(URX_BUILD(URX_DOLLAR, 0));
508 break;
509
510 case doBackslashz:
511 fixLiterals(FALSE);
512 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_Z, 0), *fStatu s);
513 + appendOp(URX_BUILD(URX_BACKSLASH_Z, 0));
514 break;
515
516 case doEscapeError:
517 @@ -1321,7 +1305,7 @@
518 } else {
519 op = URX_BUILD(URX_BACKREF, groupNum);
520 }
521 - fRXPat->fCompiledPat->addElement(op, *fStatus);
522 + appendOp(op);
523 }
524 break;
525
526 @@ -1342,22 +1326,21 @@
527 {
528 // Emit the STO_SP
529 int32_t topLoc = blockTopLoc(TRUE);
530 - int32_t stoLoc = fRXPat->fDataSize;
531 - fRXPat->fDataSize++; // Reserve the data location for storing save stack ptr.
532 + int32_t stoLoc = allocateData(1); // Reserve the data location for storing save stack ptr.
533 int32_t op = URX_BUILD(URX_STO_SP, stoLoc);
534 fRXPat->fCompiledPat->setElementAt(op, topLoc);
535
536 // Emit the STATE_SAVE
537 op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+2);
538 - fRXPat->fCompiledPat->addElement(op, *fStatus);
539 + appendOp(op);
540
541 // Emit the JMP
542 op = URX_BUILD(URX_JMP, topLoc+1);
543 - fRXPat->fCompiledPat->addElement(op, *fStatus);
544 + appendOp(op);
545
546 // Emit the LD_SP
547 op = URX_BUILD(URX_LD_SP, stoLoc);
548 - fRXPat->fCompiledPat->addElement(op, *fStatus);
549 + appendOp(op);
550 }
551 break;
552
553 @@ -1377,8 +1360,7 @@
554 insertOp(topLoc);
555
556 // emit STO_SP loc
557 - int32_t stoLoc = fRXPat->fDataSize;
558 - fRXPat->fDataSize++; // Reserve the data location for storing save stack ptr.
559 + int32_t stoLoc = allocateData(1); // Reserve the data location for storing save stack ptr.
560 int32_t op = URX_BUILD(URX_STO_SP, stoLoc);
561 fRXPat->fCompiledPat->setElementAt(op, topLoc);
562
563 @@ -1389,11 +1371,11 @@
564
565 // Append the JMP operation.
566 op = URX_BUILD(URX_JMP, topLoc+1);
567 - fRXPat->fCompiledPat->addElement(op, *fStatus);
568 + appendOp(op);
569
570 // Emit the LD_SP loc
571 op = URX_BUILD(URX_LD_SP, stoLoc);
572 - fRXPat->fCompiledPat->addElement(op, *fStatus);
573 + appendOp(op);
574 }
575 break;
576
577 @@ -1412,8 +1394,7 @@
578 insertOp(topLoc);
579
580 // Emit the STO_SP
581 - int32_t stoLoc = fRXPat->fDataSize;
582 - fRXPat->fDataSize++; // Reserve the data location for storing save stack ptr.
583 + int32_t stoLoc = allocateData(1); // Reserve the data location for storing save stack ptr.
584 int32_t op = URX_BUILD(URX_STO_SP, stoLoc);
585 fRXPat->fCompiledPat->setElementAt(op, topLoc);
586
587 @@ -1424,7 +1405,7 @@
588
589 // Emit the LD_SP
590 op = URX_BUILD(URX_LD_SP, stoLoc);
591 - fRXPat->fCompiledPat->addElement(op, *fStatus);
592 + appendOp(op);
593 }
594 break;
595
596 @@ -1481,8 +1462,8 @@
597 // is an '|' alternation within the parens.
598 {
599 fixLiterals(FALSE);
600 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
601 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
602 + appendOp(URX_BUILD(URX_NOP, 0));
603 + appendOp(URX_BUILD(URX_NOP, 0));
604
605 // On the Parentheses stack, start a new frame and add the postions
606 // of the two NOPs (a normal non-capturing () frame, except for the
607 @@ -1862,7 +1843,7 @@
608 } else {
609 op = URX_BUILD(URX_ONECHAR, lastCodePoint);
610 }
611 - fRXPat->fCompiledPat->addElement(op, *fStatus);
612 + appendOp(op);
613 } else {
614 // Two or more chars, emit a URX_STRING to match them.
615 if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
616 @@ -1872,12 +1853,19 @@
617 // into two single char ops, for efficiency.
618 op = URX_BUILD(URX_STRING, fRXPat->fLiteralText.length());
619 }
620 - fRXPat->fCompiledPat->addElement(op, *fStatus);
621 + appendOp(op);
622 op = URX_BUILD(URX_STRING_LEN, fLiteralChars.length());
623 - fRXPat->fCompiledPat->addElement(op, *fStatus);
624 -
625 + appendOp(op);
626 +
627 // Add this string into the accumulated strings of the compiled pattern.
628 + // The total size of the accumulated strings must be restricted to 24 bits because
629 + // string indexes appear as compiled pattern operand values.
630 + // This is the only place that the pattern.fLiteralText string is modified.
631 +
632 fRXPat->fLiteralText.append(fLiteralChars);
633 + if (U_SUCCESS(*fStatus) && fRXPat->fLiteralText.length() > 0x00ffffff) {
634 + *fStatus = U_REGEX_PATTERN_TOO_BIG;
635 + }
636 }
637
638 fLiteralChars.remove();
639 @@ -1884,10 +1872,22 @@
640 }
641
642
643 +//------------------------------------------------------------------------------
644 +//
645 +// appendOp() Append a new instruction (op) onto the end of the compiled pattern.
646 +// Includes error checking, limiting the size of the pattern to lengths
647 +// that can be represented in the 24 bit operand field of an instruction.
648 +// The op is always appended; if the pattern then holds more than 0x00fffff0 ops and no earlier error is recorded, *fStatus is set to U_REGEX_PATTERN_TOO_BIG.
649 +//
650 +//------------------------------------------------------------------------------
651 +void RegexCompile::appendOp(int32_t op) {
652 + fRXPat->fCompiledPat->addElement(op, *fStatus);
653 + if ((fRXPat->fCompiledPat->size() > 0x00fffff0) && U_SUCCESS(*fStatus)) {
654 + *fStatus = U_REGEX_PATTERN_TOO_BIG;
655 + }
656 +}
657
658
659 -
660 -
661 //------------------------------------------------------------------------------
662 //
663 // insertOp() Insert a slot for a new opcode into the already
664 @@ -1947,9 +1947,61 @@
665 }
666
667
668 +//------------------------------------------------------------------------------
669 +// size: number of data slots to reserve; must be in the range 1..0x100.
670 +// allocateData() Allocate storage in the matcher static data area.
671 +// Return the index for the newly allocated data (the value of fDataSize before it is increased by size).
672 +// The storage does not actually exist until we are running a match
673 +// operation, but the storage indexes are inserted into various
674 +// opcodes while compiling the pattern.
675 +// Returns 0 without allocating if *fStatus is already a failure, or (setting U_REGEX_INTERNAL_ERROR) if size is out of range or fDataSize is negative. Sets U_REGEX_PATTERN_TOO_BIG once fDataSize reaches 0x00fffff0; the index is still returned in that case.
676 +//------------------------------------------------------------------------------
677 +int32_t RegexCompile::allocateData(int32_t size) {
678 + if (U_FAILURE(*fStatus)) {
679 + return 0;
680 + }
681 + if (size <= 0 || size > 0x100 || fRXPat->fDataSize < 0) {
682 + *fStatus = U_REGEX_INTERNAL_ERROR;
683 + return 0;
684 + }
685 + int32_t dataIndex = fRXPat->fDataSize;
686 + fRXPat->fDataSize += size;
687 + if (fRXPat->fDataSize >= 0x00fffff0) {
688 + *fStatus = U_REGEX_PATTERN_TOO_BIG;
689 + }
690 + return dataIndex;
691 +}
692
693 +
694 //------------------------------------------------------------------------------
695 //
696 +// allocateStackData() Allocate space in the back-tracking stack frame.
697 +// Return the index for the newly allocated data (the value of fFrameSize before it is increased by size).
698 +// The frame indexes are inserted into various
699 +// opcodes while compiling the pattern, meaning that frame
700 +// size must be restricted to the size that will fit
701 +// as an operand (24 bits).
702 +// Returns 0 without allocating if *fStatus is already a failure, or (setting U_REGEX_INTERNAL_ERROR) if size is not in 1..0x100 or fFrameSize is negative. Sets U_REGEX_PATTERN_TOO_BIG once fFrameSize reaches 0x00fffff0; the index is still returned in that case.
703 +//------------------------------------------------------------------------------
704 +int32_t RegexCompile::allocateStackData(int32_t size) {
705 + if (U_FAILURE(*fStatus)) {
706 + return 0;
707 + }
708 + if (size <= 0 || size > 0x100 || fRXPat->fFrameSize < 0) {
709 + *fStatus = U_REGEX_INTERNAL_ERROR;
710 + return 0;
711 + }
712 + int32_t dataIndex = fRXPat->fFrameSize;
713 + fRXPat->fFrameSize += size;
714 + if (fRXPat->fFrameSize >= 0x00fffff0) {
715 + *fStatus = U_REGEX_PATTERN_TOO_BIG;
716 + }
717 + return dataIndex;
718 +}
719 +
720 +
721 +//------------------------------------------------------------------------------
722 +//
723 // blockTopLoc() Find or create a location in the compiled pattern
724 // at the start of the operation or block that has
725 // just been compiled. Needed when a quantifier (* or
726 @@ -2065,7 +2117,7 @@
727
728 int32_t frameVarLocation = URX_VAL(captureOp);
729 int32_t endCaptureOp = URX_BUILD(URX_END_CAPTURE, frameVarLocatio n);
730 - fRXPat->fCompiledPat->addElement(endCaptureOp, *fStatus);
731 + appendOp(endCaptureOp);
732 }
733 break;
734 case atomic:
735 @@ -2077,7 +2129,7 @@
736 U_ASSERT(URX_TYPE(stoOp) == URX_STO_SP);
737 int32_t stoLoc = URX_VAL(stoOp);
738 int32_t ldOp = URX_BUILD(URX_LD_SP, stoLoc);
739 - fRXPat->fCompiledPat->addElement(ldOp, *fStatus);
740 + appendOp(ldOp);
741 }
742 break;
743
744 @@ -2087,7 +2139,7 @@
745 U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
746 int32_t dataLoc = URX_VAL(startOp);
747 int32_t op = URX_BUILD(URX_LA_END, dataLoc);
748 - fRXPat->fCompiledPat->addElement(op, *fStatus);
749 + appendOp(op);
750 }
751 break;
752
753 @@ -2098,11 +2150,11 @@
754 U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
755 int32_t dataLoc = URX_VAL(startOp);
756 int32_t op = URX_BUILD(URX_LA_END, dataLoc);
757 - fRXPat->fCompiledPat->addElement(op, *fStatus);
758 + appendOp(op);
759 op = URX_BUILD(URX_BACKTRACK, 0);
760 - fRXPat->fCompiledPat->addElement(op, *fStatus);
761 + appendOp(op);
762 op = URX_BUILD(URX_LA_END, dataLoc);
763 - fRXPat->fCompiledPat->addElement(op, *fStatus);
764 + appendOp(op);
765
766 // Patch the URX_SAVE near the top of the block.
767 // The destination of the SAVE is the final LA_END that was just added.
768 @@ -2123,9 +2175,9 @@
769 U_ASSERT(URX_TYPE(startOp) == URX_LB_START);
770 int32_t dataLoc = URX_VAL(startOp);
771 int32_t op = URX_BUILD(URX_LB_END, dataLoc);
772 - fRXPat->fCompiledPat->addElement(op, *fStatus);
773 - op = URX_BUILD(URX_LA_END, dataLoc);
774 - fRXPat->fCompiledPat->addElement(op, *fStatus);
775 + appendOp(op);
776 + op = URX_BUILD(URX_LA_END, dataLoc);
777 + appendOp(op);
778
779 // Determine the min and max bounds for the length of the
780 // string that the pattern can match.
781 @@ -2162,7 +2214,7 @@
782 U_ASSERT(URX_TYPE(startOp) == URX_LB_START);
783 int32_t dataLoc = URX_VAL(startOp);
784 int32_t op = URX_BUILD(URX_LBN_END, dataLoc);
785 - fRXPat->fCompiledPat->addElement(op, *fStatus);
786 + appendOp(op);
787
788 // Determine the min and max bounds for the length of the
789 // string that the pattern can match.
790 @@ -2228,7 +2280,7 @@
791 case 0:
792 {
793 // Set of no elements. Always fails to match.
794 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKTRACK, 0), *fSta tus);
795 + appendOp(URX_BUILD(URX_BACKTRACK, 0));
796 delete theSet;
797 }
798 break;
799 @@ -2250,7 +2302,7 @@
800 int32_t setNumber = fRXPat->fSets->size();
801 fRXPat->fSets->addElement(theSet, *fStatus);
802 int32_t setOp = URX_BUILD(URX_SETREF, setNumber);
803 - fRXPat->fCompiledPat->addElement(setOp, *fStatus);
804 + appendOp(setOp);
805 }
806 }
807 }
808 @@ -2289,11 +2341,8 @@
809 // counterLoc --> Loop counter
810 // +1 --> Input index (for breaking non-progressing loops)
811 // (Only present if unbounded upper limit on loop)
812 - int32_t counterLoc = fRXPat->fFrameSize;
813 - fRXPat->fFrameSize++;
814 - if (fIntervalUpper < 0) {
815 - fRXPat->fFrameSize++;
816 - }
817 + int32_t dataSize = fIntervalUpper < 0 ? 2 : 1;
818 + int32_t counterLoc = allocateStackData(dataSize);
819
820 int32_t op = URX_BUILD(InitOp, counterLoc);
821 fRXPat->fCompiledPat->setElementAt(op, topOfBlock);
822 @@ -2313,7 +2362,7 @@
823 // Apend the CTR_LOOP op. The operand is the location of the CTR_INIT op.
824 // Goes at end of the block being looped over, so just append to the code so far.
825 op = URX_BUILD(LoopOp, topOfBlock);
826 - fRXPat->fCompiledPat->addElement(op, *fStatus);
827 + appendOp(op);
828
829 if ((fIntervalLow & 0xff000000) != 0 ||
830 (fIntervalUpper > 0 && (fIntervalUpper & 0xff000000) != 0)) {
831 @@ -2380,12 +2429,12 @@
832 int32_t i;
833 for (i=1; i<fIntervalUpper; i++ ) {
834 if (i == fIntervalLow) {
835 - fRXPat->fCompiledPat->addElement(saveOp, *fStatus);
836 + appendOp(saveOp);
837 }
838 if (i > fIntervalLow) {
839 - fRXPat->fCompiledPat->addElement(saveOp, *fStatus);
840 + appendOp(saveOp);
841 }
842 - fRXPat->fCompiledPat->addElement(op, *fStatus);
843 + appendOp(op);
844 }
845 return TRUE;
846 }
847 Index: source/i18n/regexcmp.h
848 ===================================================================
849 --- source/i18n/regexcmp.h (revision 292943)
850 +++ source/i18n/regexcmp.h (working copy)
851 @@ -103,6 +103,11 @@
852 void fixLiterals(UBool split=FALSE); // Generate code for pendi ng literal characters.
853 void insertOp(int32_t where); // Open up a slot for a ne w op in the
854 // generated code at the specified location.
855 + void appendOp(int32_t op); // Append a new op to the compiled pattern.
856 + int32_t allocateData(int32_t size); // Allocate space in the matcher data area.
857 + // Return index of the newly allocated data.
858 + int32_t allocateStackData(int32_t size); // Allocate space in the match back-track stack frame.
859 + // Return offset index in the frame.
860 int32_t minMatchLength(int32_t start,
861 int32_t end);
862 int32_t maxMatchLength(int32_t start,
863 Index: source/i18n/regeximp.cpp
864 ===================================================================
865 --- source/i18n/regeximp.cpp (revision 292709)
866 +++ source/i18n/regeximp.cpp (working copy)
867 @@ -12,6 +12,8 @@
868
869 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
870 #include "regeximp.h"
871 +
872 +#include "uassert.h"
873 #include "unicode/utf16.h"
874
875 U_NAMESPACE_BEGIN
876 @@ -113,7 +115,27 @@
877 return fIndex;
878 }
879
880 +// Assemble a pcode instruction from the opcode (type) and operand (val) values:
881 +// type is shifted into the bits above the 24 bit operand field and OR'ed with val.
882 +// Out-of-range values should not occur - if they do it is from an internal error in the regex compiler. They trip U_ASSERT and are then replaced: a bad type by URX_RESERVED_OP, an over-large val by 0.
883
884 +// TODO: move into regexcmp, where it has access to fStatus.
885 +// NOP cleanly if U_FAILURE.
886 +// Set U_REGEX_INTERNAL_ERROR on bad operands.
887 +// NOTE(review): a negative val is not rejected by the checks below and would overwrite the opcode bits when OR'ed in - confirm no caller passes one.
888 +int32_t URX_BUILD(int32_t type, int32_t val) {
889 + if (type < 0 || type > 255) {
890 + U_ASSERT(FALSE);
891 + type = URX_RESERVED_OP;
892 + }
893 + if (val > 0x00ffffff) {
894 + U_ASSERT(FALSE);
895 + val = 0;
896 + }
897 + return (type << 24) | val;
898 +}
899 +
900 +
901 U_NAMESPACE_END
902
903 #endif
904 Index: source/i18n/regeximp.h
905 ===================================================================
906 --- source/i18n/regeximp.h (revision 292709)
907 +++ source/i18n/regeximp.h (working copy)
908 @@ -254,7 +254,7 @@
909 //
910 // Convenience macros for assembling and disassembling a compiled operation.
911 //
912 -#define URX_BUILD(type, val) (int32_t)((type << 24) | (val))
913 +int32_t URX_BUILD(int32_t type, int32_t val); // Packs opcode type and 24 bit operand val into one op; parameter order matches the definition in regeximp.cpp.
914 #define URX_TYPE(x) ((uint32_t)(x) >> 24)
915 #define URX_VAL(x) ((x) & 0xffffff)
916
917 Index: source/test/intltest/regextst.cpp
918 ===================================================================
919 --- source/test/intltest/regextst.cpp (revision 292709)
920 +++ source/test/intltest/regextst.cpp (working copy)
921 @@ -131,6 +131,9 @@
922 case 21: name = "Bug 9283";
923 if (exec) Bug9283();
924 break;
925 + case 22: name = "TestBug11371";
926 + if (exec) TestBug11371();
927 + break;
928
929 default: name = "";
930 break; //needed to end loop
931 @@ -5229,5 +5232,47 @@
932 }
933 }
934
935 +void RegexTest::TestBug11371() {
936 + UErrorCode status = U_ZERO_ERROR;
937 + UnicodeString patternString;
938 + // Each case builds an oversized pattern and expects compile() to report U_REGEX_PATTERN_TOO_BIG. Case 1: 8,000,000 empty capture groups.
939 + for (int i=0; i<8000000; i++) {
940 + patternString.append(UnicodeString("()"));
941 + }
942 + LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString, 0, status));
943 + if (status != U_REGEX_PATTERN_TOO_BIG) {
944 + errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s .",
945 + __FILE__, __LINE__, u_errorName(status));
946 + }
947 + // Case 2: one group holding 20,000,000 possessive "A++" items, followed by {0} and "B++".
948 + status = U_ZERO_ERROR;
949 + patternString = "(";
950 + for (int i=0; i<20000000; i++) {
951 + patternString.append(UnicodeString("A++"));
952 + }
953 + patternString.append(UnicodeString("){0}B++"));
954 + LocalPointer<RegexPattern> compiledPat2(RegexPattern::compile(patternString, 0, status));
955 + if (status != U_REGEX_PATTERN_TOO_BIG) {
956 + errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s .",
957 + __FILE__, __LINE__, u_errorName(status));
958 + }
959 +
960 + // Case 3: pattern with too much string data, such that string indexes overflow operand data.
961 + status = U_ZERO_ERROR;
962 + patternString = "";
963 + while (patternString.length() < 0x00ffffff) {
964 + patternString.append(UnicodeString("stuff and things dont you know, the se are a few of my favorite strings\n"));
965 + }
966 + patternString.append(UnicodeString("X? trailing string"));
967 + LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString, 0, status));
968 + if (compiledPat3.isValid()) { compiledPat3->dumpPattern(); } // The compile is expected to fail here, so only dump when a pattern object actually exists.
969 + if (status != U_REGEX_PATTERN_TOO_BIG) {
970 + errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s .",
971 + __FILE__, __LINE__, u_errorName(status));
972 + }
973 +
974 +
975 +
976 +}
977 +
978 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
979 -
980 Index: source/test/intltest/regextst.h
981 ===================================================================
982 --- source/test/intltest/regextst.h (revision 292709)
983 +++ source/test/intltest/regextst.h (working copy)
984 @@ -47,6 +47,7 @@
985 virtual void Bug7029();
986 virtual void Bug9283();
987 virtual void CheckInvBufSize();
988 + virtual void TestBug11371();
989
990 // The following functions are internal to the regexp tests.
991 virtual void assertUText(const char *expected, UText *actual, const char *f ile, int line);
OLDNEW
« no previous file with comments | « patches/regex.patch ('k') | patches/vscomp.patch » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698