OLD | NEW |
1 /* | 1 /* |
2 ************************************************************************** | 2 ************************************************************************** |
3 * Copyright (C) 2002-2014 International Business Machines Corporation * | 3 * Copyright (C) 2002-2015 International Business Machines Corporation * |
4 * and others. All rights reserved. * | 4 * and others. All rights reserved. * |
5 ************************************************************************** | 5 ************************************************************************** |
6 */ | 6 */ |
7 // | 7 // |
8 // file: rematch.cpp | 8 // file: rematch.cpp |
9 // | 9 // |
10 // Contains the implementation of class RegexMatcher, | 10 // Contains the implementation of class RegexMatcher, |
11 // which is one of the main API classes for the ICU regular expression p
ackage. | 11 // which is one of the main API classes for the ICU regular expression p
ackage. |
12 // | 12 // |
13 | 13 |
(...skipping 28 matching lines...) Expand all Loading... |
42 // backtrack point. | 42 // backtrack point. |
43 // | 43 // |
44 static const int32_t DEFAULT_BACKTRACK_STACK_CAPACITY = 8000000; | 44 static const int32_t DEFAULT_BACKTRACK_STACK_CAPACITY = 8000000; |
45 | 45 |
46 // Time limit counter constant. | 46 // Time limit counter constant. |
47 // Time limits for expression evaluation are in terms of quanta of work by | 47 // Time limits for expression evaluation are in terms of quanta of work by |
48 // the engine, each of which is 10,000 state saves. | 48 // the engine, each of which is 10,000 state saves. |
49 // This constant determines the number of state saves per tick. | 49 // This constant determines the number of state saves per tick. |
50 static const int32_t TIMER_INITIAL_VALUE = 10000; | 50 static const int32_t TIMER_INITIAL_VALUE = 10000; |
51 | 51 |
| 52 |
| 53 // Test for any of the Unicode line terminating characters. |
| 54 static inline UBool isLineTerminator(UChar32 c) { |
| 55 if (c & ~(0x0a | 0x0b | 0x0c | 0x0d | 0x85 | 0x2028 | 0x2029)) { |
| 56 return false; |
| 57 } |
| 58 return (c<=0x0d && c>=0x0a) || c==0x85 || c==0x2028 || c==0x2029; |
| 59 } |
| 60 |
52 //----------------------------------------------------------------------------- | 61 //----------------------------------------------------------------------------- |
53 // | 62 // |
54 // Constructor and Destructor | 63 // Constructor and Destructor |
55 // | 64 // |
56 //----------------------------------------------------------------------------- | 65 //----------------------------------------------------------------------------- |
57 RegexMatcher::RegexMatcher(const RegexPattern *pat) { | 66 RegexMatcher::RegexMatcher(const RegexPattern *pat) { |
58 fDeferredStatus = U_ZERO_ERROR; | 67 fDeferredStatus = U_ZERO_ERROR; |
59 init(fDeferredStatus); | 68 init(fDeferredStatus); |
60 if (U_FAILURE(fDeferredStatus)) { | 69 if (U_FAILURE(fDeferredStatus)) { |
61 return; | 70 return; |
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
209 fDeferredStatus = status; | 218 fDeferredStatus = status; |
210 fData = fSmallData; | 219 fData = fSmallData; |
211 fWordBreakItr = NULL; | 220 fWordBreakItr = NULL; |
212 | 221 |
213 fStack = NULL; | 222 fStack = NULL; |
214 fInputText = NULL; | 223 fInputText = NULL; |
215 fAltInputText = NULL; | 224 fAltInputText = NULL; |
216 fInput = NULL; | 225 fInput = NULL; |
217 fInputLength = 0; | 226 fInputLength = 0; |
218 fInputUniStrMaybeMutable = FALSE; | 227 fInputUniStrMaybeMutable = FALSE; |
219 | |
220 if (U_FAILURE(status)) { | |
221 fDeferredStatus = status; | |
222 } | |
223 } | 228 } |
224 | 229 |
225 // | 230 // |
226 // init2() Common initialization for use by RegexMatcher constructors, part 2
. | 231 // init2() Common initialization for use by RegexMatcher constructors, part 2
. |
227 // This handles the common setup to be done after the Pattern is avai
lable. | 232 // This handles the common setup to be done after the Pattern is avai
lable. |
228 // | 233 // |
229 void RegexMatcher::init2(UText *input, UErrorCode &status) { | 234 void RegexMatcher::init2(UText *input, UErrorCode &status) { |
230 if (U_FAILURE(status)) { | 235 if (U_FAILURE(status)) { |
231 fDeferredStatus = status; | 236 fDeferredStatus = status; |
232 return; | 237 return; |
(...skipping 17 matching lines...) Expand all Loading... |
250 setStackLimit(DEFAULT_BACKTRACK_STACK_CAPACITY, status); | 255 setStackLimit(DEFAULT_BACKTRACK_STACK_CAPACITY, status); |
251 if (U_FAILURE(status)) { | 256 if (U_FAILURE(status)) { |
252 fDeferredStatus = status; | 257 fDeferredStatus = status; |
253 return; | 258 return; |
254 } | 259 } |
255 } | 260 } |
256 | 261 |
257 | 262 |
258 static const UChar BACKSLASH = 0x5c; | 263 static const UChar BACKSLASH = 0x5c; |
259 static const UChar DOLLARSIGN = 0x24; | 264 static const UChar DOLLARSIGN = 0x24; |
| 265 static const UChar LEFTBRACKET = 0x7b; |
| 266 static const UChar RIGHTBRACKET = 0x7d; |
| 267 |
260 //------------------------------------------------------------------------------
-- | 268 //------------------------------------------------------------------------------
-- |
261 // | 269 // |
262 // appendReplacement | 270 // appendReplacement |
263 // | 271 // |
264 //------------------------------------------------------------------------------
-- | 272 //------------------------------------------------------------------------------
-- |
265 RegexMatcher &RegexMatcher::appendReplacement(UnicodeString &dest, | 273 RegexMatcher &RegexMatcher::appendReplacement(UnicodeString &dest, |
266 const UnicodeString &replacement, | 274 const UnicodeString &replacement, |
267 UErrorCode &status) { | 275 UErrorCode &status) { |
268 UText replacementText = UTEXT_INITIALIZER; | 276 UText replacementText = UTEXT_INITIALIZER; |
269 | 277 |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
324 uprv_free(inputChars); | 332 uprv_free(inputChars); |
325 } | 333 } |
326 } | 334 } |
327 fAppendPosition = fMatchEnd; | 335 fAppendPosition = fMatchEnd; |
328 | 336 |
329 | 337 |
330 // scan the replacement text, looking for substitutions ($n) and \escapes. | 338 // scan the replacement text, looking for substitutions ($n) and \escapes. |
331 // TODO: optimize this loop by efficiently scanning for '$' or '\', | 339 // TODO: optimize this loop by efficiently scanning for '$' or '\', |
332 // move entire ranges not containing substitutions. | 340 // move entire ranges not containing substitutions. |
333 UTEXT_SETNATIVEINDEX(replacement, 0); | 341 UTEXT_SETNATIVEINDEX(replacement, 0); |
334 UChar32 c = UTEXT_NEXT32(replacement); | 342 for (UChar32 c = UTEXT_NEXT32(replacement); U_SUCCESS(status) && c != U_SENT
INEL; c = UTEXT_NEXT32(replacement)) { |
335 while (c != U_SENTINEL) { | |
336 if (c == BACKSLASH) { | 343 if (c == BACKSLASH) { |
337 // Backslash Escape. Copy the following char out without further ch
ecks. | 344 // Backslash Escape. Copy the following char out without further ch
ecks. |
338 // Note: Surrogate pairs don't need any special
handling | 345 // Note: Surrogate pairs don't need any special
handling |
339 // The second half wont be a '$' or a '\',
and | 346 // The second half wont be a '$' or a '\',
and |
340 // will move to the dest normally on the n
ext | 347 // will move to the dest normally on the n
ext |
341 // loop iteration. | 348 // loop iteration. |
342 c = UTEXT_CURRENT32(replacement); | 349 c = UTEXT_CURRENT32(replacement); |
343 if (c == U_SENTINEL) { | 350 if (c == U_SENTINEL) { |
344 break; | 351 break; |
345 } | 352 } |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
391 destLen += utext_replace(dest, destLen, destLen, &c16, 1, &statu
s); | 398 destLen += utext_replace(dest, destLen, destLen, &c16, 1, &statu
s); |
392 } else { | 399 } else { |
393 UChar surrogate[2]; | 400 UChar surrogate[2]; |
394 surrogate[0] = U16_LEAD(c); | 401 surrogate[0] = U16_LEAD(c); |
395 surrogate[1] = U16_TRAIL(c); | 402 surrogate[1] = U16_TRAIL(c); |
396 if (U_SUCCESS(status)) { | 403 if (U_SUCCESS(status)) { |
397 destLen += utext_replace(dest, destLen, destLen, surrogate,
2, &status); | 404 destLen += utext_replace(dest, destLen, destLen, surrogate,
2, &status); |
398 } | 405 } |
399 } | 406 } |
400 } else { | 407 } else { |
401 // We've got a $. Pick up a capture group number if one follows. | 408 // We've got a $. Pick up a capture group name or number if one fol
lows. |
402 // Consume at most the number of digits necessary for the largest ca
pture | 409 // Consume digits so long as the resulting group number <= the numbe
r of |
403 // number that is valid for this pattern. | 410 // capture groups in the pattern. |
404 | 411 |
| 412 int32_t groupNum = 0; |
405 int32_t numDigits = 0; | 413 int32_t numDigits = 0; |
406 int32_t groupNum = 0; | 414 UChar32 nextChar = utext_current32(replacement); |
407 UChar32 digitC; | 415 if (nextChar == LEFTBRACKET) { |
408 for (;;) { | 416 // Scan for a Named Capture Group, ${name}. |
409 digitC = UTEXT_CURRENT32(replacement); | 417 UnicodeString groupName; |
410 if (digitC == U_SENTINEL) { | 418 utext_next32(replacement); |
411 break; | 419 while(U_SUCCESS(status) && nextChar != RIGHTBRACKET) { |
| 420 nextChar = utext_next32(replacement); |
| 421 if (nextChar == U_SENTINEL) { |
| 422 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; |
| 423 } else if ((nextChar >= 0x41 && nextChar <= 0x5a) || /
/ A..Z |
| 424 (nextChar >= 0x61 && nextChar <= 0x7a) || /
/ a..z |
| 425 (nextChar >= 0x31 && nextChar <= 0x39)) { /
/ 0..9 |
| 426 groupName.append(nextChar); |
| 427 } else if (nextChar == RIGHTBRACKET) { |
| 428 groupNum = uhash_geti(fPattern->fNamedCaptureMap, &group
Name); |
| 429 if (groupNum == 0) { |
| 430 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; |
| 431 } |
| 432 } else { |
| 433 // Character was something other than a name char or a c
losing '}' |
| 434 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; |
| 435 } |
412 } | 436 } |
413 if (u_isdigit(digitC) == FALSE) { | 437 |
414 break; | 438 } else if (u_isdigit(nextChar)) { |
| 439 // $n Scan for a capture group number |
| 440 int32_t numCaptureGroups = fPattern->fGroupMap->size(); |
| 441 for (;;) { |
| 442 nextChar = UTEXT_CURRENT32(replacement); |
| 443 if (nextChar == U_SENTINEL) { |
| 444 break; |
| 445 } |
| 446 if (u_isdigit(nextChar) == FALSE) { |
| 447 break; |
| 448 } |
| 449 int32_t nextDigitVal = u_charDigitValue(nextChar); |
| 450 if (groupNum*10 + nextDigitVal > numCaptureGroups) { |
| 451 // Don't consume the next digit if it makes the capture
group number too big. |
| 452 if (numDigits == 0) { |
| 453 status = U_INDEX_OUTOFBOUNDS_ERROR; |
| 454 } |
| 455 break; |
| 456 } |
| 457 (void)UTEXT_NEXT32(replacement); |
| 458 groupNum=groupNum*10 + nextDigitVal; |
| 459 ++numDigits; |
415 } | 460 } |
416 (void)UTEXT_NEXT32(replacement); | 461 } else { |
417 groupNum=groupNum*10 + u_charDigitValue(digitC); | 462 // $ not followed by capture group name or number. |
418 numDigits++; | 463 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; |
419 if (numDigits >= fPattern->fMaxCaptureDigits) { | |
420 break; | |
421 } | |
422 } | 464 } |
423 | 465 |
424 | 466 if (U_SUCCESS(status)) { |
425 if (numDigits == 0) { | |
426 // The $ didn't introduce a group number at all. | |
427 // Treat it as just part of the substitution text. | |
428 UChar c16 = DOLLARSIGN; | |
429 destLen += utext_replace(dest, destLen, destLen, &c16, 1, &statu
s); | |
430 } else { | |
431 // Finally, append the capture group data to the destination. | |
432 destLen += appendGroup(groupNum, dest, status); | 467 destLen += appendGroup(groupNum, dest, status); |
433 if (U_FAILURE(status)) { | |
434 // Can fail if group number is out of range. | |
435 break; | |
436 } | |
437 } | 468 } |
438 } | 469 } // End of $ capture group handling |
439 | 470 } // End of per-character loop through the replacement string. |
440 if (U_FAILURE(status)) { | |
441 break; | |
442 } else { | |
443 c = UTEXT_NEXT32(replacement); | |
444 } | |
445 } | |
446 | 471 |
447 return *this; | 472 return *this; |
448 } | 473 } |
449 | 474 |
450 | 475 |
451 | 476 |
452 //------------------------------------------------------------------------------
-- | 477 //------------------------------------------------------------------------------
-- |
453 // | 478 // |
454 // appendTail Intended to be used in conjunction with appendReplacement() | 479 // appendTail Intended to be used in conjunction with appendReplacement() |
455 // To the destination string, append everything following | 480 // To the destination string, append everything following |
(...skipping 354 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
810 c = UTEXT_NEXT32(fInputText); | 835 c = UTEXT_NEXT32(fInputText); |
811 startPos = UTEXT_GETNATIVEINDEX(fInputText); | 836 startPos = UTEXT_GETNATIVEINDEX(fInputText); |
812 // Note that it's perfectly OK for a pattern to have a zero-
length | 837 // Note that it's perfectly OK for a pattern to have a zero-
length |
813 // match at the end of a string, so we must make sure that
the loop | 838 // match at the end of a string, so we must make sure that
the loop |
814 // runs with startPos == testStartLimit the last time thro
ugh. | 839 // runs with startPos == testStartLimit the last time thro
ugh. |
815 if (findProgressInterrupt(startPos, status)) | 840 if (findProgressInterrupt(startPos, status)) |
816 return FALSE; | 841 return FALSE; |
817 } | 842 } |
818 } else { | 843 } else { |
819 for (;;) { | 844 for (;;) { |
820 if (((c & 0x7f) <= 0x29) && // First quickly bypass as m
any chars as possible | 845 if (isLineTerminator(c)) { |
821 ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x202
9 )) { | 846 if (c == 0x0d && startPos < fActiveLimit && UTEXT_CURREN
T32(fInputText) == 0x0a) { |
822 if (c == 0x0d && startPos < fActiveLimit && UTEXT_CU
RRENT32(fInputText) == 0x0a) { | 847 (void)UTEXT_NEXT32(fInputText); |
823 (void)UTEXT_NEXT32(fInputText); | 848 startPos = UTEXT_GETNATIVEINDEX(fInputText); |
824 startPos = UTEXT_GETNATIVEINDEX(fInputText); | 849 } |
825 } | 850 MatchAt(startPos, FALSE, status); |
826 MatchAt(startPos, FALSE, status); | 851 if (U_FAILURE(status)) { |
827 if (U_FAILURE(status)) { | 852 return FALSE; |
828 return FALSE; | 853 } |
829 } | 854 if (fMatch) { |
830 if (fMatch) { | 855 return TRUE; |
831 return TRUE; | 856 } |
832 } | 857 UTEXT_SETNATIVEINDEX(fInputText, startPos); |
833 UTEXT_SETNATIVEINDEX(fInputText, startPos); | |
834 } | 858 } |
835 if (startPos >= testStartLimit) { | 859 if (startPos >= testStartLimit) { |
836 fMatch = FALSE; | 860 fMatch = FALSE; |
837 fHitEnd = TRUE; | 861 fHitEnd = TRUE; |
838 return FALSE; | 862 return FALSE; |
839 } | 863 } |
840 c = UTEXT_NEXT32(fInputText); | 864 c = UTEXT_NEXT32(fInputText); |
841 startPos = UTEXT_GETNATIVEINDEX(fInputText); | 865 startPos = UTEXT_GETNATIVEINDEX(fInputText); |
842 // Note that it's perfectly OK for a pattern to have a zero-
length | 866 // Note that it's perfectly OK for a pattern to have a zero-
length |
843 // match at the end of a string, so we must make sure that
the loop | 867 // match at the end of a string, so we must make sure that
the loop |
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1071 U16_FWD_1(inputBuf, startPos, fActiveLimit); | 1095 U16_FWD_1(inputBuf, startPos, fActiveLimit); |
1072 // Note that it's perfectly OK for a pattern to have a zero-leng
th | 1096 // Note that it's perfectly OK for a pattern to have a zero-leng
th |
1073 // match at the end of a string, so we must make sure that the
loop | 1097 // match at the end of a string, so we must make sure that the
loop |
1074 // runs with startPos == testLen the last time through. | 1098 // runs with startPos == testLen the last time through. |
1075 if (findProgressInterrupt(startPos, status)) | 1099 if (findProgressInterrupt(startPos, status)) |
1076 return FALSE; | 1100 return FALSE; |
1077 } | 1101 } |
1078 } else { | 1102 } else { |
1079 for (;;) { | 1103 for (;;) { |
1080 c = inputBuf[startPos-1]; | 1104 c = inputBuf[startPos-1]; |
1081 if (((c & 0x7f) <= 0x29) && // First quickly bypass as many
chars as possible | 1105 if (isLineTerminator(c)) { |
1082 ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029 ))
{ | |
1083 if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPo
s] == 0x0a) { | 1106 if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPo
s] == 0x0a) { |
1084 startPos++; | 1107 startPos++; |
1085 } | 1108 } |
1086 MatchChunkAt(startPos, FALSE, status); | 1109 MatchChunkAt(startPos, FALSE, status); |
1087 if (U_FAILURE(status)) { | 1110 if (U_FAILURE(status)) { |
1088 return FALSE; | 1111 return FALSE; |
1089 } | 1112 } |
1090 if (fMatch) { | 1113 if (fMatch) { |
1091 return TRUE; | 1114 return TRUE; |
1092 } | 1115 } |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1168 group_len = e - s; | 1191 group_len = e - s; |
1169 | 1192 |
1170 dest = utext_clone(dest, fInputText, FALSE, TRUE, &status); | 1193 dest = utext_clone(dest, fInputText, FALSE, TRUE, &status); |
1171 if (dest) | 1194 if (dest) |
1172 UTEXT_SETNATIVEINDEX(dest, s); | 1195 UTEXT_SETNATIVEINDEX(dest, s); |
1173 return dest; | 1196 return dest; |
1174 } | 1197 } |
1175 | 1198 |
1176 UnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const { | 1199 UnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const { |
1177 UnicodeString result; | 1200 UnicodeString result; |
1178 if (U_FAILURE(status)) { | 1201 int64_t groupStart = start64(groupNum, status); |
| 1202 int64_t groupEnd = end64(groupNum, status); |
| 1203 if (U_FAILURE(status) || groupStart == -1 || groupStart == groupEnd) { |
1179 return result; | 1204 return result; |
1180 } | 1205 } |
1181 UText resultText = UTEXT_INITIALIZER; | 1206 |
1182 utext_openUnicodeString(&resultText, &result, &status); | 1207 // Get the group length using a utext_extract preflight. |
1183 group(groupNum, &resultText, status); | 1208 // UText is actually pretty efficient at this when underlying encoding is
UTF-16. |
1184 utext_close(&resultText); | 1209 int32_t length = utext_extract(fInputText, groupStart, groupEnd, NULL, 0, &s
tatus); |
| 1210 if (status != U_BUFFER_OVERFLOW_ERROR) { |
| 1211 return result; |
| 1212 } |
| 1213 |
| 1214 status = U_ZERO_ERROR; |
| 1215 UChar *buf = result.getBuffer(length); |
| 1216 if (buf == NULL) { |
| 1217 status = U_MEMORY_ALLOCATION_ERROR; |
| 1218 } else { |
| 1219 int32_t extractLength = utext_extract(fInputText, groupStart, groupEnd,
buf, length, &status); |
| 1220 result.releaseBuffer(extractLength); |
| 1221 U_ASSERT(length == extractLength); |
| 1222 } |
1185 return result; | 1223 return result; |
1186 } | 1224 } |
1187 | 1225 |
1188 | 1226 |
1189 // Return deep (mutable) clone | |
1190 // Technology Preview (as an API), but note that the UnicodeString API is i
mplemented | |
1191 // using this function. | |
1192 UText *RegexMatcher::group(int32_t groupNum, UText *dest, UErrorCode &status) co
nst { | |
1193 if (U_FAILURE(status)) { | |
1194 return dest; | |
1195 } | |
1196 | |
1197 if (U_FAILURE(fDeferredStatus)) { | |
1198 status = fDeferredStatus; | |
1199 } else if (fMatch == FALSE) { | |
1200 status = U_REGEX_INVALID_STATE; | |
1201 } else if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) { | |
1202 status = U_INDEX_OUTOFBOUNDS_ERROR; | |
1203 } | |
1204 if (U_FAILURE(status)) { | |
1205 return dest; | |
1206 } | |
1207 | |
1208 int64_t s, e; | |
1209 if (groupNum == 0) { | |
1210 s = fMatchStart; | |
1211 e = fMatchEnd; | |
1212 } else { | |
1213 int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1); | |
1214 U_ASSERT(groupOffset < fPattern->fFrameSize); | |
1215 U_ASSERT(groupOffset >= 0); | |
1216 s = fFrame->fExtra[groupOffset]; | |
1217 e = fFrame->fExtra[groupOffset+1]; | |
1218 } | |
1219 | |
1220 if (s < 0) { | |
1221 // A capture group wasn't part of the match | |
1222 if (dest) { | |
1223 utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status); | |
1224 return dest; | |
1225 } else { | |
1226 return utext_openUChars(NULL, NULL, 0, &status); | |
1227 } | |
1228 } | |
1229 U_ASSERT(s <= e); | |
1230 | |
1231 if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { | |
1232 U_ASSERT(e <= fInputLength); | |
1233 if (dest) { | |
1234 utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkCo
ntents+s, (int32_t)(e-s), &status); | |
1235 } else { | |
1236 UText groupText = UTEXT_INITIALIZER; | |
1237 utext_openUChars(&groupText, fInputText->chunkContents+s, e-s, &stat
us); | |
1238 dest = utext_clone(NULL, &groupText, TRUE, FALSE, &status); | |
1239 utext_close(&groupText); | |
1240 } | |
1241 } else { | |
1242 int32_t len16; | |
1243 if (UTEXT_USES_U16(fInputText)) { | |
1244 len16 = (int32_t)(e-s); | |
1245 } else { | |
1246 UErrorCode lengthStatus = U_ZERO_ERROR; | |
1247 len16 = utext_extract(fInputText, s, e, NULL, 0, &lengthStatus); | |
1248 } | |
1249 UChar *groupChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1)); | |
1250 if (groupChars == NULL) { | |
1251 status = U_MEMORY_ALLOCATION_ERROR; | |
1252 return dest; | |
1253 } | |
1254 utext_extract(fInputText, s, e, groupChars, len16+1, &status); | |
1255 | |
1256 if (dest) { | |
1257 utext_replace(dest, 0, utext_nativeLength(dest), groupChars, len16,
&status); | |
1258 } else { | |
1259 UText groupText = UTEXT_INITIALIZER; | |
1260 utext_openUChars(&groupText, groupChars, len16, &status); | |
1261 dest = utext_clone(NULL, &groupText, TRUE, FALSE, &status); | |
1262 utext_close(&groupText); | |
1263 } | |
1264 | |
1265 uprv_free(groupChars); | |
1266 } | |
1267 return dest; | |
1268 } | |
1269 | |
1270 //------------------------------------------------------------------------------
-- | 1227 //------------------------------------------------------------------------------
-- |
1271 // | 1228 // |
1272 // appendGroup() -- currently internal only, appends a group to a UText rather | 1229 // appendGroup() -- currently internal only, appends a group to a UText rather |
1273 // than replacing its contents | 1230 // than replacing its contents |
1274 // | 1231 // |
1275 //------------------------------------------------------------------------------
-- | 1232 //------------------------------------------------------------------------------
-- |
1276 | 1233 |
1277 int64_t RegexMatcher::appendGroup(int32_t groupNum, UText *dest, UErrorCode &sta
tus) const { | 1234 int64_t RegexMatcher::appendGroup(int32_t groupNum, UText *dest, UErrorCode &sta
tus) const { |
1278 if (U_FAILURE(status)) { | 1235 if (U_FAILURE(status)) { |
1279 return 0; | 1236 return 0; |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1340 | 1297 |
1341 //------------------------------------------------------------------------------
-- | 1298 //------------------------------------------------------------------------------
-- |
1342 // | 1299 // |
1343 // groupCount() | 1300 // groupCount() |
1344 // | 1301 // |
1345 //------------------------------------------------------------------------------
-- | 1302 //------------------------------------------------------------------------------
-- |
1346 int32_t RegexMatcher::groupCount() const { | 1303 int32_t RegexMatcher::groupCount() const { |
1347 return fPattern->fGroupMap->size(); | 1304 return fPattern->fGroupMap->size(); |
1348 } | 1305 } |
1349 | 1306 |
1350 | |
1351 | |
1352 //------------------------------------------------------------------------------
-- | 1307 //------------------------------------------------------------------------------
-- |
1353 // | 1308 // |
1354 // hasAnchoringBounds() | 1309 // hasAnchoringBounds() |
1355 // | 1310 // |
1356 //------------------------------------------------------------------------------
-- | 1311 //------------------------------------------------------------------------------
-- |
1357 UBool RegexMatcher::hasAnchoringBounds() const { | 1312 UBool RegexMatcher::hasAnchoringBounds() const { |
1358 return fAnchoringBounds; | 1313 return fAnchoringBounds; |
1359 } | 1314 } |
1360 | 1315 |
1361 | 1316 |
(...skipping 515 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1877 fTickCounter = TIMER_INITIAL_VALUE; | 1832 fTickCounter = TIMER_INITIAL_VALUE; |
1878 //resetStack(); // more expensive than it looks... | 1833 //resetStack(); // more expensive than it looks... |
1879 } | 1834 } |
1880 | 1835 |
1881 | 1836 |
1882 RegexMatcher &RegexMatcher::reset(const UnicodeString &input) { | 1837 RegexMatcher &RegexMatcher::reset(const UnicodeString &input) { |
1883 fInputText = utext_openConstUnicodeString(fInputText, &input, &fDeferredStat
us); | 1838 fInputText = utext_openConstUnicodeString(fInputText, &input, &fDeferredStat
us); |
1884 if (fPattern->fNeedsAltInput) { | 1839 if (fPattern->fNeedsAltInput) { |
1885 fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDe
ferredStatus); | 1840 fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDe
ferredStatus); |
1886 } | 1841 } |
| 1842 if (U_FAILURE(fDeferredStatus)) { |
| 1843 return *this; |
| 1844 } |
1887 fInputLength = utext_nativeLength(fInputText); | 1845 fInputLength = utext_nativeLength(fInputText); |
1888 | 1846 |
1889 reset(); | 1847 reset(); |
1890 delete fInput; | 1848 delete fInput; |
1891 fInput = NULL; | 1849 fInput = NULL; |
1892 | 1850 |
1893 // Do the following for any UnicodeString. | 1851 // Do the following for any UnicodeString. |
1894 // This is for compatibility for those clients who modify the input string
"live" during regex operations. | 1852 // This is for compatibility for those clients who modify the input string
"live" during regex operations. |
1895 fInputUniStrMaybeMutable = TRUE; | 1853 fInputUniStrMaybeMutable = TRUE; |
1896 | 1854 |
1897 if (fWordBreakItr != NULL) { | 1855 if (fWordBreakItr != NULL) { |
1898 #if UCONFIG_NO_BREAK_ITERATION==0 | 1856 #if UCONFIG_NO_BREAK_ITERATION==0 |
1899 UErrorCode status = U_ZERO_ERROR; | 1857 UErrorCode status = U_ZERO_ERROR; |
1900 fWordBreakItr->setText(fInputText, status); | 1858 fWordBreakItr->setText(fInputText, status); |
1901 #endif | 1859 #endif |
1902 } | 1860 } |
1903 return *this; | 1861 return *this; |
1904 } | 1862 } |
1905 | 1863 |
1906 | 1864 |
1907 RegexMatcher &RegexMatcher::reset(UText *input) { | 1865 RegexMatcher &RegexMatcher::reset(UText *input) { |
1908 if (fInputText != input) { | 1866 if (fInputText != input) { |
1909 fInputText = utext_clone(fInputText, input, FALSE, TRUE, &fDeferredStatu
s); | 1867 fInputText = utext_clone(fInputText, input, FALSE, TRUE, &fDeferredStatu
s); |
1910 if (fPattern->fNeedsAltInput) fAltInputText = utext_clone(fAltInputText,
fInputText, FALSE, TRUE, &fDeferredStatus); | 1868 if (fPattern->fNeedsAltInput) fAltInputText = utext_clone(fAltInputText,
fInputText, FALSE, TRUE, &fDeferredStatus); |
| 1869 if (U_FAILURE(fDeferredStatus)) { |
| 1870 return *this; |
| 1871 } |
1911 fInputLength = utext_nativeLength(fInputText); | 1872 fInputLength = utext_nativeLength(fInputText); |
1912 | 1873 |
1913 delete fInput; | 1874 delete fInput; |
1914 fInput = NULL; | 1875 fInput = NULL; |
1915 | 1876 |
1916 if (fWordBreakItr != NULL) { | 1877 if (fWordBreakItr != NULL) { |
1917 #if UCONFIG_NO_BREAK_ITERATION==0 | 1878 #if UCONFIG_NO_BREAK_ITERATION==0 |
1918 UErrorCode status = U_ZERO_ERROR; | 1879 UErrorCode status = U_ZERO_ERROR; |
1919 fWordBreakItr->setText(input, status); | 1880 fWordBreakItr->setText(input, status); |
1920 #endif | 1881 #endif |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1988 // | 1949 // |
1989 // setTrace | 1950 // setTrace |
1990 // | 1951 // |
1991 //------------------------------------------------------------------------------
-- | 1952 //------------------------------------------------------------------------------
-- |
1992 void RegexMatcher::setTrace(UBool state) { | 1953 void RegexMatcher::setTrace(UBool state) { |
1993 fTraceDebug = state; | 1954 fTraceDebug = state; |
1994 } | 1955 } |
1995 | 1956 |
1996 | 1957 |
1997 | 1958 |
| 1959 /** |
| 1960 * UText, replace entire contents of the destination UText with a substring of
the source UText. |
| 1961 * |
| 1962 * @param src The source UText |
| 1963 * @param dest The destination UText. Must be writable. |
| 1964 * May be NULL, in which case a new UText will be allocated. |
| 1965 * @param start Start index of source substring. |
| 1966 * @param limit Limit index of source substring. |
| 1967 * @param status An error code. |
| 1968 */ |
| 1969 static UText *utext_extract_replace(UText *src, UText *dest, int64_t start, int6
4_t limit, UErrorCode *status) { |
| 1970 if (U_FAILURE(*status)) { |
| 1971 return dest; |
| 1972 } |
| 1973 if (start == limit) { |
| 1974 if (dest) { |
| 1975 utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, status); |
| 1976 return dest; |
| 1977 } else { |
| 1978 return utext_openUChars(NULL, NULL, 0, status); |
| 1979 } |
| 1980 } |
| 1981 int32_t length = utext_extract(src, start, limit, NULL, 0, status); |
| 1982 if (*status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*status)) { |
| 1983 return dest; |
| 1984 } |
| 1985 *status = U_ZERO_ERROR; |
| 1986 MaybeStackArray<UChar, 40> buffer; |
| 1987 if (length >= buffer.getCapacity()) { |
| 1988 UChar *newBuf = buffer.resize(length+1); // Leave space for terminatin
g Nul. |
| 1989 if (newBuf == NULL) { |
| 1990 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1991 } |
| 1992 } |
| 1993 utext_extract(src, start, limit, buffer.getAlias(), length+1, status); |
| 1994 if (dest) { |
| 1995 utext_replace(dest, 0, utext_nativeLength(dest), buffer.getAlias(), leng
th, status); |
| 1996 return dest; |
| 1997 } |
| 1998 |
| 1999 // Caller did not provide a prexisting UText. |
| 2000 // Open a new one, and have it adopt the text buffer storage. |
| 2001 if (U_FAILURE(*status)) { |
| 2002 return NULL; |
| 2003 } |
| 2004 int32_t ownedLength = 0; |
| 2005 UChar *ownedBuf = buffer.orphanOrClone(length+1, ownedLength); |
| 2006 if (ownedBuf == NULL) { |
| 2007 *status = U_MEMORY_ALLOCATION_ERROR; |
| 2008 return NULL; |
| 2009 } |
| 2010 UText *result = utext_openUChars(NULL, ownedBuf, length, status); |
| 2011 if (U_FAILURE(*status)) { |
| 2012 uprv_free(ownedBuf); |
| 2013 return NULL; |
| 2014 } |
| 2015 result->providerProperties |= (1 << UTEXT_PROVIDER_OWNS_TEXT); |
| 2016 return result; |
| 2017 } |
| 2018 |
| 2019 |
1998 //--------------------------------------------------------------------- | 2020 //--------------------------------------------------------------------- |
1999 // | 2021 // |
2000 // split | 2022 // split |
2001 // | 2023 // |
2002 //--------------------------------------------------------------------- | 2024 //--------------------------------------------------------------------- |
2003 int32_t RegexMatcher::split(const UnicodeString &input, | 2025 int32_t RegexMatcher::split(const UnicodeString &input, |
2004 UnicodeString dest[], | 2026 UnicodeString dest[], |
2005 int32_t destCapacity, | 2027 int32_t destCapacity, |
2006 UErrorCode &status) | 2028 UErrorCode &status) |
2007 { | 2029 { |
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2154 // text goes out into the next n destination strings. | 2176 // text goes out into the next n destination strings. |
2155 int32_t groupNum; | 2177 int32_t groupNum; |
2156 for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) { | 2178 for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) { |
2157 if (i >= destCapacity-2) { | 2179 if (i >= destCapacity-2) { |
2158 // Never fill the last available output string with capture
group text. | 2180 // Never fill the last available output string with capture
group text. |
2159 // It will filled with the last field, the remainder of the | 2181 // It will filled with the last field, the remainder of the |
2160 // unsplit input text. | 2182 // unsplit input text. |
2161 break; | 2183 break; |
2162 } | 2184 } |
2163 i++; | 2185 i++; |
2164 dest[i] = group(groupNum, dest[i], status); | 2186 dest[i] = utext_extract_replace(fInputText, dest[i], |
| 2187 start64(groupNum, status), end64(
groupNum, status), &status); |
2165 } | 2188 } |
2166 | 2189 |
2167 if (nextOutputStringStart == fActiveLimit) { | 2190 if (nextOutputStringStart == fActiveLimit) { |
2168 // The delimiter was at the end of the string. We're done, but
first | 2191 // The delimiter was at the end of the string. We're done, but
first |
2169 // we output one last empty string, for the empty field followin
g | 2192 // we output one last empty string, for the empty field followin
g |
2170 // the delimiter at the end of input. | 2193 // the delimiter at the end of input. |
2171 if (i+1 < destCapacity) { | 2194 if (i+1 < destCapacity) { |
2172 ++i; | 2195 ++i; |
2173 if (dest[i] == NULL) { | 2196 if (dest[i] == NULL) { |
2174 dest[i] = utext_openUChars(NULL, NULL, 0, &status); | 2197 dest[i] = utext_openUChars(NULL, NULL, 0, &status); |
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2466 // new stack frame to all -1. The -1s are needed for capture group li
mits, | 2489 // new stack frame to all -1. The -1s are needed for capture group li
mits, |
2467 // where they indicate that a group has not yet matched anything. | 2490 // where they indicate that a group has not yet matched anything. |
2468 //------------------------------------------------------------------------------
-- | 2491 //------------------------------------------------------------------------------
-- |
2469 REStackFrame *RegexMatcher::resetStack() { | 2492 REStackFrame *RegexMatcher::resetStack() { |
2470 // Discard any previous contents of the state save stack, and initialize a | 2493 // Discard any previous contents of the state save stack, and initialize a |
2471 // new stack frame with all -1 data. The -1s are needed for capture group
limits, | 2494 // new stack frame with all -1 data. The -1s are needed for capture group
limits, |
2472 // where they indicate that a group has not yet matched anything. | 2495 // where they indicate that a group has not yet matched anything. |
2473 fStack->removeAllElements(); | 2496 fStack->removeAllElements(); |
2474 | 2497 |
2475 REStackFrame *iFrame = (REStackFrame *)fStack->reserveBlock(fPattern->fFrame
Size, fDeferredStatus); | 2498 REStackFrame *iFrame = (REStackFrame *)fStack->reserveBlock(fPattern->fFrame
Size, fDeferredStatus); |
| 2499 if(U_FAILURE(fDeferredStatus)) { |
| 2500 return NULL; |
| 2501 } |
| 2502 |
2476 int32_t i; | 2503 int32_t i; |
2477 for (i=0; i<fPattern->fFrameSize-RESTACKFRAME_HDRCOUNT; i++) { | 2504 for (i=0; i<fPattern->fFrameSize-RESTACKFRAME_HDRCOUNT; i++) { |
2478 iFrame->fExtra[i] = -1; | 2505 iFrame->fExtra[i] = -1; |
2479 } | 2506 } |
2480 return iFrame; | 2507 return iFrame; |
2481 } | 2508 } |
2482 | 2509 |
2483 | 2510 |
2484 | 2511 |
2485 //------------------------------------------------------------------------------
-- | 2512 //------------------------------------------------------------------------------
-- |
(...skipping 167 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2653 // fp The top frame pointer when called. At return, a new | 2680 // fp The top frame pointer when called. At return, a new |
2654 // frame will be present | 2681 // frame will be present |
2655 // savePatIdx An index into the compiled pattern. Goes into the origina
l | 2682 // savePatIdx An index into the compiled pattern. Goes into the origina
l |
2656 // (not new) frame. If execution ever back-tracks out of the | 2683 // (not new) frame. If execution ever back-tracks out of the |
2657 // new frame, this will be where we continue from in the patt
ern. | 2684 // new frame, this will be where we continue from in the patt
ern. |
2658 // Return | 2685 // Return |
2659 // The new frame pointer. | 2686 // The new frame pointer. |
2660 // | 2687 // |
2661 //------------------------------------------------------------------------------
-- | 2688 //------------------------------------------------------------------------------
-- |
2662 inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int64_t savePatId
x, UErrorCode &status) { | 2689 inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int64_t savePatId
x, UErrorCode &status) { |
| 2690 if (U_FAILURE(status)) { |
| 2691 return fp; |
| 2692 } |
2663 // push storage for a new frame. | 2693 // push storage for a new frame. |
2664 int64_t *newFP = fStack->reserveBlock(fFrameSize, status); | 2694 int64_t *newFP = fStack->reserveBlock(fFrameSize, status); |
2665 if (newFP == NULL) { | 2695 if (U_FAILURE(status)) { |
2666 // Failure on attempted stack expansion. | 2696 // Failure on attempted stack expansion. |
2667 // Stack function set some other error code, change it to a more | 2697 // Stack function set some other error code, change it to a more |
2668 // specific one for regular expressions. | 2698 // specific one for regular expressions. |
2669 status = U_REGEX_STACK_OVERFLOW; | 2699 status = U_REGEX_STACK_OVERFLOW; |
2670 // We need to return a writable stack frame, so just return the | 2700 // We need to return a writable stack frame, so just return the |
2671 // previous frame. The match operation will stop quickly | 2701 // previous frame. The match operation will stop quickly |
2672 // because of the error status, after which the frame will never | 2702 // because of the error status, after which the frame will never |
2673 // be looked at again. | 2703 // be looked at again. |
2674 return fp; | 2704 return fp; |
2675 } | 2705 } |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2747 | 2777 |
2748 // Cache frequently referenced items from the compiled pattern | 2778 // Cache frequently referenced items from the compiled pattern |
2749 // | 2779 // |
2750 int64_t *pat = fPattern->fCompiledPat->getBuffer(); | 2780 int64_t *pat = fPattern->fCompiledPat->getBuffer(); |
2751 | 2781 |
2752 const UChar *litText = fPattern->fLiteralText.getBuffer(); | 2782 const UChar *litText = fPattern->fLiteralText.getBuffer(); |
2753 UVector *sets = fPattern->fSets; | 2783 UVector *sets = fPattern->fSets; |
2754 | 2784 |
2755 fFrameSize = fPattern->fFrameSize; | 2785 fFrameSize = fPattern->fFrameSize; |
2756 REStackFrame *fp = resetStack(); | 2786 REStackFrame *fp = resetStack(); |
| 2787 if (U_FAILURE(fDeferredStatus)) { |
| 2788 status = fDeferredStatus; |
| 2789 return; |
| 2790 } |
2757 | 2791 |
2758 fp->fPatIdx = 0; | 2792 fp->fPatIdx = 0; |
2759 fp->fInputIdx = startIdx; | 2793 fp->fInputIdx = startIdx; |
2760 | 2794 |
2761 // Zero out the pattern's static data | 2795 // Zero out the pattern's static data |
2762 int32_t i; | 2796 int32_t i; |
2763 for (i = 0; i<fPattern->fDataSize; i++) { | 2797 for (i = 0; i<fPattern->fDataSize; i++) { |
2764 fData[i] = 0; | 2798 fData[i] = 0; |
2765 } | 2799 } |
2766 | 2800 |
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2900 fRequireEnd = TRUE; | 2934 fRequireEnd = TRUE; |
2901 break; | 2935 break; |
2902 } | 2936 } |
2903 | 2937 |
2904 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); | 2938 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); |
2905 | 2939 |
2906 // If we are positioned just before a new-line that is located a
t the | 2940 // If we are positioned just before a new-line that is located a
t the |
2907 // end of input, succeed. | 2941 // end of input, succeed. |
2908 UChar32 c = UTEXT_NEXT32(fInputText); | 2942 UChar32 c = UTEXT_NEXT32(fInputText); |
2909 if (UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) { | 2943 if (UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) { |
2910 if ((c>=0x0a && c<=0x0d) || c==0x85 || c==0x2028 || c==0x202
9) { | 2944 if (isLineTerminator(c)) { |
2911 // If not in the middle of a CR/LF sequence | 2945 // If not in the middle of a CR/LF sequence |
2912 if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && ((void)UTE
XT_PREVIOUS32(fInputText), UTEXT_PREVIOUS32(fInputText))==0x0d)) { | 2946 if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && ((void)U
TEXT_PREVIOUS32(fInputText), UTEXT_PREVIOUS32(fInputText))==0x0d)) { |
2913 // At new-line at end of input. Success | 2947 // At new-line at end of input. Success |
2914 fHitEnd = TRUE; | 2948 fHitEnd = TRUE; |
2915 fRequireEnd = TRUE; | 2949 fRequireEnd = TRUE; |
2916 | 2950 |
2917 break; | 2951 break; |
2918 } | 2952 } |
2919 } | 2953 } |
2920 } else { | 2954 } else { |
2921 UChar32 nextC = UTEXT_NEXT32(fInputText); | 2955 UChar32 nextC = UTEXT_NEXT32(fInputText); |
2922 if (c == 0x0d && nextC == 0x0a && UTEXT_GETNATIVEINDEX(fInpu
tText) >= fAnchorLimit) { | 2956 if (c == 0x0d && nextC == 0x0a && UTEXT_GETNATIVEINDEX(fInpu
tText) >= fAnchorLimit) { |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2958 if (fp->fInputIdx >= fAnchorLimit) { | 2992 if (fp->fInputIdx >= fAnchorLimit) { |
2959 // We really are at the end of input. Success. | 2993 // We really are at the end of input. Success. |
2960 fHitEnd = TRUE; | 2994 fHitEnd = TRUE; |
2961 fRequireEnd = TRUE; | 2995 fRequireEnd = TRUE; |
2962 break; | 2996 break; |
2963 } | 2997 } |
2964 // If we are positioned just before a new-line, succeed. | 2998 // If we are positioned just before a new-line, succeed. |
2965 // It makes no difference where the new-line is within the inpu
t. | 2999 // It makes no difference where the new-line is within the inpu
t. |
2966 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); | 3000 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); |
2967 UChar32 c = UTEXT_CURRENT32(fInputText); | 3001 UChar32 c = UTEXT_CURRENT32(fInputText); |
2968 if ((c>=0x0a && c<=0x0d) || c==0x85 ||c==0x2028 || c==0x2029) { | 3002 if (isLineTerminator(c)) { |
2969 // At a line end, except for the odd chance of being in th
e middle of a CR/LF sequence | 3003 // At a line end, except for the odd chance of being in th
e middle of a CR/LF sequence |
2970 // In multi-line mode, hitting a new-line just before the
end of input does not | 3004 // In multi-line mode, hitting a new-line just before the
end of input does not |
2971 // set the hitEnd or requireEnd flags | 3005 // set the hitEnd or requireEnd flags |
2972 if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && UTEXT_PREVI
OUS32(fInputText)==0x0d)) { | 3006 if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && UTEXT_PREVI
OUS32(fInputText)==0x0d)) { |
2973 break; | 3007 break; |
2974 } | 3008 } |
2975 } | 3009 } |
2976 // not at a new line. Fail. | 3010 // not at a new line. Fail. |
2977 fp = (REStackFrame *)fStack->popFrame(fFrameSize); | 3011 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
2978 } | 3012 } |
(...skipping 28 matching lines...) Expand all Loading... |
3007 case URX_CARET_M: // ^, test for start of line in muli
t-line mode | 3041 case URX_CARET_M: // ^, test for start of line in muli
t-line mode |
3008 { | 3042 { |
3009 if (fp->fInputIdx == fAnchorStart) { | 3043 if (fp->fInputIdx == fAnchorStart) { |
3010 // We are at the start input. Success. | 3044 // We are at the start input. Success. |
3011 break; | 3045 break; |
3012 } | 3046 } |
3013 // Check whether character just before the current pos is a new-l
ine | 3047 // Check whether character just before the current pos is a new-l
ine |
3014 // unless we are at the end of input | 3048 // unless we are at the end of input |
3015 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); | 3049 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); |
3016 UChar32 c = UTEXT_PREVIOUS32(fInputText); | 3050 UChar32 c = UTEXT_PREVIOUS32(fInputText); |
3017 if ((fp->fInputIdx < fAnchorLimit) && | 3051 if ((fp->fInputIdx < fAnchorLimit) && isLineTerminator(c)) { |
3018 ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) { | |
3019 // It's a new-line. ^ is true. Success. | 3052 // It's a new-line. ^ is true. Success. |
3020 // TODO: what should be done with positions between a CR an
d LF? | 3053 // TODO: what should be done with positions between a CR an
d LF? |
3021 break; | 3054 break; |
3022 } | 3055 } |
3023 // Not at the start of a line. Fail. | 3056 // Not at the start of a line. Fail. |
3024 fp = (REStackFrame *)fStack->popFrame(fFrameSize); | 3057 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
3025 } | 3058 } |
3026 break; | 3059 break; |
3027 | 3060 |
3028 | 3061 |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3089 break; | 3122 break; |
3090 | 3123 |
3091 | 3124 |
3092 case URX_BACKSLASH_G: // Test for position at end of previous m
atch | 3125 case URX_BACKSLASH_G: // Test for position at end of previous m
atch |
3093 if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->
fInputIdx==fActiveStart))) { | 3126 if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->
fInputIdx==fActiveStart))) { |
3094 fp = (REStackFrame *)fStack->popFrame(fFrameSize); | 3127 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
3095 } | 3128 } |
3096 break; | 3129 break; |
3097 | 3130 |
3098 | 3131 |
| 3132 case URX_BACKSLASH_H: // Test for \h, horizontal white space. |
| 3133 { |
| 3134 if (fp->fInputIdx >= fActiveLimit) { |
| 3135 fHitEnd = TRUE; |
| 3136 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
| 3137 break; |
| 3138 } |
| 3139 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); |
| 3140 UChar32 c = UTEXT_NEXT32(fInputText); |
| 3141 int8_t ctype = u_charType(c); |
| 3142 UBool success = (ctype == U_SPACE_SEPARATOR || c == 9); // SPAC
E_SEPARATOR || TAB |
| 3143 success ^= (UBool)(opValue != 0); // flip sense for \H |
| 3144 if (success) { |
| 3145 fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); |
| 3146 } else { |
| 3147 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
| 3148 } |
| 3149 } |
| 3150 break; |
| 3151 |
| 3152 |
| 3153 case URX_BACKSLASH_R: // Test for \R, any line break sequence
. |
| 3154 { |
| 3155 if (fp->fInputIdx >= fActiveLimit) { |
| 3156 fHitEnd = TRUE; |
| 3157 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
| 3158 break; |
| 3159 } |
| 3160 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); |
| 3161 UChar32 c = UTEXT_NEXT32(fInputText); |
| 3162 if (isLineTerminator(c)) { |
| 3163 if (c == 0x0d && utext_current32(fInputText) == 0x0a) { |
| 3164 utext_next32(fInputText); |
| 3165 } |
| 3166 fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); |
| 3167 } else { |
| 3168 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
| 3169 } |
| 3170 } |
| 3171 break; |
| 3172 |
| 3173 |
| 3174 case URX_BACKSLASH_V: // \v, any single line ending character
. |
| 3175 { |
| 3176 if (fp->fInputIdx >= fActiveLimit) { |
| 3177 fHitEnd = TRUE; |
| 3178 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
| 3179 break; |
| 3180 } |
| 3181 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); |
| 3182 UChar32 c = UTEXT_NEXT32(fInputText); |
| 3183 UBool success = isLineTerminator(c); |
| 3184 success ^= (UBool)(opValue != 0); // flip sense for \V |
| 3185 if (success) { |
| 3186 fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); |
| 3187 } else { |
| 3188 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
| 3189 } |
| 3190 } |
| 3191 break; |
| 3192 |
| 3193 |
3099 case URX_BACKSLASH_X: | 3194 case URX_BACKSLASH_X: |
3100 // Match a Grapheme, as defined by Unicode TR 29. | 3195 // Match a Grapheme, as defined by Unicode TR 29. |
3101 // Differs slightly from Perl, which consumes combining marks indep
endently | 3196 // Differs slightly from Perl, which consumes combining marks indep
endently |
3102 // of context. | 3197 // of context. |
3103 { | 3198 { |
3104 | 3199 |
3105 // Fail if at end of input | 3200 // Fail if at end of input |
3106 if (fp->fInputIdx >= fActiveLimit) { | 3201 if (fp->fInputIdx >= fActiveLimit) { |
3107 fHitEnd = TRUE; | 3202 fHitEnd = TRUE; |
3108 fp = (REStackFrame *)fStack->popFrame(fFrameSize); | 3203 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
(...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3316 // At end of input. Match failed. Backtrack out. | 3411 // At end of input. Match failed. Backtrack out. |
3317 fHitEnd = TRUE; | 3412 fHitEnd = TRUE; |
3318 fp = (REStackFrame *)fStack->popFrame(fFrameSize); | 3413 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
3319 break; | 3414 break; |
3320 } | 3415 } |
3321 | 3416 |
3322 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); | 3417 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); |
3323 | 3418 |
3324 // There is input left. Advance over one char, unless we've hit
end-of-line | 3419 // There is input left. Advance over one char, unless we've hit
end-of-line |
3325 UChar32 c = UTEXT_NEXT32(fInputText); | 3420 UChar32 c = UTEXT_NEXT32(fInputText); |
3326 if (((c & 0x7f) <= 0x29) && // First quickly bypass as many
chars as possible | 3421 if (isLineTerminator(c)) { |
3327 ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029))
{ | |
3328 // End of line in normal mode. . does not match. | 3422 // End of line in normal mode. . does not match. |
3329 fp = (REStackFrame *)fStack->popFrame(fFrameSize); | 3423 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
3330 break; | 3424 break; |
3331 } | 3425 } |
3332 fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); | 3426 fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); |
3333 } | 3427 } |
3334 break; | 3428 break; |
3335 | 3429 |
3336 | 3430 |
3337 case URX_DOTANY_ALL: | 3431 case URX_DOTANY_ALL: |
(...skipping 736 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4074 UTEXT_SETNATIVEINDEX(fInputText, ix); | 4168 UTEXT_SETNATIVEINDEX(fInputText, ix); |
4075 for (;;) { | 4169 for (;;) { |
4076 if (ix >= fActiveLimit) { | 4170 if (ix >= fActiveLimit) { |
4077 fHitEnd = TRUE; | 4171 fHitEnd = TRUE; |
4078 break; | 4172 break; |
4079 } | 4173 } |
4080 UChar32 c = UTEXT_NEXT32(fInputText); | 4174 UChar32 c = UTEXT_NEXT32(fInputText); |
4081 if ((c & 0x7f) <= 0x29) { // Fast filter of non
-new-line-s | 4175 if ((c & 0x7f) <= 0x29) { // Fast filter of non
-new-line-s |
4082 if ((c == 0x0a) || // 0x0a is newline i
n both modes. | 4176 if ((c == 0x0a) || // 0x0a is newline i
n both modes. |
4083 (((opValue & 2) == 0) && // IF not UNIX_LINES
mode | 4177 (((opValue & 2) == 0) && // IF not UNIX_LINES
mode |
4084 (c<=0x0d && c>=0x0a)) || c==0x85 ||c==0x2028
|| c==0x2029) { | 4178 isLineTerminator(c))) { |
4085 // char is a line ending. Exit the scanning lo
op. | 4179 // char is a line ending. Exit the scanning lo
op. |
4086 break; | 4180 break; |
4087 } | 4181 } |
4088 } | 4182 } |
4089 ix = UTEXT_GETNATIVEINDEX(fInputText); | 4183 ix = UTEXT_GETNATIVEINDEX(fInputText); |
4090 } | 4184 } |
4091 } | 4185 } |
4092 | 4186 |
4093 // If there were no matching characters, skip over the loop alto
gether. | 4187 // If there were no matching characters, skip over the loop alto
gether. |
4094 // The loop doesn't run at all, a * op always succeeds. | 4188 // The loop doesn't run at all, a * op always succeeds. |
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4250 // | 4344 // |
4251 int64_t *pat = fPattern->fCompiledPat->getBuffer(); | 4345 int64_t *pat = fPattern->fCompiledPat->getBuffer(); |
4252 | 4346 |
4253 const UChar *litText = fPattern->fLiteralText.getBuffer(); | 4347 const UChar *litText = fPattern->fLiteralText.getBuffer(); |
4254 UVector *sets = fPattern->fSets; | 4348 UVector *sets = fPattern->fSets; |
4255 | 4349 |
4256 const UChar *inputBuf = fInputText->chunkContents; | 4350 const UChar *inputBuf = fInputText->chunkContents; |
4257 | 4351 |
4258 fFrameSize = fPattern->fFrameSize; | 4352 fFrameSize = fPattern->fFrameSize; |
4259 REStackFrame *fp = resetStack(); | 4353 REStackFrame *fp = resetStack(); |
| 4354 if (U_FAILURE(fDeferredStatus)) { |
| 4355 status = fDeferredStatus; |
| 4356 return; |
| 4357 } |
4260 | 4358 |
4261 fp->fPatIdx = 0; | 4359 fp->fPatIdx = 0; |
4262 fp->fInputIdx = startIdx; | 4360 fp->fInputIdx = startIdx; |
4263 | 4361 |
4264 // Zero out the pattern's static data | 4362 // Zero out the pattern's static data |
4265 int32_t i; | 4363 int32_t i; |
4266 for (i = 0; i<fPattern->fDataSize; i++) { | 4364 for (i = 0; i<fPattern->fDataSize; i++) { |
4267 fData[i] = 0; | 4365 fData[i] = 0; |
4268 } | 4366 } |
4269 | 4367 |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4405 fRequireEnd = TRUE; | 4503 fRequireEnd = TRUE; |
4406 break; | 4504 break; |
4407 } | 4505 } |
4408 | 4506 |
4409 // If we are positioned just before a new-line that is located at th
e | 4507 // If we are positioned just before a new-line that is located at th
e |
4410 // end of input, succeed. | 4508 // end of input, succeed. |
4411 if (fp->fInputIdx == fAnchorLimit-1) { | 4509 if (fp->fInputIdx == fAnchorLimit-1) { |
4412 UChar32 c; | 4510 UChar32 c; |
4413 U16_GET(inputBuf, fAnchorStart, fp->fInputIdx, fAnchorLimit, c); | 4511 U16_GET(inputBuf, fAnchorStart, fp->fInputIdx, fAnchorLimit, c); |
4414 | 4512 |
4415 if ((c>=0x0a && c<=0x0d) || c==0x85 || c==0x2028 || c==0x2029) { | 4513 if (isLineTerminator(c)) { |
4416 if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp-
>fInputIdx-1]==0x0d)) { | 4514 if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp-
>fInputIdx-1]==0x0d)) { |
4417 // At new-line at end of input. Success | 4515 // At new-line at end of input. Success |
4418 fHitEnd = TRUE; | 4516 fHitEnd = TRUE; |
4419 fRequireEnd = TRUE; | 4517 fRequireEnd = TRUE; |
4420 break; | 4518 break; |
4421 } | 4519 } |
4422 } | 4520 } |
4423 } else if (fp->fInputIdx == fAnchorLimit-2 && | 4521 } else if (fp->fInputIdx == fAnchorLimit-2 && |
4424 inputBuf[fp->fInputIdx]==0x0d && inputBuf[fp->fInputIdx+1]==0x0a
) { | 4522 inputBuf[fp->fInputIdx]==0x0d && inputBuf[fp->fInputIdx+1]==0x0a
) { |
4425 fHitEnd = TRUE; | 4523 fHitEnd = TRUE; |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4459 { | 4557 { |
4460 if (fp->fInputIdx >= fAnchorLimit) { | 4558 if (fp->fInputIdx >= fAnchorLimit) { |
4461 // We really are at the end of input. Success. | 4559 // We really are at the end of input. Success. |
4462 fHitEnd = TRUE; | 4560 fHitEnd = TRUE; |
4463 fRequireEnd = TRUE; | 4561 fRequireEnd = TRUE; |
4464 break; | 4562 break; |
4465 } | 4563 } |
4466 // If we are positioned just before a new-line, succeed. | 4564 // If we are positioned just before a new-line, succeed. |
4467 // It makes no difference where the new-line is within the input
. | 4565 // It makes no difference where the new-line is within the input
. |
4468 UChar32 c = inputBuf[fp->fInputIdx]; | 4566 UChar32 c = inputBuf[fp->fInputIdx]; |
4469 if ((c>=0x0a && c<=0x0d) || c==0x85 ||c==0x2028 || c==0x2029) { | 4567 if (isLineTerminator(c)) { |
4470 // At a line end, except for the odd chance of being in the
middle of a CR/LF sequence | 4568 // At a line end, except for the odd chance of being in the
middle of a CR/LF sequence |
4471 // In multi-line mode, hitting a new-line just before the e
nd of input does not | 4569 // In multi-line mode, hitting a new-line just before the e
nd of input does not |
4472 // set the hitEnd or requireEnd flags | 4570 // set the hitEnd or requireEnd flags |
4473 if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp-
>fInputIdx-1]==0x0d)) { | 4571 if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp-
>fInputIdx-1]==0x0d)) { |
4474 break; | 4572 break; |
4475 } | 4573 } |
4476 } | 4574 } |
4477 // not at a new line. Fail. | 4575 // not at a new line. Fail. |
4478 fp = (REStackFrame *)fStack->popFrame(fFrameSize); | 4576 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
4479 } | 4577 } |
(...skipping 27 matching lines...) Expand all Loading... |
4507 case URX_CARET_M: // ^, test for start of line in mul
it-line mode | 4605 case URX_CARET_M: // ^, test for start of line in mul
it-line mode |
4508 { | 4606 { |
4509 if (fp->fInputIdx == fAnchorStart) { | 4607 if (fp->fInputIdx == fAnchorStart) { |
4510 // We are at the start input. Success. | 4608 // We are at the start input. Success. |
4511 break; | 4609 break; |
4512 } | 4610 } |
4513 // Check whether character just before the current pos is a new-
line | 4611 // Check whether character just before the current pos is a new-
line |
4514 // unless we are at the end of input | 4612 // unless we are at the end of input |
4515 UChar c = inputBuf[fp->fInputIdx - 1]; | 4613 UChar c = inputBuf[fp->fInputIdx - 1]; |
4516 if ((fp->fInputIdx < fAnchorLimit) && | 4614 if ((fp->fInputIdx < fAnchorLimit) && |
4517 ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029))
{ | 4615 isLineTerminator(c)) { |
4518 // It's a new-line. ^ is true. Success. | 4616 // It's a new-line. ^ is true. Success. |
4519 // TODO: what should be done with positions between a CR a
nd LF? | 4617 // TODO: what should be done with positions between a CR a
nd LF? |
4520 break; | 4618 break; |
4521 } | 4619 } |
4522 // Not at the start of a line. Fail. | 4620 // Not at the start of a line. Fail. |
4523 fp = (REStackFrame *)fStack->popFrame(fFrameSize); | 4621 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
4524 } | 4622 } |
4525 break; | 4623 break; |
4526 | 4624 |
4527 | 4625 |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4584 break; | 4682 break; |
4585 | 4683 |
4586 | 4684 |
4587 case URX_BACKSLASH_G: // Test for position at end of previous m
atch | 4685 case URX_BACKSLASH_G: // Test for position at end of previous m
atch |
4588 if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->
fInputIdx==fActiveStart))) { | 4686 if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->
fInputIdx==fActiveStart))) { |
4589 fp = (REStackFrame *)fStack->popFrame(fFrameSize); | 4687 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
4590 } | 4688 } |
4591 break; | 4689 break; |
4592 | 4690 |
4593 | 4691 |
| 4692 case URX_BACKSLASH_H: // Test for \h, horizontal white space. |
| 4693 { |
| 4694 if (fp->fInputIdx >= fActiveLimit) { |
| 4695 fHitEnd = TRUE; |
| 4696 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
| 4697 break; |
| 4698 } |
| 4699 UChar32 c; |
| 4700 U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); |
| 4701 int8_t ctype = u_charType(c); |
| 4702 UBool success = (ctype == U_SPACE_SEPARATOR || c == 9); // SPAC
E_SEPARATOR || TAB |
| 4703 success ^= (UBool)(opValue != 0); // flip sense for \H |
| 4704 if (!success) { |
| 4705 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
| 4706 } |
| 4707 } |
| 4708 break; |
| 4709 |
| 4710 |
| 4711 case URX_BACKSLASH_R: // Test for \R, any line break sequence
. |
| 4712 { |
| 4713 if (fp->fInputIdx >= fActiveLimit) { |
| 4714 fHitEnd = TRUE; |
| 4715 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
| 4716 break; |
| 4717 } |
| 4718 UChar32 c; |
| 4719 U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); |
| 4720 if (isLineTerminator(c)) { |
| 4721 if (c == 0x0d && fp->fInputIdx < fActiveLimit) { |
| 4722 // Check for CR/LF sequence. Consume both together when
found. |
| 4723 UChar c2; |
| 4724 U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c2); |
| 4725 if (c2 != 0x0a) { |
| 4726 U16_PREV(inputBuf, 0, fp->fInputIdx, c2); |
| 4727 } |
| 4728 } |
| 4729 } else { |
| 4730 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
| 4731 } |
| 4732 } |
| 4733 break; |
| 4734 |
| 4735 |
| 4736 case URX_BACKSLASH_V: // Any single code point line ending. |
| 4737 { |
| 4738 if (fp->fInputIdx >= fActiveLimit) { |
| 4739 fHitEnd = TRUE; |
| 4740 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
| 4741 break; |
| 4742 } |
| 4743 UChar32 c; |
| 4744 U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); |
| 4745 UBool success = isLineTerminator(c); |
| 4746 success ^= (UBool)(opValue != 0); // flip sense for \V |
| 4747 if (!success) { |
| 4748 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
| 4749 } |
| 4750 } |
| 4751 break; |
| 4752 |
| 4753 |
| 4754 |
4594 case URX_BACKSLASH_X: | 4755 case URX_BACKSLASH_X: |
4595 // Match a Grapheme, as defined by Unicode TR 29. | 4756 // Match a Grapheme, as defined by Unicode TR 29. |
4596 // Differs slightly from Perl, which consumes combining marks independe
ntly | 4757 // Differs slightly from Perl, which consumes combining marks independe
ntly |
4597 // of context. | 4758 // of context. |
4598 { | 4759 { |
4599 | 4760 |
4600 // Fail if at end of input | 4761 // Fail if at end of input |
4601 if (fp->fInputIdx >= fActiveLimit) { | 4762 if (fp->fInputIdx >= fActiveLimit) { |
4602 fHitEnd = TRUE; | 4763 fHitEnd = TRUE; |
4603 fp = (REStackFrame *)fStack->popFrame(fFrameSize); | 4764 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
(...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4793 if (fp->fInputIdx >= fActiveLimit) { | 4954 if (fp->fInputIdx >= fActiveLimit) { |
4794 // At end of input. Match failed. Backtrack out. | 4955 // At end of input. Match failed. Backtrack out. |
4795 fHitEnd = TRUE; | 4956 fHitEnd = TRUE; |
4796 fp = (REStackFrame *)fStack->popFrame(fFrameSize); | 4957 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
4797 break; | 4958 break; |
4798 } | 4959 } |
4799 | 4960 |
4800 // There is input left. Advance over one char, unless we've hit
end-of-line | 4961 // There is input left. Advance over one char, unless we've hit
end-of-line |
4801 UChar32 c; | 4962 UChar32 c; |
4802 U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); | 4963 U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); |
4803 if (((c & 0x7f) <= 0x29) && // First quickly bypass as many
chars as possible | 4964 if (isLineTerminator(c)) { |
4804 ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029))
{ | |
4805 // End of line in normal mode. . does not match. | 4965 // End of line in normal mode. . does not match. |
4806 fp = (REStackFrame *)fStack->popFrame(fFrameSize); | 4966 fp = (REStackFrame *)fStack->popFrame(fFrameSize); |
4807 break; | 4967 break; |
4808 } | 4968 } |
4809 } | 4969 } |
4810 break; | 4970 break; |
4811 | 4971 |
4812 | 4972 |
4813 case URX_DOTANY_ALL: | 4973 case URX_DOTANY_ALL: |
4814 { | 4974 { |
(...skipping 693 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5508 for (;;) { | 5668 for (;;) { |
5509 if (ix >= fActiveLimit) { | 5669 if (ix >= fActiveLimit) { |
5510 fHitEnd = TRUE; | 5670 fHitEnd = TRUE; |
5511 break; | 5671 break; |
5512 } | 5672 } |
5513 UChar32 c; | 5673 UChar32 c; |
5514 U16_NEXT(inputBuf, ix, fActiveLimit, c); // c = inputB
uf[ix++] | 5674 U16_NEXT(inputBuf, ix, fActiveLimit, c); // c = inputB
uf[ix++] |
5515 if ((c & 0x7f) <= 0x29) { // Fast filter of non
-new-line-s | 5675 if ((c & 0x7f) <= 0x29) { // Fast filter of non
-new-line-s |
5516 if ((c == 0x0a) || // 0x0a is newline i
n both modes. | 5676 if ((c == 0x0a) || // 0x0a is newline i
n both modes. |
5517 (((opValue & 2) == 0) && // IF not UNIX_LINES
mode | 5677 (((opValue & 2) == 0) && // IF not UNIX_LINES
mode |
5518 ((c<=0x0d && c>=0x0a) || c==0x85 || c==0x2028
|| c==0x2029))) { | 5678 isLineTerminator(c))) { |
5519 // char is a line ending. Put the input pos ba
ck to the | 5679 // char is a line ending. Put the input pos ba
ck to the |
5520 // line ending char, and exit the scanning lo
op. | 5680 // line ending char, and exit the scanning lo
op. |
5521 U16_BACK_1(inputBuf, 0, ix); | 5681 U16_BACK_1(inputBuf, 0, ix); |
5522 break; | 5682 break; |
5523 } | 5683 } |
5524 } | 5684 } |
5525 } | 5685 } |
5526 } | 5686 } |
5527 | 5687 |
5528 // If there were no matching characters, skip over the loop alto
gether. | 5688 // If there were no matching characters, skip over the loop alto
gether. |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5624 | 5784 |
5625 return; | 5785 return; |
5626 } | 5786 } |
5627 | 5787 |
5628 | 5788 |
5629 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexMatcher) | 5789 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexMatcher) |
5630 | 5790 |
5631 U_NAMESPACE_END | 5791 U_NAMESPACE_END |
5632 | 5792 |
5633 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS | 5793 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS |
OLD | NEW |