OLD | NEW |
1 | 1 |
2 #***************************************************************************** | 2 #***************************************************************************** |
3 # | 3 # |
4 # Copyright (C) 2002-2007, International Business Machines Corporation and others. | 4 # Copyright (C) 2002-2015, International Business Machines Corporation and others. |
5 # All Rights Reserved. | 5 # All Rights Reserved. |
6 # | 6 # |
7 #***************************************************************************** | 7 #***************************************************************************** |
8 # | 8 # |
9 # file: regexcst.txt | 9 # file: regexcst.txt |
10 # ICU Regular Expression Parser State Table | 10 # ICU Regular Expression Parser State Table |
11 # | 11 # |
12 # This state table is used when reading and parsing a regular expression pattern | 12 # This state table is used when reading and parsing a regular expression pattern |
13 # The pattern parser uses a state machine; the data in this file define the | 13 # The pattern parser uses a state machine; the data in this file define the |
14 # state transitions that occur for each input character. | 14 # state transitions that occur for each input character. |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
140 'w' paren-flag doBeginMatch
Mode | 140 'w' paren-flag doBeginMatch
Mode |
141 'x' paren-flag doBeginMatch
Mode | 141 'x' paren-flag doBeginMatch
Mode |
142 '-' paren-flag doBeginMatch
Mode | 142 '-' paren-flag doBeginMatch
Mode |
143 '(' n errorDeath doConditiona
lExpr | 143 '(' n errorDeath doConditiona
lExpr |
144 '{' n errorDeath doPerlInline | 144 '{' n errorDeath doPerlInline |
145 default errorDeath doBadOpenPar
enType | 145 default errorDeath doBadOpenPar
enType |
146 | 146 |
147 open-paren-lookbehind: | 147 open-paren-lookbehind: |
148 '=' n term ^expr-cont doOpenLookBe
hind # (?<= | 148 '=' n term ^expr-cont doOpenLookBe
hind # (?<= |
149 '!' n term ^expr-cont doOpenLookBe
hindNeg # (?<! | 149 '!' n term ^expr-cont doOpenLookBe
hindNeg # (?<! |
| 150 ascii_letter named-capture doBeginNamed
Capture # (?<name |
150 default errorDeath doBadOpenPar
enType | 151 default errorDeath doBadOpenPar
enType |
151 | 152 |
152 | 153 |
153 # | 154 # |
154 # paren-comment We've got a (?# ... ) style comment. Eat pattern text till we get to the ')' | 155 # paren-comment We've got a (?# ... ) style comment. Eat pattern text till we get to the ')' |
155 # | 156 # |
156 paren-comment: | 157 paren-comment: |
157 ')' n pop | 158 ')' n pop |
158 eof errorDeath doMismat
chedParenErr | 159 eof errorDeath doMismat
chedParenErr |
159 default n paren-comment | 160 default n paren-comment |
160 | 161 |
161 # | 162 # |
162 # paren-flag Scanned a (?ismx-ismx flag setting | 163 # paren-flag Scanned a (?ismx-ismx flag setting |
163 # | 164 # |
164 paren-flag: | 165 paren-flag: |
165 'i' n paren-flag doMatchMode | 166 'i' n paren-flag doMatchMode |
166 'd' n paren-flag doMatchMode | 167 'd' n paren-flag doMatchMode |
167 'm' n paren-flag doMatchMode | 168 'm' n paren-flag doMatchMode |
168 's' n paren-flag doMatchMode | 169 's' n paren-flag doMatchMode |
169 'u' n paren-flag doMatchMode | 170 'u' n paren-flag doMatchMode |
170 'w' n paren-flag doMatchMode | 171 'w' n paren-flag doMatchMode |
171 'x' n paren-flag doMatchMode | 172 'x' n paren-flag doMatchMode |
172 '-' n paren-flag doMatchMode | 173 '-' n paren-flag doMatchMode |
173 ')' n term doSetMatchMo
de | 174 ')' n term doSetMatchMo
de |
174 ':' n term ^expr-quant doMatchModeP
aren | 175 ':' n term ^expr-quant doMatchModeP
aren |
175 default errorDeath doBadModeFla
g | 176 default errorDeath doBadModeFla
g |
176 | 177 |
| 178 # |
| 179 # named-capture (?<name> ... ), position currently on the name. |
| 180 # |
| 181 named-capture: |
| 182 ascii_letter n named-capture doContinueNa
medCapture |
| 183 digit_char n named-capture doContinueNa
medCapture |
| 184 '>' n term ^expr-quant doOpenCaptur
eParen # common w non-named capture. |
| 185 default errorDeath doBadNamedCa
pture |
177 | 186 |
178 # | 187 # |
179 # quant-star Scanning a '*' quantifier. Need to look ahead to decide | 188 # quant-star Scanning a '*' quantifier. Need to look ahead to decide |
180 # between plain '*', '*?', '*+' | 189 # between plain '*', '*?', '*+' |
181 # | 190 # |
182 quant-star: | 191 quant-star: |
183 '?' n expr-cont doNGStar
# *? | 192 '?' n expr-cont doNGStar
# *? |
184 '+' n expr-cont doPossessive
Star # *+ | 193 '+' n expr-cont doPossessive
Star # *+ |
185 default expr-cont doStar | 194 default expr-cont doStar |
186 | 195 |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
234 # backslash # Backslash. Figure out which of the \thingies we have encountered. | 243 # backslash # Backslash. Figure out which of the \thingies we have encountered. |
235 # The low level next-char function will have preprocessed | 244 # The low level next-char function will have preprocessed |
236 # some of them already; those won't come here. | 245 # some of them already; those won't come here. |
237 backslash: | 246 backslash: |
238 'A' n term doBackslashA | 247 'A' n term doBackslashA |
239 'B' n term doBackslashB | 248 'B' n term doBackslashB |
240 'b' n term doBackslashb | 249 'b' n term doBackslashb |
241 'd' n expr-quant doBackslashd | 250 'd' n expr-quant doBackslashd |
242 'D' n expr-quant doBackslashD | 251 'D' n expr-quant doBackslashD |
243 'G' n term doBackslashG | 252 'G' n term doBackslashG |
| 253 'h' n expr-quant doBackslashh |
| 254 'H' n expr-quant doBackslashH |
| 255 'k' n named-backref |
244 'N' expr-quant doNamedChar
# \N{NAME} named char | 256 'N' expr-quant doNamedChar
# \N{NAME} named char |
245 'p' expr-quant doProperty
# \p{Lu} style property | 257 'p' expr-quant doProperty
# \p{Lu} style property |
246 'P' expr-quant doProperty | 258 'P' expr-quant doProperty |
| 259 'R' n expr-quant doBackslashR |
247 'Q' n term doEnterQuote
Mode | 260 'Q' n term doEnterQuote
Mode |
248 'S' n expr-quant doBackslashS | 261 'S' n expr-quant doBackslashS |
249 's' n expr-quant doBackslashs | 262 's' n expr-quant doBackslashs |
| 263 'v' n expr-quant doBackslashv |
| 264 'V' n expr-quant doBackslashV |
250 'W' n expr-quant doBackslashW | 265 'W' n expr-quant doBackslashW |
251 'w' n expr-quant doBackslashw | 266 'w' n expr-quant doBackslashw |
252 'X' n expr-quant doBackslashX | 267 'X' n expr-quant doBackslashX |
253 'Z' n term doBackslashZ | 268 'Z' n term doBackslashZ |
254 'z' n term doBackslashz | 269 'z' n term doBackslashz |
255 digit_char n expr-quant doBackRef
# Will scan multiple digits | 270 digit_char n expr-quant doBackRef
# Will scan multiple digits |
256 eof errorDeath doEscapeErro
r | 271 eof errorDeath doEscapeErro
r |
257 default n expr-quant doEscapedLit
eralChar | 272 default n expr-quant doEscapedLit
eralChar |
258 | 273 |
259 | 274 |
| 275 # named-backref Scanned \k |
| 276 # Leading to \k<captureName> |
| 277 # Failure to get the full sequence is an error. |
| 278 # |
| 279 named-backref: |
| 280 '<' n named-backref-2 doBeginNamed
BackRef |
| 281 default errorDeath doBadNamedCa
pture |
| 282 |
| 283 named-backref-2: |
| 284 ascii_letter n named-backref-3 doContinueNa
medBackRef |
| 285 default errorDeath doBadNamedCa
pture |
| 286 |
| 287 named-backref-3: |
| 288 ascii_letter n named-backref-3 doContinueNa
medBackRef |
| 289 digit_char n named-backref-3 doContinueNa
medBackRef |
| 290 '>' n expr-quant doCompleteNa
medBackRef |
| 291 default errorDeath doBadNamedCa
pture |
| 292 |
260 | 293 |
261 # | 294 # |
262 # [set expression] parsing, | 295 # [set expression] parsing, |
263 # All states involved in parsing set expressions have names beginning with "set-" | 296 # All states involved in parsing set expressions have names beginning with "set-" |
264 # | 297 # |
265 | 298 |
266 set-open: | 299 set-open: |
267 '^' n set-open2 doSetNegate | 300 '^' n set-open2 doSetNegate |
268 ':' set-posix doSetPosixPr
op | 301 ':' set-posix doSetPosixPr
op |
269 default set-open2 | 302 default set-open2 |
(...skipping 167 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
437 set-escape: | 470 set-escape: |
438 'p' set-after-set doSetProp | 471 'p' set-after-set doSetProp |
439 'P' set-after-set doSetProp | 472 'P' set-after-set doSetProp |
440 'N' set-after-lit doSetNamedCh
ar | 473 'N' set-after-lit doSetNamedCh
ar |
441 's' n set-after-range doSetBacksla
sh_s | 474 's' n set-after-range doSetBacksla
sh_s |
442 'S' n set-after-range doSetBacksla
sh_S | 475 'S' n set-after-range doSetBacksla
sh_S |
443 'w' n set-after-range doSetBacksla
sh_w | 476 'w' n set-after-range doSetBacksla
sh_w |
444 'W' n set-after-range doSetBacksla
sh_W | 477 'W' n set-after-range doSetBacksla
sh_W |
445 'd' n set-after-range doSetBacksla
sh_d | 478 'd' n set-after-range doSetBacksla
sh_d |
446 'D' n set-after-range doSetBacksla
sh_D | 479 'D' n set-after-range doSetBacksla
sh_D |
| 480 'h' n set-after-range doSetBacksla
sh_h |
| 481 'H' n set-after-range doSetBacksla
sh_H |
| 482 'v' n set-after-range doSetBacksla
sh_v |
| 483 'V' n set-after-range doSetBacksla
sh_V |
447 default n set-after-lit doSetLiteral
Escaped | 484 default n set-after-lit doSetLiteral
Escaped |
448 | 485 |
449 # | 486 # |
450 # set-finish | 487 # set-finish |
451 # Have just encountered the final ']' that completes a [set], and | 488 # Have just encountered the final ']' that completes a [set], and |
452 # arrived here via a pop. From here, we exit the set parsing world, and go | 489 # arrived here via a pop. From here, we exit the set parsing world, and go |
453 # back to generic regular expression parsing. | 490 # back to generic regular expression parsing. |
454 # | 491 # |
455 set-finish: | 492 set-finish: |
456 default expr-quant doSetFinish | 493 default expr-quant doSetFinish |
457 | 494 |
458 | 495 |
459 # | 496 # |
460 # errorDeath. This state is specified as the next state whenever a syntax error | 497 # errorDeath. This state is specified as the next state whenever a syntax error |
461 # in the source rules is detected. Barring bugs, the state machine will never | 498 # in the source rules is detected. Barring bugs, the state machine will never |
462 # actually get here, but will stop because of the action associated with the error. | 499 # actually get here, but will stop because of the action associated with the error. |
463 # But, just in case, this state asks the state machine to exit. | 500 # But, just in case, this state asks the state machine to exit. |
464 errorDeath: | 501 errorDeath: |
465 default n errorDeath doExit | 502 default n errorDeath doExit |
466 | 503 |
467 | 504 |
OLD | NEW |