icu46/source/common/rbbirpt.txt - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/rbbirpt.txt

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1

	2 #*****************************************************************************

	3 #

	4 # Copyright (C) 2002-2003, International Business Machines Corporation and others.

	5 # All Rights Reserved.

	6 #

	7 #*****************************************************************************

	8 #

	9 # file: rbbirpt.txt

	10 # ICU Break Iterator Rule Parser State Table

	11 #

	12 # This state table is used when reading and parsing a set of RBBI rules

	13 # The rule parser uses a state machine; the data in this file define the

	14 # state transitions that occur for each input character.

	15 #

	16 # *** This file defines the RBBI rule grammar. This is it.

	17 # *** The determination of what is accepted is here.

	18 #

	19 # This file is processed by a perl script "rbbicst.pl" to produce initialized C arrays

	20 # that are then built with the rule parser.

	21 #

	22

	23 #

	24 # Here is the syntax of the state definitions in this file:

	25 #

	26 #

	27 #StateName:

	28 # input-char n next-state ^push-state action

	29 # input-char n next-state ^push-state action

	30 # \| \| \| \| \|

	31 # \| \| \| \| \|--- action to be performed by state machine

	32 # \| \| \| \| See function RBBIRuleScanner::doParseActions()

	33 # \| \| \| \|

	34 # \| \| \| \|--- Push this named state onto the state stack.

	35 # \| \| \| Later, when next state is specified as "pop",

	36 # \| \| \| the pushed state will become the current state.

	37 # \| \| \|

	38 # \| \| \|--- Transition to this state if the current input character matches the input

	39 # \| \| character or char class in the left hand column. "pop" causes the next

	40 # \| \| state to be popped from the state stack.

	41 # \| \|

	42 # \| \|--- When making the state transition specified on this line, advance to the next

	43 # \| character from the input only if 'n' appears here.

	44 # \|

	45 # \|--- Character or named character classes to test for. If the current character being scanned

	46 # matches, perform the actions and go to the state specified on this line.

	47 # The input character is tested sequentially, in the order written. The characters and

	48 # character classes tested for do not need to be mutually exclusive. The first match wins.

	49 #

	50

	51

	52

	53

	54 #

	55 # start state, scan position is at the beginning of the rules file, or in between two rules.

	56 #

	57 start:

	58 escaped term ^break-rule-end doExprStart

	59 white_space n start

	60 '$' scan-var-name ^assign-or-rule doExprStart

	61 '!' n rev-option

	62 ';' n start # ignore empty rules.

	63 eof exit

	64 default term ^break-rule-end doExprStart

	65

	66 #

	67 # break-rule-end: Returned from doing a break-rule expression.

	68 #

	69 break-rule-end:

	70 ';' n start doEndOfRule

	71 white_space n break-rule-end

	72 default errorDeath doRuleError

	73

	74

	75 #

	76 # ! We've just scanned a '!', indicating either a !!key word flag or a

	77 # !Reverse rule.

	78 #

	79 rev-option:

	80 '!' n option-scan1

	81 default reverse-rule ^break-rule-end doReverseDir

	82

	83 option-scan1:

	84 name_start_char n option-scan2 doOptionStar t

	85 default errorDeath doRuleError

	86

	87 option-scan2:

	88 name_char n option-scan2

	89 default option-scan3 doOptionEnd

	90

	91 option-scan3:

	92 ';' n start

	93 white_space n option-scan3

	94 default errorDeath doRuleError

	95

	96

	97 reverse-rule:

	98 default term ^break-rule-end doExprStart

	99

	100

	101 #

	102 # term. Eat through a single rule character, or a composite thing, which

	103 # could be a parenthesized expression, a variable name, or a Unicode Set.

	104 #

	105 term:

	106 escaped n expr-mod doRuleChar

	107 white_space n term

	108 rule_char n expr-mod doRuleChar

	109 '[' scan-unicode-set ^expr-mod

	110 '(' n term ^expr-mod doLParen

	111 '$' scan-var-name ^term-var-ref

	112 '.' n expr-mod doDotAny

	113 default errorDeath doRuleError

	114

	115

	116

	117 #

	118 # term-var-ref We've just finished scanning a reference to a $variable.

	119 # Check that the variable was defined.

	120 # The variable name scanning is in common with assignment statements,

	121 # so the check can't be done there.

	122 term-var-ref:

	123 default expr-mod doCheckVarDe f

	124

	125

	126 #

	127 # expr-mod We've just finished scanning a term, now look for the optional

	128 # trailing '*', '?', '+'

	129 #

	130 expr-mod:

	131 white_space n expr-mod

	132 '*' n expr-cont doUnaryOpSta r

	133 '+' n expr-cont doUnaryOpPlu s

	134 '?' n expr-cont doUnaryOpQue stion

	135 default expr-cont

	136

	137

	138 #

	139 # expr-cont Expression, continuation. At a point where additional terms are

	140 # allowed, but not required.

	141 #

	142 expr-cont:

	143 escaped term doExprCatOpe rator

	144 white_space n expr-cont

	145 rule_char term doExprCatOpe rator

	146 '[' term doExprCatOpe rator

	147 '(' term doExprCatOpe rator

	148 '$' term doExprCatOpe rator

	149 '.' term doExprCatOpe rator

	150 '/' look-ahead doExprCatOpe rator

	151 '{' n tag-open doExprCatOpe rator

	152 '\|' n term doExprOrOper ator

	153 ')' n pop doExprRParen

	154 default pop doExprFinish ed

	155

	156

	157 #

	158 # look-ahead Scanning a '/', which identifies a break point, assuming that the

	159 # remainder of the expression matches.

	160 #

	161 # Generate a parse tree as if this was a special kind of input symbol

	162 # appearing in an otherwise normal concatenation expression.

	163 #

	164 look-ahead:

	165 '/' n expr-cont-no-slash doSlash

	166 default errorDeath

	167

	168

	169 #

	170 # expr-cont-no-slash Expression, continuation. At a point where additional terms are

	171 # allowed, but not required. Just like

	172 # expr-cont, above, except that no '/'

	173 # look-ahead symbol is permitted.

	174 #

	175 expr-cont-no-slash:

	176 escaped term doExprCatOpe rator

	177 white_space n expr-cont

	178 rule_char term doExprCatOpe rator

	179 '[' term doExprCatOpe rator

	180 '(' term doExprCatOpe rator

	181 '$' term doExprCatOpe rator

	182 '.' term doExprCatOpe rator

	183 '\|' n term doExprOrOper ator

	184 ')' n pop doExprRParen

	185 default pop doExprFinish ed

	186

	187

	188 #

	189 # tags scanning a '{', the opening delimiter for a tag that identifies

	190 # the kind of match. Scan the whole {dddd} tag, where d=digit

	191 #

	192 tag-open:

	193 white_space n tag-open

	194 digit_char tag-value doStartTagVa lue

	195 default errorDeath doTagExpecte dError

	196

	197 tag-value:

	198 white_space n tag-close

	199 '}' tag-close

	200 digit_char n tag-value doTagDigit

	201 default errorDeath doTagExpecte dError

	202

	203 tag-close:

	204 white_space n tag-close

	205 '}' n expr-cont-no-tag doTagValue

	206 default errorDeath doTagExpecte dError

	207

	208

	209

	210 #

	211 # expr-cont-no-tag Expression, continuation. At a point where additional terms are

	212 # allowed, but not required. Just like

	213 # expr-cont, above, except that no "{ddd}"

	214 # tagging is permitted.

	215 #

	216 expr-cont-no-tag:

	217 escaped term doExprCatOpe rator

	218 white_space n expr-cont-no-tag

	219 rule_char term doExprCatOpe rator

	220 '[' term doExprCatOpe rator

	221 '(' term doExprCatOpe rator

	222 '$' term doExprCatOpe rator

	223 '.' term doExprCatOpe rator

	224 '/' look-ahead doExprCatOpe rator

	225 '\|' n term doExprOrOper ator

	226 ')' n pop doExprRParen

	227 default pop doExprFinish ed

	228

	229

	230

	231

	232 #

	233 # Variable Name Scanning.

	234 #

	235 # The state that branched to here must have pushed a return state

	236 # to go to after completion of the variable name scanning.

	237 #

	238 # The current input character must be the $ that introduces the name.

	239 # The $ is consumed here rather than in the state that first detected it

	240 # so that the doStartVariableName action only needs to happen in one

	241 # place (here), and the other states don't need to worry about it.

	242 #

	243 scan-var-name:

	244 '$' n scan-var-start doStartVaria bleName

	245 default errorDeath

	246

	247

	248 scan-var-start:

	249 name_start_char n scan-var-body

	250 default errorDeath doVariableNa meExpectedErr

	251

	252 scan-var-body:

	253 name_char n scan-var-body

	254 default pop doEndVariabl eName

	255

	256

	257

	258 #

	259 # scan-unicode-set Unicode Sets are parsed by the UnicodeSet class.

	260 # Within the RBBI parser, after finding the first character

	261 # of a Unicode Set, we just hand the rule input at that

	262 # point off to the Unicode Set constructor, then pick

	263 # up parsing after the close of the set.

	264 #

	265 # The action for this state invokes the UnicodeSet parser.

	266 #

	267 scan-unicode-set:

	268 '[' n pop doScanUnico deSet

	269 'p' n pop doScanUnico deSet

	270 'P' n pop doScanUnico deSet

	271 default errorDeath

	272

	273

	274

	275

	276

	277

	278

	279 #

	280 # assign-or-rule. A $variable was encountered at the start of something, could be

	281 # either an assignment statement or a rule, depending on whether an '='

	282 # follows the variable name. We get to this state when the variable name

	283 # scanning does a return.

	284 #

	285 assign-or-rule:

	286 white_space n assign-or-rule

	287 '=' n term ^assign-end doStartAssig n # variable was target of assignment

	288 default term-var-ref ^break-rule-end # variable was a term in a rule

	289

	290

	291

	292 #

	293 # assign-end This state is entered when the end of the expression on the

	294 # right hand side of an assignment is found. We get here via

	295 # a pop; this state is pushed when the '=' in an assignment is found.

	296 #

	297 # The only thing allowed at this point is a ';'. The RHS of an

	298 # assignment must look like a rule expression, and we come here

	299 # when what is being scanned no longer looks like an expression.

	300 #

	301 assign-end:

	302 ';' n start doEndAssign

	303 default errorDeath doRuleErrorA ssignExpr

	304

	305

	306

	307 #

	308 # errorDeath. This state is specified as the next state whenever a syntax error

	309 # in the source rules is detected. Barring bugs, the state machine will never

	310 # actually get here, but will stop because of the action associated with the error.

	311 # But, just in case, this state asks the state machine to exit.

	312 errorDeath:

	313 default n errorDeath doExit

	314

	315

OLD	NEW

« no previous file with comments | « icu46/source/common/rbbirpt.h ('k') | icu46/source/common/rbbiscan.h » ('j') | no next file with comments »