tools/lexer_generator/rule_lexer.py - Issue 59643002: Experimental parser: simplified rule lexer and parser

Unified Diff: tools/lexer_generator/rule_lexer.py

Issue 59643002: Experimental parser: simplified rule lexer and parser (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/lexer_generator/rule_lexer.py

diff --git a/tools/lexer_generator/rule_lexer.py b/tools/lexer_generator/rule_lexer.py

index b5bb8c403530818c32b5c601197a47736fd07465..05bf03a0d5201b248fac3ea8a6eb2a5c43de3ef2 100644

--- a/tools/lexer_generator/rule_lexer.py

+++ b/tools/lexer_generator/rule_lexer.py

@@ -30,69 +30,78 @@ import ply.lex as lex

class RuleLexer:

tokens = (

- 'ALIAS',

- 'EQUALS',

- 'REGEX',

- 'CONDITION',

- 'CONDITION_BEGIN',

- 'CONDITION_END',

- 'REGEX_TRANSITION_BODY',

- 'REGEX_TRANSITION',

- 'REGEX_BODY',

- )

- t_ANY_ignore = " \t\n"

+ 'IDENTIFIER',

+ 'STRING_REGEX',

+ 'CHARACTER_CLASS_REGEX',

+ 'TRANSITION',

+ 'TRANSITION_WITH_CODE',

+ 'PLUS',

+ 'QUESTION_MARK',

+ 'EQUALS',

+ 'OR',

+ 'STAR',

+ 'LEFT_PARENTHESIS',

+ 'RIGHT_PARENTHESIS',

+ 'LESS_THAN',

+ 'GREATER_THAN',

+ 'SEMICOLON',

+ 'LEFT_BRACKET',

+ 'RIGHT_BRACKET',

+ 'CODE_FRAGMENT',

+ )

states = (

- ('afterAlias', 'exclusive'),

- ('afterAliasEquals', 'exclusive'),

- ('inCondition', 'exclusive'),

- ('seenCondition', 'exclusive'),

- ('afterCondition', 'exclusive'))

- def t_ALIAS(self, t):

- r'[a-zA-Z0-9_]+'

- self.lexer.begin('afterAlias')

+ ('code','exclusive'),

+ )

+ t_ignore = " \t\n\r"

+ t_code_ignore = ""

+ def t_COMMENT(self, t):

+ r'\#.*[\n\r]+'

+ pass

+ t_IDENTIFIER = r'[a-zA-Z0-9_]+'

+ t_STRING_REGEX = r'"((\\("|\w|\\))|[^\\"])+"'

+ t_CHARACTER_CLASS_REGEX = r'\[([^\]]|\\\])+\]'

+ t_TRANSITION = r':=>'

+ t_TRANSITION_WITH_CODE = r'=>'

+ t_PLUS = r'\+'

+ t_QUESTION_MARK = r'\?'

+ t_STAR = r'\*'

+ t_OR = r'\|'

+ t_EQUALS = r'='

+ t_LEFT_PARENTHESIS = r'\('

+ t_RIGHT_PARENTHESIS = r'\)'

+ t_LESS_THAN = r'<'

+ t_GREATER_THAN = r'>'

+ t_SEMICOLON = r';'

+ def t_LEFT_BRACKET(self, t):

+ r'{'

+ self.lexer.push_state('code')

+ self.nesting = 1

return t

- def t_afterAlias_EQUALS(self, t):

- r'='

- self.lexer.begin('afterAliasEquals')

- return t

- def t_afterAliasEquals_REGEX(self, t):

- r'(?P<regex>.+)\s*;'

- self.lexer.begin('INITIAL')

- return t

- def t_CONDITION_BEGIN(self, t):

- r'<'

- self.lexer.begin('inCondition')

- return t

- def t_inCondition_CONDITION(self, t):

- r'[a-zA-Z0-9_]+'

- self.lexer.begin('seenCondition')

- return t

- def t_seenCondition_CONDITION_END(self, t):

- r'>\s*'

- self.lexer.begin('afterCondition')

- return t

- def t_afterCondition_REGEX_TRANSITION_BODY(self, t):

- r'(?P<regex>.+)\s*=>\s*(?P<new>.+)\s*{\s*(?P<body>.+)\s*}\s*'

- self.lexer.begin('INITIAL')

- return t

+ t_code_CODE_FRAGMENT = r'[^{}]+'

- def t_afterCondition_REGEX_TRANSITION(self, t):

- r'(?P<regex>.+)\s*:=>\s*(?P<new>.+)\s*'

- self.lexer.begin('INITIAL')

+ def t_code_LEFT_BRACKET(self, t):

+ r'{'

+ self.nesting += 1

+ t.type = 'CODE_FRAGMENT'

return t

- def t_afterCondition_REGEX_BODY(self, t):

- r'(?P<regex>.+?)\s+{\s*(?P<body>.+)\s*}\s*'

- self.lexer.begin('INITIAL')

+ def t_code_RIGHT_BRACKET(self, t):

+ r'}'

+ self.nesting -= 1

+ if self.nesting:

+ t.type = 'CODE_FRAGMENT'

+ else:

+ self.lexer.pop_state()

return t

def t_ANY_error(self, t):

« no previous file with comments | « tools/lexer_generator/generator.py ('k') | tools/lexer_generator/rule_parser.py » ('j') | no next file with comments »