| Index: tools/lexer_generator/rule_lexer.py
|
| diff --git a/tools/lexer_generator/rule_lexer.py b/tools/lexer_generator/rule_lexer.py
|
| index b5bb8c403530818c32b5c601197a47736fd07465..05bf03a0d5201b248fac3ea8a6eb2a5c43de3ef2 100644
|
| --- a/tools/lexer_generator/rule_lexer.py
|
| +++ b/tools/lexer_generator/rule_lexer.py
|
| @@ -30,69 +30,78 @@ import ply.lex as lex
|
| class RuleLexer:
|
|
|
| tokens = (
|
| - 'ALIAS',
|
| - 'EQUALS',
|
| - 'REGEX',
|
| - 'CONDITION',
|
| - 'CONDITION_BEGIN',
|
| - 'CONDITION_END',
|
| - 'REGEX_TRANSITION_BODY',
|
| - 'REGEX_TRANSITION',
|
| - 'REGEX_BODY',
|
| - )
|
| -
|
| - t_ANY_ignore = " \t\n"
|
| + 'IDENTIFIER',
|
| + 'STRING_REGEX',  # double-quoted string, see t_STRING_REGEX
|
| + 'CHARACTER_CLASS_REGEX',  # bracketed character class, see t_CHARACTER_CLASS_REGEX
|
| + 'TRANSITION',  # ':=>'
|
| + 'TRANSITION_WITH_CODE',  # '=>'
|
| +
|
| + 'PLUS',
|
| + 'QUESTION_MARK',
|
| + 'EQUALS',
|
| + 'OR',  # '|'
|
| + 'STAR',
|
| + 'LEFT_PARENTHESIS',
|
| + 'RIGHT_PARENTHESIS',
|
| + 'LESS_THAN',
|
| + 'GREATER_THAN',
|
| + 'SEMICOLON',
|
| +
|
| + 'LEFT_BRACKET',  # '{' seen outside the 'code' state (t_LEFT_BRACKET)
|
| + 'RIGHT_BRACKET',  # the '}' that brings the nesting count back to zero (t_code_RIGHT_BRACKET)
|
| +
|
| + 'CODE_FRAGMENT',  # text, including nested braces, lexed while in the 'code' state
|
| + )
|
|
|
| states = (
|
| - ('afterAlias', 'exclusive'),
|
| - ('afterAliasEquals', 'exclusive'),
|
| - ('inCondition', 'exclusive'),
|
| - ('seenCondition', 'exclusive'),
|
| - ('afterCondition', 'exclusive'))
|
| -
|
| - def t_ALIAS(self, t):
|
| - r'[a-zA-Z0-9_]+'
|
| - self.lexer.begin('afterAlias')
|
| + ('code','exclusive'),  # pushed by t_LEFT_BRACKET; exclusive, so only t_code_* rules (and t_ANY_*) apply inside it
|
| + )
|
| +
|
| + t_ignore = " \t\n\r"  # characters skipped between tokens in the default (INITIAL) state
|
| + t_code_ignore = ""  # nothing is skipped in the 'code' state; whitespace there is matched by t_code_CODE_FRAGMENT
|
| +
|
| + def t_COMMENT(self, t):
|
| + r'\#[^\n\r]*'  # '#' up to, not including, the line end, so a comment on the last line without a newline still matches; t_ignore skips the terminator; '\#' because PLY compiles rules in re.VERBOSE mode by default
|
| + pass  # nothing is returned, so the comment text produces no token
|
| +
|
| + t_IDENTIFIER = r'[a-zA-Z0-9_]+'
|
| + t_STRING_REGEX = r'"((\\("|\w|\\))|[^\\"])+"'  # double-quoted, non-empty; a backslash may only precede '"', a word character or another backslash
|
| + t_CHARACTER_CLASS_REGEX = r'\[(\\.|[^\]\\])+\]'  # '[' ... ']'; a backslash escape is tried first so that '\]' does not end the class early
|
| + t_TRANSITION = r':=>'
|
| + t_TRANSITION_WITH_CODE = r'=>'
|
| +
|
| + t_PLUS = r'\+'
|
| + t_QUESTION_MARK = r'\?'
|
| + t_STAR = r'\*'
|
| + t_OR = r'\|'
|
| + t_EQUALS = r'='  # PLY tries string rules in order of decreasing pattern length, so ':=>' and '=>' are tried before this
|
| + t_LEFT_PARENTHESIS = r'\('
|
| + t_RIGHT_PARENTHESIS = r'\)'
|
| + t_LESS_THAN = r'<'
|
| + t_GREATER_THAN = r'>'  # a '>' that is part of '=>' or ':=>' is consumed by the transition rules instead
|
| + t_SEMICOLON = r';'
|
| +
|
| + def t_LEFT_BRACKET(self, t):
|
| + r'{'  # a '{' met outside the 'code' state
|
| + self.lexer.push_state('code')  # switch to the 'code' state; t_code_RIGHT_BRACKET pops it again
|
| + self.nesting = 1  # count of currently unclosed '{', maintained by the t_code_* brace rules
|
| return t
|
|
|
| - def t_afterAlias_EQUALS(self, t):
|
| - r'='
|
| - self.lexer.begin('afterAliasEquals')
|
| - return t
|
| -
|
| - def t_afterAliasEquals_REGEX(self, t):
|
| - r'(?P<regex>.+)\s*;'
|
| - self.lexer.begin('INITIAL')
|
| - return t
|
| -
|
| - def t_CONDITION_BEGIN(self, t):
|
| - r'<'
|
| - self.lexer.begin('inCondition')
|
| - return t
|
| -
|
| - def t_inCondition_CONDITION(self, t):
|
| - r'[a-zA-Z0-9_]+'
|
| - self.lexer.begin('seenCondition')
|
| - return t
|
| -
|
| - def t_seenCondition_CONDITION_END(self, t):
|
| - r'>\s*'
|
| - self.lexer.begin('afterCondition')
|
| - return t
|
| -
|
| - def t_afterCondition_REGEX_TRANSITION_BODY(self, t):
|
| - r'(?P<regex>.+)\s*=>\s*(?P<new>.+)\s*{\s*(?P<body>.+)\s*}\s*'
|
| - self.lexer.begin('INITIAL')
|
| - return t
|
| + t_code_CODE_FRAGMENT = r'[^{}]+'  # in the 'code' state: any non-empty run of characters that contains no brace
|
|
|
| - def t_afterCondition_REGEX_TRANSITION(self, t):
|
| - r'(?P<regex>.+)\s*:=>\s*(?P<new>.+)\s*'
|
| - self.lexer.begin('INITIAL')
|
| + def t_code_LEFT_BRACKET(self, t):
|
| + r'{'  # a '{' met while already in the 'code' state
|
| + self.nesting += 1  # one more '}' is now needed before the 'code' state is left
|
| + t.type = 'CODE_FRAGMENT'  # a nested '{' is reported as CODE_FRAGMENT rather than LEFT_BRACKET
|
| return t
|
|
|
| - def t_afterCondition_REGEX_BODY(self, t):
|
| - r'(?P<regex>.+?)\s+{\s*(?P<body>.+)\s*}\s*'
|
| - self.lexer.begin('INITIAL')
|
| + def t_code_RIGHT_BRACKET(self, t):
|
| + r'}'  # a '}' met in the 'code' state
|
| + self.nesting -= 1
|
| + if self.nesting:  # still inside an outer '{': this brace belongs to the code text
|
| + t.type = 'CODE_FRAGMENT'
|
| + else:  # count is back to zero: this '}' closes the '{' matched by t_LEFT_BRACKET; t.type is not overridden on this path
|
| + self.lexer.pop_state()  # restore the state that was active before push_state('code')
|
| return t
|
|
|
| def t_ANY_error(self, t):
|
|
|