Index: tools/lexer_generator/rule_lexer.py |
diff --git a/tools/lexer_generator/rule_lexer.py b/tools/lexer_generator/rule_lexer.py |
index b5bb8c403530818c32b5c601197a47736fd07465..05bf03a0d5201b248fac3ea8a6eb2a5c43de3ef2 100644 |
--- a/tools/lexer_generator/rule_lexer.py |
+++ b/tools/lexer_generator/rule_lexer.py |
@@ -30,69 +30,78 @@ import ply.lex as lex |
class RuleLexer: |
tokens = ( |
- 'ALIAS', |
- 'EQUALS', |
- 'REGEX', |
- 'CONDITION', |
- 'CONDITION_BEGIN', |
- 'CONDITION_END', |
- 'REGEX_TRANSITION_BODY', |
- 'REGEX_TRANSITION', |
- 'REGEX_BODY', |
- ) |
- |
- t_ANY_ignore = " \t\n" |
+ 'IDENTIFIER', |
+ 'STRING_REGEX', |
+ 'CHARACTER_CLASS_REGEX', |
+ 'TRANSITION', |
+ 'TRANSITION_WITH_CODE', |
+ |
+ 'PLUS', |
+ 'QUESTION_MARK', |
+ 'EQUALS', |
+ 'OR', |
+ 'STAR', |
+ 'LEFT_PARENTHESIS', |
+ 'RIGHT_PARENTHESIS', |
+ 'LESS_THAN', |
+ 'GREATER_THAN', |
+ 'SEMICOLON', |
+ |
+ 'LEFT_BRACKET', |
+ 'RIGHT_BRACKET', |
+ |
+ 'CODE_FRAGMENT', |
+ ) |
states = ( |
- ('afterAlias', 'exclusive'), |
- ('afterAliasEquals', 'exclusive'), |
- ('inCondition', 'exclusive'), |
- ('seenCondition', 'exclusive'), |
- ('afterCondition', 'exclusive')) |
- |
- def t_ALIAS(self, t): |
- r'[a-zA-Z0-9_]+' |
- self.lexer.begin('afterAlias') |
+ ('code','exclusive'), |
+ ) |
+ |
+ t_ignore = " \t\n\r" |
+ t_code_ignore = "" |
+ |
+ def t_COMMENT(self, t): |
+ r'\#.*' |
+ pass  # drop the comment; no newline required so a comment on the last line (no trailing EOL) still lexes, and t_ignore skips the EOL |
+ |
+ t_IDENTIFIER = r'[a-zA-Z0-9_]+' |
+ t_STRING_REGEX = r'"((\\("|\w|\\))|[^\\"])+"' |
+ t_CHARACTER_CLASS_REGEX = r'\[(\\.|[^\\\]])+\]'  # consume backslash escapes first so an escaped \] does not close the class |
+ t_TRANSITION = r':=>' |
+ t_TRANSITION_WITH_CODE = r'=>' |
+ |
+ t_PLUS = r'\+' |
+ t_QUESTION_MARK = r'\?' |
+ t_STAR = r'\*' |
+ t_OR = r'\|' |
+ t_EQUALS = r'=' |
+ t_LEFT_PARENTHESIS = r'\(' |
+ t_RIGHT_PARENTHESIS = r'\)' |
+ t_LESS_THAN = r'<' |
+ t_GREATER_THAN = r'>' |
+ t_SEMICOLON = r';' |
+ |
+ def t_LEFT_BRACKET(self, t): |
+ r'{' |
+ self.lexer.push_state('code') |
+ self.nesting = 1 |
return t |
- def t_afterAlias_EQUALS(self, t): |
- r'=' |
- self.lexer.begin('afterAliasEquals') |
- return t |
- |
- def t_afterAliasEquals_REGEX(self, t): |
- r'(?P<regex>.+)\s*;' |
- self.lexer.begin('INITIAL') |
- return t |
- |
- def t_CONDITION_BEGIN(self, t): |
- r'<' |
- self.lexer.begin('inCondition') |
- return t |
- |
- def t_inCondition_CONDITION(self, t): |
- r'[a-zA-Z0-9_]+' |
- self.lexer.begin('seenCondition') |
- return t |
- |
- def t_seenCondition_CONDITION_END(self, t): |
- r'>\s*' |
- self.lexer.begin('afterCondition') |
- return t |
- |
- def t_afterCondition_REGEX_TRANSITION_BODY(self, t): |
- r'(?P<regex>.+)\s*=>\s*(?P<new>.+)\s*{\s*(?P<body>.+)\s*}\s*' |
- self.lexer.begin('INITIAL') |
- return t |
+ t_code_CODE_FRAGMENT = r'[^{}]+' |
- def t_afterCondition_REGEX_TRANSITION(self, t): |
- r'(?P<regex>.+)\s*:=>\s*(?P<new>.+)\s*' |
- self.lexer.begin('INITIAL') |
+ def t_code_LEFT_BRACKET(self, t): |
+ r'{' |
+ self.nesting += 1 |
+ t.type = 'CODE_FRAGMENT' |
return t |
- def t_afterCondition_REGEX_BODY(self, t): |
- r'(?P<regex>.+?)\s+{\s*(?P<body>.+)\s*}\s*' |
- self.lexer.begin('INITIAL') |
+ def t_code_RIGHT_BRACKET(self, t): |
+ r'}' |
+ self.nesting -= 1 |
+ if self.nesting: |
+ t.type = 'CODE_FRAGMENT' |
+ else: |
+ self.lexer.pop_state() |
return t |
def t_ANY_error(self, t): |