Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(62)

Unified Diff: tools/lexer_generator/rule_parser.py

Issue 59643002: Experimental parser: simplified rule lexer and parser (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/lexer_generator/rule_lexer.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/rule_parser.py
diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py
index a733801baaa66f485413871b0a0f97146e1f7107..46cce1378c01a6710728f89f464f02cec7857a70 100644
--- a/tools/lexer_generator/rule_parser.py
+++ b/tools/lexer_generator/rule_parser.py
@@ -33,48 +33,101 @@ class RuleParser:
tokens = RuleLexer.tokens
def __init__(self):
- self.aliases = dict()
- self.transitions = dict()
-
- def p_statement_alias(self, p):
- 'statement : ALIAS EQUALS REGEX'
- regex = self.lexer.lexer.lexmatch.group('regex')
- self.aliases[p[1]] = regex
-
- def p_statement_condition_transition(self, p):
- 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION'
- old_condition = p[2]
- regex = self.lexer.lexer.lexmatch.group('regex').strip()
- new_condition = self.lexer.lexer.lexmatch.group('new')
- if old_condition not in self.transitions:
- self.transitions[old_condition] = dict()
- self.transitions[old_condition][regex] = ('condition', new_condition)
-
- def p_statement_condition_body(self, p):
- 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_BODY'
- old_condition = p[2]
- regex = self.lexer.lexer.lexmatch.group('regex').strip()
- body = self.lexer.lexer.lexmatch.group('body').strip()
- if old_condition not in self.transitions:
- self.transitions[old_condition] = dict()
- self.transitions[old_condition][regex] = ('body', body)
-
- def p_statement_condition_transition_body(self, p):
- 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION_BODY'
- old_condition = p[2]
- regex = self.lexer.lexer.lexmatch.group('regex').strip()
- new_condition = self.lexer.lexer.lexmatch.group('new').strip()
- body = self.lexer.lexer.lexmatch.group('body').strip()
- if old_condition not in self.transitions:
- self.transitions[old_condition] = dict()
- self.transitions[old_condition][regex] = (
- 'condition_and_body', new_condition, body)
+ self.aliases = {}
+ self.current_transition = None
+ self.rules = {}
+
+ def p_statements(self, p):
+ 'statements : statement maybe_statements'
+
+ def p_maybe_statement(self, p):
+ '''maybe_statements : statements
+ | empty'''
+
+ def p_statement(self, p):
+ '''statement : alias_rule
+ | transition_rule'''
+
+ def p_alias_rule(self, p):
+ 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'
+ assert not p[1] in self.aliases
+ self.aliases[p[1]] = p[3]
+
+ def p_transition_rule(self, p):
+ '''transition_rule : transition composite_regex code
+ | transition composite_regex TRANSITION IDENTIFIER
+ | transition composite_regex TRANSITION_WITH_CODE IDENTIFIER code'''
+ transition = p[0]
+ regex = p[2]
+ rules = self.rules[self.current_transition]
+ if len(p) == 4:
+ rules.append(('simple', regex, p[3]))
+ elif len(p) == 5:
+ rules.append(('transition', regex, p[4]))
+ elif len(p) == 6:
+ rules.append(('transition_with_code', regex, p[4], p[5]))
+ else:
+ raise Exception()
+
+ def p_transition(self, p):
+ '''transition : LESS_THAN IDENTIFIER GREATER_THAN'''
+ # | empty''' TODO: allow omitting the transition without introducing a shift/reduce conflict
+ if p[1]:
+ self.current_transition = p[2]
+ assert self.current_transition
+ if not self.current_transition in self.rules:
+ self.rules[self.current_transition] = []
+ p[0] = self.current_transition
+
+ def p_composite_regex(self, p):
+ '''composite_regex : regex_part OR regex_part maybe_regex_parts
+ | regex_part maybe_regex_parts'''
+ if p[len(p)-1]:
+ p[0] = p[1:]
+ else:
+ p[0] = p[1:-1]
+
+ def p_maybe_regex_part(self, p):
+ '''maybe_regex_parts : composite_regex
+ | empty'''
+ p[0] = p[1]
+
+ def p_regex_part(self, p):
+ '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier
+ | STRING_REGEX modifier
+ | CHARACTER_CLASS_REGEX modifier
+ | IDENTIFIER modifier'''
+ if p[len(p)-1]:
+ p[0] = p[1:]
+ else:
+ p[0] = p[1:-1]
+
+ def p_modifier(self, p):
+ '''modifier : PLUS
+ | QUESTION_MARK
+ | STAR
+ | empty'''
+ p[0] = p[1]
+
+ def p_code(self, p):
+ 'code : LEFT_BRACKET code_fragments RIGHT_BRACKET'
+ p[0] = p[2].strip()
+
+ def p_code_fragments(self, p):
+ '''code_fragments : CODE_FRAGMENT code_fragments
+ | empty'''
+ p[0] = p[1]
+ if len(p) == 3 and p[2]:
+ p[0] = p[1] + p[2]
+
+ def p_empty(self, p):
+ 'empty :'
def p_error(self, p):
raise Exception("Syntax error in input '%s'" % p)
def build(self, **kwargs):
- self.parser = yacc.yacc(module=self, **kwargs)
+ self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)
self.lexer = RuleLexer()
self.lexer.build(**kwargs)
« no previous file with comments | « tools/lexer_generator/rule_lexer.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698