tools/lexer_generator/rule_parser.py - Issue 59643002: Experimental parser: simplified rule lexer and parser

Unified Diff: tools/lexer_generator/rule_parser.py

Issue 59643002: Experimental parser: simplified rule lexer and parser (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/lexer_generator/rule_parser.py

diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py

index a733801baaa66f485413871b0a0f97146e1f7107..46cce1378c01a6710728f89f464f02cec7857a70 100644

--- a/tools/lexer_generator/rule_parser.py

+++ b/tools/lexer_generator/rule_parser.py

@@ -33,48 +33,101 @@ class RuleParser:

tokens = RuleLexer.tokens

def __init__(self):

- self.aliases = dict()

- self.transitions = dict()

- def p_statement_alias(self, p):

- 'statement : ALIAS EQUALS REGEX'

- regex = self.lexer.lexer.lexmatch.group('regex')

- self.aliases[p[1]] = regex

- def p_statement_condition_transition(self, p):

- 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION'

- old_condition = p[2]

- regex = self.lexer.lexer.lexmatch.group('regex').strip()

- new_condition = self.lexer.lexer.lexmatch.group('new')

- if old_condition not in self.transitions:

- self.transitions[old_condition] = dict()

- self.transitions[old_condition][regex] = ('condition', new_condition)

- def p_statement_condition_body(self, p):

- 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_BODY'

- old_condition = p[2]

- regex = self.lexer.lexer.lexmatch.group('regex').strip()

- body = self.lexer.lexer.lexmatch.group('body').strip()

- if old_condition not in self.transitions:

- self.transitions[old_condition] = dict()

- self.transitions[old_condition][regex] = ('body', body)

- def p_statement_condition_transition_body(self, p):

- 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION_BODY'

- old_condition = p[2]

- regex = self.lexer.lexer.lexmatch.group('regex').strip()

- new_condition = self.lexer.lexer.lexmatch.group('new').strip()

- body = self.lexer.lexer.lexmatch.group('body').strip()

- if old_condition not in self.transitions:

- self.transitions[old_condition] = dict()

- self.transitions[old_condition][regex] = (

- 'condition_and_body', new_condition, body)

+ self.aliases = {}

+ self.current_transition = None

+ self.rules = {}

+ def p_statements(self, p):  # Grammar-only rule: one statement followed by optional further statements; assigns no p[0].

+ 'statements : statement maybe_statements'

+ def p_maybe_statement(self, p):  # Grammar-only rule: maybe_statements is either more statements or empty; assigns no p[0].

+ '''maybe_statements : statements

+ | empty'''

+ def p_statement(self, p):  # Grammar-only rule: a statement is an alias_rule or a transition_rule; assigns no p[0].

+ '''statement : alias_rule

+ | transition_rule'''

+ def p_alias_rule(self, p):  # Records an alias: maps the IDENTIFIER (p[1]) to its composite regex (p[3]) in self.aliases.

+ 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'

+ assert not p[1] in self.aliases  # An alias name may be defined only once. NOTE(review): assert is skipped under python -O.

+ self.aliases[p[1]] = p[3]

+ def p_transition_rule(self, p):  # Appends one rule tuple to self.rules[self.current_transition]; the tuple kind depends on which alternative matched.

+ '''transition_rule : transition composite_regex code

+ | transition composite_regex TRANSITION IDENTIFIER

+ | transition composite_regex TRANSITION_WITH_CODE IDENTIFIER code'''

+ transition = p[0]  # NOTE(review): p[0] is this production's result slot, not the matched 'transition' (that is p[1]); this local is never used below.

+ regex = p[2]

+ rules = self.rules[self.current_transition]  # p_transition creates this list and sets current_transition before this rule is reduced.

+ if len(p) == 4:  # transition composite_regex code

+ rules.append(('simple', regex, p[3]))

+ elif len(p) == 5:  # transition composite_regex TRANSITION IDENTIFIER

+ rules.append(('transition', regex, p[4]))

+ elif len(p) == 6:  # transition composite_regex TRANSITION_WITH_CODE IDENTIFIER code

+ rules.append(('transition_with_code', regex, p[4], p[5]))

+ else:

+ raise Exception()  # Not expected: the three alternatives above have lengths 4, 5 and 6.

+ def p_transition(self, p):  # Sets self.current_transition to the IDENTIFIER (p[2]), ensures it has a rule list, and returns it as p[0].

+ '''transition : LESS_THAN IDENTIFIER GREATER_THAN'''

+ # | empty''' TODO: support omitting the transition without a shift/reduce (sr) conflict

+ if p[1]:  # NOTE(review): presumably kept for the commented-out 'empty' alternative, so the previous transition would be reused — confirm.

+ self.current_transition = p[2]

+ assert self.current_transition

+ if not self.current_transition in self.rules:

+ self.rules[self.current_transition] = []  # First rule seen for this transition: start its list.

+ p[0] = self.current_transition

+ def p_composite_regex(self, p):  # Sets p[0] to the matched right-hand-side values, dropping the trailing maybe_regex_parts when it is falsy.

+ '''composite_regex : regex_part OR regex_part maybe_regex_parts

+ | regex_part maybe_regex_parts'''

+ if p[len(p)-1]:  # The last symbol is maybe_regex_parts in both alternatives.

+ p[0] = p[1:]

+ else:

+ p[0] = p[1:-1]  # Nothing followed: leave the empty tail out of the result.

+ def p_maybe_regex_part(self, p):  # Passes through the value of whichever alternative matched (composite_regex or empty).

+ '''maybe_regex_parts : composite_regex

+ | empty'''

+ p[0] = p[1]

+ def p_regex_part(self, p):  # Sets p[0] to the matched right-hand-side values, dropping the trailing modifier when it is falsy.

+ '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier

+ | STRING_REGEX modifier

+ | CHARACTER_CLASS_REGEX modifier

+ | IDENTIFIER modifier'''

+ if p[len(p)-1]:  # The last symbol is the modifier in every alternative.

+ p[0] = p[1:]

+ else:

+ p[0] = p[1:-1]  # No modifier: leave it out of the result.

+ def p_modifier(self, p):  # Passes through the PLUS / QUESTION_MARK / STAR token value, or the value of 'empty' when there is no modifier.

+ '''modifier : PLUS

+ | QUESTION_MARK

+ | STAR

+ | empty'''

+ p[0] = p[1]

+ def p_code(self, p):  # Sets p[0] to the value of code_fragments (p[2]) with surrounding whitespace stripped.

+ 'code : LEFT_BRACKET code_fragments RIGHT_BRACKET'

+ p[0] = p[2].strip()  # NOTE(review): if code_fragments matched 'empty', p[2] is presumably None and .strip() would raise — confirm.

+ def p_code_fragments(self, p):  # Right-recursive rule that concatenates consecutive CODE_FRAGMENT values into p[0].

+ '''code_fragments : CODE_FRAGMENT code_fragments

+ | empty'''

+ p[0] = p[1]  # Covers the 'empty' alternative and a fragment whose tail is falsy.

+ if len(p) == 3 and p[2]:  # A fragment followed by a non-empty tail.

+ p[0] = p[1] + p[2]

+ def p_empty(self, p):  # Empty production: matches nothing and assigns no value to p[0].

+ 'empty :'

def p_error(self, p):

raise Exception("Syntax error in input '%s'" % p)

def build(self, **kwargs):

- self.parser = yacc.yacc(module=self, **kwargs)

+ self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)

self.lexer = RuleLexer()

self.lexer.build(**kwargs)

« no previous file with comments | « tools/lexer_generator/rule_lexer.py ('k') | no next file » | no next file with comments »