tools/lexer_generator/rule_parser.py - Issue 138973007: Experimental parser: support subgraph inlining

Unified Diff: tools/lexer_generator/rule_parser.py

Issue 138973007: Experimental parser: support subgraph inlining (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/lexer_generator/rule_parser.py

diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py

index 590fa4264f35f010cce421b07f06b7d1443a46f7..649bb9a39e5725a17098ae0da181e8ee045fbce7 100644

--- a/tools/lexer_generator/rule_parser.py

+++ b/tools/lexer_generator/rule_parser.py

@@ -25,15 +25,89 @@

# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+import ply.lex as lex

import ply.yacc as yacc

from action import Term, Action

-from rule_lexer import RuleLexer

from regex_parser import RegexParser

from nfa_builder import NfaBuilder

from dfa import Dfa

from dfa_optimizer import DfaOptimizer

from transition_keys import TransitionKey, KeyEncoding

+class RuleLexer:

+ tokens = (

+ 'DEFAULT_ACTION',

+ 'EPSILON',

+ 'EOS',

+ 'CATCH_ALL',

+ 'IDENTIFIER',

+ 'STRING',

+ 'REGEX',

+ 'CHARACTER_CLASS_REGEX',

+ 'PLUS',

+ 'QUESTION_MARK',

+ 'EQUALS',

+ 'OR',

+ 'STAR',

+ 'LEFT_PARENTHESIS',

+ 'RIGHT_PARENTHESIS',

+ 'GRAPH_OPEN',

+ 'GRAPH_CLOSE',

+ 'SEMICOLON',

+ 'ACTION_OPEN',

+ 'ACTION_CLOSE',

+ 'COMMA',

+ )

+ t_ignore = " \t\n\r"

+ def t_COMMENT(self, t):

+ r'\#.*[\n\r]+'

+ pass

+ __special_identifiers = set(

+ ['default_action', 'epsilon', 'catch_all', 'eos'])

+ def t_IDENTIFIER(self, t):

+ r'[a-zA-Z0-9_]+'

+ if t.value in self.__special_identifiers:

+ t.type = t.value.upper()

+ return t

+ t_STRING = r'"((\\("|\w|\\))|[^\\"])+"'

+ t_REGEX = r'/(\\/|[^/])+/'

+ t_CHARACTER_CLASS_REGEX = r'\[([^\]]|\\\])+\]'

+ t_PLUS = r'\+'

+ t_QUESTION_MARK = r'\?'

+ t_STAR = r'\*'

+ t_OR = r'\|'

+ t_EQUALS = '='

+ t_LEFT_PARENTHESIS = r'\('

+ t_RIGHT_PARENTHESIS = r'\)'

+ t_GRAPH_OPEN = '<<'

+ t_GRAPH_CLOSE = '>>'

+ t_SEMICOLON = ';'

+ t_ACTION_OPEN = '<'

+ t_ACTION_CLOSE = '>'

+ t_COMMA = ','

+ def t_LEFT_BRACKET(self, t):

+ r'{'

+ self.lexer.push_state('code')

+ self.nesting = 1

+ return t

+ def t_ANY_error(self, t):

+ raise Exception("Illegal character '%s'" % t.value[0])

+ def build(self, **kwargs):

+ self.lexer = lex.lex(module=self, **kwargs)

class RuleParserState:

def __init__(self, encoding):

@@ -88,7 +162,7 @@ class RuleParser:

state.rules[state.current_state] = {

'default_action': Term.empty_term(),

'uniques' : {},

- 'regex' : []

+ 'trees' : []

}

p[0] = state.current_state

@@ -100,27 +174,45 @@ class RuleParser:

'''transition_rule : composite_regex action

| DEFAULT_ACTION default_action

| EOS eos

+ | EPSILON epsilon

| CATCH_ALL action'''

precedence = RuleParser.__rule_precedence_counter

RuleParser.__rule_precedence_counter += 1

action = p[2]

(entry_action, match_action, transition) = action

- if transition and not transition in self.__keyword_transitions:

- assert not transition == 'default', "can't append default graph"

- self.__state.transitions.add(transition)

+ if transition and not transition.name() in self.__keyword_transitions:

+ assert not transition.name() == 'default', "can't append default graph"

+ self.__state.transitions.add(transition.name())

rules = self.__state.rules[self.__state.current_state]

+ # process default action

if p[1] == 'default_action':

assert self.__state.current_state == 'default'

assert not rules['default_action']

assert not entry_action

rules['default_action'] = match_action

- elif p[1] == 'eos' or p[1] == 'catch_all':

+ return

+ # process tree

+ if p[1] == 'eos' or p[1] == 'catch_all':

assert p[1] not in rules['uniques']

rules['uniques'][p[1]] = True

- rules['regex'].append((NfaBuilder.unique_key(p[1]), precedence, action))

+ tree = NfaBuilder.unique_key(p[1])

+ elif p[1] == 'epsilon':

+ tree = Term.empty_term()

+ else:

+ tree = p[1] # regex case

+ # handle actions

+ if entry_action or match_action:

+ tree = NfaBuilder.add_action(

+ tree, Action(entry_action, match_action, precedence))

+ # handle transitions

+ if not transition:

+ pass

+ elif transition.name() == 'continue':

+ tree = NfaBuilder.add_continue(tree, transition.args()[0])

else:

- regex = p[1]

- rules['regex'].append((regex, precedence, action))

+ tree = NfaBuilder.join_subtree(tree, transition.name())

+ # store tree

+ rules['trees'].append(tree)

def p_action(self, p):

'''action : ACTION_OPEN maybe_identifier_action OR maybe_identifier_action OR maybe_transition ACTION_CLOSE'''

@@ -128,11 +220,16 @@ class RuleParser:

def p_default_action(self, p):

'default_action : ACTION_OPEN identifier_action ACTION_CLOSE'

- p[0] = (Term.empty_term(), p[2], None)

+ p[0] = (Term.empty_term(), p[2], Term.empty_term())

def p_eos(self, p):

'eos : ACTION_OPEN identifier_action ACTION_CLOSE'

- p[0] = (Term.empty_term(), p[2], None)

+ p[0] = (Term.empty_term(), p[2], Term.empty_term())

+ def p_epsilon(self, p):

+ 'epsilon : ACTION_OPEN maybe_transition ACTION_CLOSE'

+ assert p[2], 'cannot have an empty epsilon transition'

+ p[0] = (Term.empty_term(), Term.empty_term(), p[2])

def p_maybe_identifier_action(self, p):

'''maybe_identifier_action : identifier_action

@@ -141,8 +238,14 @@ class RuleParser:

def p_maybe_transition(self, p):

'''maybe_transition : IDENTIFIER

+ | IDENTIFIER LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHESIS

| empty'''

- p[0] = p[1]

+ if len(p) == 5:

+ assert p[1] == 'continue', 'only continue can take arguments'

+ p[0] = Term(p[1], p[3])

+ else:

+ assert len(p) == 2

+ p[0] = Term(p[1], '0') if p[1] else Term.empty_term()

def p_identifier_action(self, p):

'''identifier_action : IDENTIFIER

@@ -312,24 +415,11 @@ class RuleProcessor(object):

def __process_parser_state(self, parser_state):

assert 'default' in parser_state.rules

rule_map = {}

- def process(tree_name, v):

- trees = []

- for tree, precedence, action in v['regex']:

- (entry_action, match_action, transition) = action

- if entry_action or match_action:

- tree = NfaBuilder.add_action(

- tree, Action(entry_action, match_action, precedence))

- if not transition:

- pass

- elif transition == 'continue':

- tree = NfaBuilder.add_continue(tree)

- else:

- tree = NfaBuilder.join_subtree(tree, transition)

- trees.append(tree)

- rule_map[tree_name] = NfaBuilder.or_terms(trees)

# process all subgraphs

- for k, v in parser_state.rules.items():

- process(k, v)

+ for tree_name, v in parser_state.rules.items():

+ if not v['trees']:

+ continue

+ rule_map[tree_name] = NfaBuilder.or_terms(v['trees'])

# build the automata

for name, tree in rule_map.items():

self.__automata[name] = RuleProcessor.Automata(

« no previous file with comments | « tools/lexer_generator/rule_lexer.py ('k') | no next file » | no next file with comments »