Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(252)

Unified Diff: tools/lexer_generator/rule_parser.py

Issue 138973007: Experimental parser: support subgraph inlining (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/lexer_generator/rule_lexer.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/rule_parser.py
diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py
index 590fa4264f35f010cce421b07f06b7d1443a46f7..649bb9a39e5725a17098ae0da181e8ee045fbce7 100644
--- a/tools/lexer_generator/rule_parser.py
+++ b/tools/lexer_generator/rule_parser.py
@@ -25,15 +25,89 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import ply.lex as lex
import ply.yacc as yacc
from action import Term, Action
-from rule_lexer import RuleLexer
from regex_parser import RegexParser
from nfa_builder import NfaBuilder
from dfa import Dfa
from dfa_optimizer import DfaOptimizer
from transition_keys import TransitionKey, KeyEncoding
+class RuleLexer:
+
+ tokens = (
+ 'DEFAULT_ACTION',
+ 'EPSILON',
+ 'EOS',
+ 'CATCH_ALL',
+
+ 'IDENTIFIER',
+ 'STRING',
+ 'REGEX',
+ 'CHARACTER_CLASS_REGEX',
+
+ 'PLUS',
+ 'QUESTION_MARK',
+ 'EQUALS',
+ 'OR',
+ 'STAR',
+ 'LEFT_PARENTHESIS',
+ 'RIGHT_PARENTHESIS',
+ 'GRAPH_OPEN',
+ 'GRAPH_CLOSE',
+ 'SEMICOLON',
+ 'ACTION_OPEN',
+ 'ACTION_CLOSE',
+
+ 'COMMA',
+ )
+
+ t_ignore = " \t\n\r"
+
+ def t_COMMENT(self, t):
+ r'\#.*[\n\r]+'
+ pass
+
+ __special_identifiers = set(
+ ['default_action', 'epsilon', 'catch_all', 'eos'])
+
+ def t_IDENTIFIER(self, t):
+ r'[a-zA-Z0-9_]+'
+ if t.value in self.__special_identifiers:
+ t.type = t.value.upper()
+ return t
+
+ t_STRING = r'"((\\("|\w|\\))|[^\\"])+"'
+ t_REGEX = r'/(\\/|[^/])+/'
+ t_CHARACTER_CLASS_REGEX = r'\[([^\]]|\\\])+\]'
+
+ t_PLUS = r'\+'
+ t_QUESTION_MARK = r'\?'
+ t_STAR = r'\*'
+ t_OR = r'\|'
+ t_EQUALS = '='
+ t_LEFT_PARENTHESIS = r'\('
+ t_RIGHT_PARENTHESIS = r'\)'
+ t_GRAPH_OPEN = '<<'
+ t_GRAPH_CLOSE = '>>'
+ t_SEMICOLON = ';'
+ t_ACTION_OPEN = '<'
+ t_ACTION_CLOSE = '>'
+ t_COMMA = ','
+
+ def t_LEFT_BRACKET(self, t):
+ r'{'
+ self.lexer.push_state('code')
+ self.nesting = 1
+ return t
+
+ def t_ANY_error(self, t):
+ raise Exception("Illegal character '%s'" % t.value[0])
+
+ def build(self, **kwargs):
+ self.lexer = lex.lex(module=self, **kwargs)
+
class RuleParserState:
def __init__(self, encoding):
@@ -88,7 +162,7 @@ class RuleParser:
state.rules[state.current_state] = {
'default_action': Term.empty_term(),
'uniques' : {},
- 'regex' : []
+ 'trees' : []
}
p[0] = state.current_state
@@ -100,27 +174,45 @@ class RuleParser:
'''transition_rule : composite_regex action
| DEFAULT_ACTION default_action
| EOS eos
+ | EPSILON epsilon
| CATCH_ALL action'''
precedence = RuleParser.__rule_precedence_counter
RuleParser.__rule_precedence_counter += 1
action = p[2]
(entry_action, match_action, transition) = action
- if transition and not transition in self.__keyword_transitions:
- assert not transition == 'default', "can't append default graph"
- self.__state.transitions.add(transition)
+ if transition and not transition.name() in self.__keyword_transitions:
+ assert not transition.name() == 'default', "can't append default graph"
+ self.__state.transitions.add(transition.name())
rules = self.__state.rules[self.__state.current_state]
+ # process default action
if p[1] == 'default_action':
assert self.__state.current_state == 'default'
assert not rules['default_action']
assert not entry_action
rules['default_action'] = match_action
- elif p[1] == 'eos' or p[1] == 'catch_all':
+ return
+ # process tree
+ if p[1] == 'eos' or p[1] == 'catch_all':
assert p[1] not in rules['uniques']
rules['uniques'][p[1]] = True
- rules['regex'].append((NfaBuilder.unique_key(p[1]), precedence, action))
+ tree = NfaBuilder.unique_key(p[1])
+ elif p[1] == 'epsilon':
+ tree = Term.empty_term()
+ else:
+ tree = p[1] # regex case
+ # handle actions
+ if entry_action or match_action:
+ tree = NfaBuilder.add_action(
+ tree, Action(entry_action, match_action, precedence))
+ # handle transitions
+ if not transition:
+ pass
+ elif transition.name() == 'continue':
+ tree = NfaBuilder.add_continue(tree, transition.args()[0])
else:
- regex = p[1]
- rules['regex'].append((regex, precedence, action))
+ tree = NfaBuilder.join_subtree(tree, transition.name())
+ # store tree
+ rules['trees'].append(tree)
def p_action(self, p):
'''action : ACTION_OPEN maybe_identifier_action OR maybe_identifier_action OR maybe_transition ACTION_CLOSE'''
@@ -128,11 +220,16 @@ class RuleParser:
def p_default_action(self, p):
'default_action : ACTION_OPEN identifier_action ACTION_CLOSE'
- p[0] = (Term.empty_term(), p[2], None)
+ p[0] = (Term.empty_term(), p[2], Term.empty_term())
def p_eos(self, p):
'eos : ACTION_OPEN identifier_action ACTION_CLOSE'
- p[0] = (Term.empty_term(), p[2], None)
+ p[0] = (Term.empty_term(), p[2], Term.empty_term())
+
+ def p_epsilon(self, p):
+ 'epsilon : ACTION_OPEN maybe_transition ACTION_CLOSE'
+ assert p[2], 'cannot have an empty epsilon transition'
+ p[0] = (Term.empty_term(), Term.empty_term(), p[2])
def p_maybe_identifier_action(self, p):
'''maybe_identifier_action : identifier_action
@@ -141,8 +238,14 @@ class RuleParser:
def p_maybe_transition(self, p):
'''maybe_transition : IDENTIFIER
+ | IDENTIFIER LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHESIS
| empty'''
- p[0] = p[1]
+ if len(p) == 5:
+ assert p[1] == 'continue', 'only continue can take arguments'
+ p[0] = Term(p[1], p[3])
+ else:
+ assert len(p) == 2
+ p[0] = Term(p[1], '0') if p[1] else Term.empty_term()
def p_identifier_action(self, p):
'''identifier_action : IDENTIFIER
@@ -312,24 +415,11 @@ class RuleProcessor(object):
def __process_parser_state(self, parser_state):
assert 'default' in parser_state.rules
rule_map = {}
- def process(tree_name, v):
- trees = []
- for tree, precedence, action in v['regex']:
- (entry_action, match_action, transition) = action
- if entry_action or match_action:
- tree = NfaBuilder.add_action(
- tree, Action(entry_action, match_action, precedence))
- if not transition:
- pass
- elif transition == 'continue':
- tree = NfaBuilder.add_continue(tree)
- else:
- tree = NfaBuilder.join_subtree(tree, transition)
- trees.append(tree)
- rule_map[tree_name] = NfaBuilder.or_terms(trees)
# process all subgraphs
- for k, v in parser_state.rules.items():
- process(k, v)
+ for tree_name, v in parser_state.rules.items():
+ if not v['trees']:
+ continue
+ rule_map[tree_name] = NfaBuilder.or_terms(v['trees'])
# build the automata
for name, tree in rule_map.items():
self.__automata[name] = RuleProcessor.Automata(
« no previous file with comments | « tools/lexer_generator/rule_lexer.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698