Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(67)

Unified Diff: tools/lexer_generator/regex_parser.py

Issue 50873003: Experimental Parser: add lexer generator (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/lexer_generator/regex_lexer.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/regex_parser.py
diff --git a/tools/lexer_generator/regex_parser.py b/tools/lexer_generator/regex_parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..6851b70a499d3565e790a47d8299cedefc0a0b1e
--- /dev/null
+++ b/tools/lexer_generator/regex_parser.py
@@ -0,0 +1,134 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import ply.yacc as yacc
+from regex_lexer import RegexLexer
+from types import ListType, TupleType
+
+class RegexParser:
+
+ tokens = RegexLexer.tokens
+
+ token_map = {
+ '+': 'ONE_OR_MORE',
+ '?': 'ZERO_OR_ONE',
+ '*': 'ZERO_OR_MORE',
+ '|': 'OR',
+ '.': 'ANY',
+ }
+
+ def p_start(self, p):
+ '''start : fragments OR fragments
+ | fragments'''
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ p[0] = (self.token_map[p[2]], p[1], p[3])
+
+ def p_fragments(self, p):
+ '''fragments : fragment
+ | fragment fragments'''
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ p[0] = self.__cat(p[1], p[2])
+
+ def p_fragment(self, p):
+ '''fragment : literal maybe_modifier
+ | class maybe_modifier
+ | group maybe_modifier
+ | any maybe_modifier
+ '''
+ if p[2] != None:
+ p[0] = (p[2], p[1])
+ else:
+ p[0] = p[1]
+
+ def p_maybe_modifier(self, p):
+ '''maybe_modifier : ONE_OR_MORE
+ | ZERO_OR_ONE
+ | ZERO_OR_MORE
+ | empty'''
+ p[0] = p[1]
+ if p[1] != None:
+ p[0] = self.token_map[p[1]]
+
+ def p_literal(self, p):
+ '''literal : LITERAL'''
+ p[0] = ('LITERAL', p[1])
+
+ def p_any(self, p):
+ '''any : ANY'''
+ p[0] = (self.token_map[p[1]],)
+
+ def p_class(self, p):
+ '''class : CLASS_BEGIN class_content CLASS_END
+ | CLASS_BEGIN NOT class_content CLASS_END'''
+ if len(p) == 4:
+ p[0] = ("CLASS", p[2])
+ else:
+ p[0] = ("NOT_CLASS", p[3])
+
+ def p_group(self, p):
+ '''group : GROUP_BEGIN start GROUP_END'''
+ p[0] = p[2]
+
+ def p_class_content(self, p):
+ '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content
+ | CLASS_LITERAL maybe_class_content
+ '''
+ if len(p) == 5:
+ left = ("RANGE", p[1], p[3])
+ else:
+ left = ('LITERAL', p[1])
+ p[0] = self.__cat(left, p[len(p)-1])
+
+ def p_maybe_class_content(self, p):
+ '''maybe_class_content : class_content
+ | empty'''
+ p[0] = p[1]
+
+ def p_empty(self, p):
+ 'empty :'
+
+ def p_error(self, p):
+ raise Exception("Syntax error in input '%s'" % p)
+
+ @staticmethod
+ def __cat(left, right):
+ if right == None:
+ return left
+ return ('CAT', left, right)
+
+ def build(self, **kwargs):
+ self.parser = yacc.yacc(module=self, **kwargs)
+ self.lexer = RegexLexer()
+ self.lexer.build(**kwargs)
+
+ def parse(self, data):
+ return self.parser.parse(data, lexer=self.lexer.lexer)
+
« no previous file with comments | « tools/lexer_generator/regex_lexer.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698