Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(31)

Unified Diff: tools/lexer_generator/regex_parser.py

Issue 138973007: Experimental parser: support subgraph inlining (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/lexer_generator/regex_lexer.py ('k') | tools/lexer_generator/rule_lexer.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/regex_parser.py
diff --git a/tools/lexer_generator/regex_parser.py b/tools/lexer_generator/regex_parser.py
index 4053bec03fde1635231fa2da50fb7728da8cb563..d19a5432570508bef88dd5e8e99fb28ec4fe7ea8 100644
--- a/tools/lexer_generator/regex_parser.py
+++ b/tools/lexer_generator/regex_parser.py
@@ -25,11 +25,123 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import ply.lex as lex
import ply.yacc as yacc
from types import ListType, TupleType
from regex_lexer import RegexLexer
from action import Term
def build_escape_map(chars):
  """Return a dict mapping escape sequences to the characters they denote.

  The result always contains the five whitespace escapes (backslash followed
  by t, r, n, v or f), each mapped to the corresponding control character.
  In addition, every character c in `chars` gets an entry whose key is the
  two-character string "backslash + c" and whose value is c itself. An entry
  derived from `chars` overrides a whitespace escape with the same key.

  A fresh dict is built on every call, so callers may mutate the result.
  """
  escape_map = {
    '\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f'}
  # A plain loop instead of reduce(): reduce is only a builtin on Python 2,
  # and folding with a callback that mutates its accumulator hides the intent.
  for char in chars:
    escape_map['\\' + char] = char
  return escape_map
+
class RegexLexer:
  # PLY lexer specification for the regular-expression syntax.
  #
  # NOTE: PLY uses the docstring of every t_* function as that rule's regular
  # expression, so the rule functions below are documented with comments
  # only. Function rules are tried in definition order, before string rules,
  # so the order of the definitions in this class is significant.

  # Token type names that this lexer can emit.
  tokens = (

    'GROUP_BEGIN',
    'GROUP_END',

    'CLASS_BEGIN',
    'CLASS_END',

    'OR',
    'ONE_OR_MORE',
    'ZERO_OR_MORE',
    'ZERO_OR_ONE',
    'ANY',

    'REPEAT_BEGIN',
    'REPEAT_END',

    'NUMBER',
    'COMMA',
    'LITERAL',

    'RANGE',
    'NOT',
    'CLASS_LITERAL',
    'CLASS_LITERAL_AS_OCTAL',
    'CHARACTER_CLASS',
  )

  # Two extra lexer states besides the initial one: 'class' is entered on
  # '[' and 'repeat' on '{'. Both are exclusive, i.e. only the t_class_* /
  # t_repeat_* (and t_ANY_*) rules apply while they are active.
  states = (
    ('class','exclusive'),
    ('repeat','exclusive'),
  )

  # Escape sequences accepted outside of character classes.
  __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\")

  # A backslash followed by any character is emitted as a LITERAL whose
  # value is the unescaped character. An escape that is not in the map
  # above raises KeyError.
  def t_ESCAPED_LITERAL(self, t):
    r'\\.'
    t.type = 'LITERAL'
    t.value = RegexLexer.__escaped_literals[t.value]
    return t

  t_GROUP_BEGIN = r'\('
  t_GROUP_END = r'\)'

  t_OR = r'\|'
  t_ONE_OR_MORE = r'\+'
  t_ZERO_OR_MORE = r'\*'
  t_ZERO_OR_ONE = r'\?'

  t_ANY = r'\.'

  # Fallback for the initial state: any single character not claimed by one
  # of the rules above is a LITERAL.
  t_LITERAL = r'.'

  # '[' opens a character class and switches to the 'class' state.
  def t_CLASS_BEGIN(self, t):
    r'\['
    self.lexer.push_state('class')
    return t

  # ']' closes the character class and restores the previous state.
  def t_class_CLASS_END(self, t):
    r'\]'
    self.lexer.pop_state()
    return t

  t_class_RANGE = '-'
  # Raw string, like every other backslash pattern in this class; the
  # pattern value is unchanged (backslash followed by '^').
  t_class_NOT = r'\^'
  t_class_CHARACTER_CLASS = r':\w+:'

  # A backslash followed by digits. Defined before the generic escape rule
  # below so that it is tried first; the token value is the matched text,
  # returned unmodified.
  def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
    r'\\\d+'
    return t

  # Escape sequences accepted inside of character classes.
  __escaped_class_literals = build_escape_map("^[]-:\\")

  # Inside a class, a backslash followed by any character is emitted as a
  # CLASS_LITERAL whose value is the unescaped character. An escape that is
  # not in the map above raises KeyError.
  def t_class_ESCAPED_CLASS_LITERAL(self, t):
    r'\\.'
    t.type = 'CLASS_LITERAL'
    t.value = RegexLexer.__escaped_class_literals[t.value]
    return t

  # Characters that may appear unescaped inside a character class.
  t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'

  # '{' opens a repetition specifier and switches to the 'repeat' state.
  def t_REPEAT_BEGIN(self, t):
    r'\{'
    self.lexer.push_state('repeat')
    return t

  # '}' closes the repetition specifier and restores the previous state.
  def t_repeat_REPEAT_END(self, t):
    r'\}'
    self.lexer.pop_state()
    return t

  t_repeat_NUMBER = r'[0-9]+'
  t_repeat_COMMA = r','

  # Newlines are skipped in every state (the ANY prefix applies the rule to
  # all states).
  t_ANY_ignore = '\n'

  # Error rule for every state: abort on the first character that no rule
  # matches.
  def t_ANY_error(self, t):
    raise Exception("Illegal character '%s'" % t.value[0])

  def build(self, **kwargs):
    """Create the PLY lexer from the rules of this instance.

    The lexer is stored in self.lexer, which the state-switching rules
    rely on. All keyword arguments are forwarded to lex.lex().
    """
    self.lexer = lex.lex(module=self, **kwargs)
+
class RegexParser:
tokens = RegexLexer.tokens
« no previous file with comments | « tools/lexer_generator/regex_lexer.py ('k') | tools/lexer_generator/rule_lexer.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698