Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(757)

Side by Side Diff: tools/lexer_generator/regex_parser.py

Issue 138973007: Experimental parser: support subgraph inlining (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/regex_lexer.py ('k') | tools/lexer_generator/rule_lexer.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution. 11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its 12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived 13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission. 14 # from this software without specific prior written permission.
15 # 15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 import ply.lex as lex
28 import ply.yacc as yacc 29 import ply.yacc as yacc
29 from types import ListType, TupleType 30 from types import ListType, TupleType
30 from regex_lexer import RegexLexer 31 from regex_lexer import RegexLexer
31 from action import Term 32 from action import Term
32 33
def build_escape_map(chars):
    r"""Return a dict mapping escape sequences to the characters they denote.

    The result always starts from the five whitespace escapes
    (``\t``, ``\r``, ``\n``, ``\v``, ``\f``), each keyed by the two-character
    text "backslash + letter" and mapped to the real control character.
    Every character in *chars* is then added as "backslash + char" -> char,
    so a character in *chars* overrides a whitespace default with the same
    key (e.g. passing ``"t"`` makes ``\t`` map to the letter ``t``).

    Args:
        chars: an iterable of single characters that may be escaped
            literally (typically a string).

    Returns:
        A new dict on every call; callers may mutate it freely.
    """
    escape_map = {
        '\\t': '\t',
        '\\r': '\r',
        '\\n': '\n',
        '\\v': '\v',
        '\\f': '\f',
    }
    # A plain loop instead of reduce(): reduce is only a builtin on Python 2,
    # and the accumulator was being mutated in place anyway.
    for char in chars:
        escape_map['\\' + char] = char
    return escape_map
40
class RegexLexer:
    # PLY-based tokenizer for the regular-expression syntax consumed by
    # RegexParser.
    #
    # NOTE: PLY takes the regex of a function rule from the function's
    # docstring, so the string at the top of each t_* method is the token
    # pattern, not documentation.  Descriptive text therefore lives in
    # comments.  Function rules are tried in definition order, so the order
    # of the methods below is significant and must not be changed casually.

    # Every token type this lexer can emit.
    tokens = (

        'GROUP_BEGIN',
        'GROUP_END',

        'CLASS_BEGIN',
        'CLASS_END',

        'OR',
        'ONE_OR_MORE',
        'ZERO_OR_MORE',
        'ZERO_OR_ONE',
        'ANY',

        'REPEAT_BEGIN',
        'REPEAT_END',

        'NUMBER',
        'COMMA',
        'LITERAL',

        'RANGE',
        'NOT',
        'CLASS_LITERAL',
        'CLASS_LITERAL_AS_OCTAL',
        'CHARACTER_CLASS',
    )

    # Extra lexer states: 'class' is entered on '[' and 'repeat' on '{'.
    # Both are exclusive, so only rules prefixed with the state name (or
    # with ANY) apply while they are active.
    states = (
        ('class', 'exclusive'),
        ('repeat', 'exclusive'),
    )

    # Escapes accepted outside of a character class: the whitespace escapes
    # plus the regex metacharacters and quotes listed here.
    __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\")

    # Backslash followed by any character becomes a LITERAL holding the
    # unescaped character.  An escape that is not in the table raises
    # KeyError from the lookup.
    def t_ESCAPED_LITERAL(self, t):
        r'\\.'
        t.type = 'LITERAL'
        t.value = RegexLexer.__escaped_literals[t.value]
        return t

    t_GROUP_BEGIN = r'\('
    t_GROUP_END = r'\)'

    t_OR = r'\|'
    t_ONE_OR_MORE = r'\+'
    t_ZERO_OR_MORE = r'\*'
    t_ZERO_OR_ONE = r'\?'

    t_ANY = r'\.'

    t_LITERAL = r'.'

    # '[' opens a character class; the matching ']' pops the state again.
    def t_CLASS_BEGIN(self, t):
        r'\['
        self.lexer.push_state('class')
        return t

    def t_class_CLASS_END(self, t):
        r'\]'
        self.lexer.pop_state()
        return t

    t_class_RANGE = '-'
    # Raw string: '\^' in a plain literal is an unrecognised escape sequence
    # (a warning on modern Python); the pattern text itself is unchanged.
    t_class_NOT = r'\^'
    t_class_CHARACTER_CLASS = r':\w+:'

    # Backslash followed by digits inside a class.  The token value is left
    # as the raw matched text (backslash included).  Defined before the
    # generic escaped-literal rule so that it is tried first.
    def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
        r'\\\d+'
        return t

    # Escapes accepted inside a character class.
    __escaped_class_literals = build_escape_map("^[]-:\\")

    # Same idea as t_ESCAPED_LITERAL, using the class-specific table and
    # emitting CLASS_LITERAL.  Unknown escapes raise KeyError.
    def t_class_ESCAPED_CLASS_LITERAL(self, t):
        r'\\.'
        t.type = 'CLASS_LITERAL'
        t.value = RegexLexer.__escaped_class_literals[t.value]
        return t

    t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'

    # '{' opens a repeat specification; the matching '}' pops the state.
    def t_REPEAT_BEGIN(self, t):
        r'\{'
        self.lexer.push_state('repeat')
        return t

    def t_repeat_REPEAT_END(self, t):
        r'\}'
        self.lexer.pop_state()
        return t

    t_repeat_NUMBER = r'[0-9]+'
    t_repeat_COMMA = r','

    # Newlines are skipped in every state.
    t_ANY_ignore = '\n'

    # Any character no rule matches is a hard error, in every state.
    def t_ANY_error(self, t):
        raise Exception("Illegal character '%s'" % t.value[0])

    # Build the underlying PLY lexer from the rules on this instance and
    # store it on self.lexer; kwargs are forwarded to lex.lex().
    def build(self, **kwargs):
        self.lexer = lex.lex(module=self, **kwargs)
144
33 class RegexParser: 145 class RegexParser:
34 146
35 tokens = RegexLexer.tokens 147 tokens = RegexLexer.tokens
36 148
37 token_map = { 149 token_map = {
38 '+': 'ONE_OR_MORE', 150 '+': 'ONE_OR_MORE',
39 '?': 'ZERO_OR_ONE', 151 '?': 'ZERO_OR_ONE',
40 '*': 'ZERO_OR_MORE', 152 '*': 'ZERO_OR_MORE',
41 '|': 'OR', 153 '|': 'OR',
42 '.': 'ANY', 154 '.': 'ANY',
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
154 parser = RegexParser.__static_instance 266 parser = RegexParser.__static_instance
155 if not parser: 267 if not parser:
156 parser = RegexParser() 268 parser = RegexParser()
157 parser.build() 269 parser.build()
158 RegexParser.__static_instance = parser 270 RegexParser.__static_instance = parser
159 try: 271 try:
160 return parser.parser.parse(data, lexer=parser.lexer.lexer) 272 return parser.parser.parse(data, lexer=parser.lexer.lexer)
161 except Exception: 273 except Exception:
162 RegexParser.__static_instance = None 274 RegexParser.__static_instance = None
163 raise 275 raise
OLDNEW
« no previous file with comments | « tools/lexer_generator/regex_lexer.py ('k') | tools/lexer_generator/rule_lexer.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698