Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(51)

Side by Side Diff: tools/lexer_generator/transition_keys.py

Issue 54773002: Experimental parser: key inversion (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/transition_key_test.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution. 11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its 12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived 13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission. 14 # from this software without specific prior written permission.
15 # 15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
27 from string import printable 28 from string import printable
28 29
29 class TransitionKey: 30 class TransitionKey:
30 31
31 __lower_bound = 0 32 __lower_bound = 0
32 __latin_1_upper_bound = 255 33 __latin_1_upper_bound = 255
33 __unicode_whitespace_bounds = (256, 256) 34 __unicode_whitespace_bounds = (256, 256)
34 __unicode_literal_bounds = (257, 257) 35 __unicode_literal_bounds = (257, 257)
35 __upper_bound = 257 36 __upper_bound = 257
36 37
37 @staticmethod 38 @staticmethod
38 def __verify_ranges(ranges): 39 def __verify_ranges(ranges, check_merged):
39 last = (TransitionKey.__lower_bound - 1, TransitionKey.__lower_bound - 1) 40 assert ranges
41 last = None
40 for r in ranges: 42 for r in ranges:
41 assert TransitionKey.__lower_bound <= r[0] 43 assert TransitionKey.__lower_bound <= r[0]
42 assert r[1] <= TransitionKey.__upper_bound 44 assert r[1] <= TransitionKey.__upper_bound
43 assert r[0] <= r[1] 45 assert r[0] <= r[1]
44 assert last[1] < r[0] 46 r_is_class = (
45 # TODO check classes are always disjoint points 47 r == TransitionKey.__unicode_whitespace_bounds or
48 r == TransitionKey.__unicode_literal_bounds)
49 if last != None and check_merged:
50 assert last[1] + 1 < r[0] or r_is_class
51 if (r[0] > TransitionKey.__latin_1_upper_bound or
52 r[1] > TransitionKey.__latin_1_upper_bound):
53 assert r_is_class
46 last = r 54 last = r
47 55
48 @staticmethod 56 @staticmethod
49 def __create(ranges): 57 def __create(ranges):
50 TransitionKey.__verify_ranges(ranges) 58 TransitionKey.__verify_ranges(ranges, True)
51 key = TransitionKey() 59 key = TransitionKey()
52 key.__ranges = tuple(ranges) # immutable 60 key.__ranges = tuple(ranges) # immutable
53 key.__cached_hash = None 61 key.__cached_hash = None
54 return key 62 return key
55 63
56 __cached_epsilon = None 64 __cached_epsilon = None
57 @staticmethod 65 @staticmethod
58 def epsilon(): 66 def epsilon():
59 if (TransitionKey.__cached_epsilon == None): 67 if (TransitionKey.__cached_epsilon == None):
60 TransitionKey.__cached_epsilon = TransitionKey.__create([]) 68 key = TransitionKey()
69 key.__ranges = tuple([])
70 key.__cached_hash = None
71 TransitionKey.__cached_epsilon = key
61 return TransitionKey.__cached_epsilon 72 return TransitionKey.__cached_epsilon
62 73
63 __cached_any = None 74 __cached_any = None
64 @staticmethod 75 @staticmethod
65 def any(): 76 def any():
66 if (TransitionKey.__cached_any == None): 77 if (TransitionKey.__cached_any == None):
67 bounds = [ 78 bounds = [
68 (TransitionKey.__lower_bound, TransitionKey.__latin_1_upper_bound), 79 (TransitionKey.__lower_bound, TransitionKey.__latin_1_upper_bound),
69 TransitionKey.__unicode_whitespace_bounds, 80 TransitionKey.__unicode_whitespace_bounds,
70 TransitionKey.__unicode_literal_bounds, 81 TransitionKey.__unicode_literal_bounds,
71 ] 82 ]
72 TransitionKey.__cached_any = TransitionKey.__create(bounds) 83 TransitionKey.__cached_any = TransitionKey.__create(bounds)
73 return TransitionKey.__cached_any 84 return TransitionKey.__cached_any
74 85
75 @staticmethod 86 @staticmethod
76 def single_char(char): 87 def single_char(char):
77 char = ord(char) 88 char = ord(char)
78 assert (TransitionKey.__lower_bound <= char and 89 assert (TransitionKey.__lower_bound <= char and
79 char <= TransitionKey.__latin_1_upper_bound) 90 char <= TransitionKey.__latin_1_upper_bound)
80 return TransitionKey.__create([(char, char)]) 91 return TransitionKey.__create([(char, char)])
81 92
82 @staticmethod 93 @staticmethod
94 def __process_graph(graph, ranges):
95 key = graph[0]
96 if key == 'RANGE':
97 ranges.append((ord(graph[1]), ord(graph[2])))
98 elif key == 'LITERAL':
99 ranges.append((ord(graph[1]), ord(graph[1])))
100 elif key == 'CAT':
101 for x in [graph[1], graph[2]]:
102 TransitionKey.__process_graph(x, ranges)
103 else:
104 assert False, "bad key %s" % key
105
106 @staticmethod
83 def character_class(invert, graph): 107 def character_class(invert, graph):
84 # TODO 108 ranges = []
85 return TransitionKey.__create([(129, 129)]) 109 TransitionKey.__process_graph(graph, ranges)
110 return TransitionKey.__key_from_ranges(invert, ranges)
86 111
87 def matches_char(self, char): 112 def matches_char(self, char):
88 char = ord(char) 113 char = ord(char)
89 # TODO class checks 114 # TODO class checks
90 for r in self.__ranges: 115 for r in self.__ranges:
91 if r[0] <= char and char <= r[1]: return True 116 if r[0] <= char and char <= r[1]: return True
92 return False 117 return False
93 118
94 def matches_key(self, key): 119 def matches_key(self, key):
95 assert isinstance(key, self.__class__) 120 assert isinstance(key, self.__class__)
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
132 return "[%s-%s]" % (to_str(r[0]), to_str(r[1])) 157 return "[%s-%s]" % (to_str(r[0]), to_str(r[1]))
133 158
134 def __str__(self): 159 def __str__(self):
135 if self == self.epsilon(): 160 if self == self.epsilon():
136 return "epsilon" 161 return "epsilon"
137 if self == self.any(): 162 if self == self.any():
138 return "any" 163 return "any"
139 return ", ".join(TransitionKey.__print_range(x) for x in self.__ranges) 164 return ", ".join(TransitionKey.__print_range(x) for x in self.__ranges)
140 165
141 @staticmethod 166 @staticmethod
142 def disjoint_keys(key_set): 167 def __disjoint_keys(range_map):
143 range_map = {}
144 for x in key_set:
145 for r in x.__ranges:
146 if not r[0] in range_map:
147 range_map[r[0]] = []
148 range_map[r[0]].append(r[1])
149 sort = lambda x : sorted(set(x)) 168 sort = lambda x : sorted(set(x))
150 range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items())) 169 range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items()))
151 ranges = [] 170 ranges = []
152 upper_bound = TransitionKey.__upper_bound + 1 171 upper_bound = TransitionKey.__upper_bound + 1
153 for i in range(len(range_map)): 172 for i in range(len(range_map)):
154 (left, left_values) = range_map[i] 173 (left, left_values) = range_map[i]
155 next = range_map[i + 1][0] if i != len(range_map) - 1 else upper_bound 174 next = range_map[i + 1][0] if i != len(range_map) - 1 else upper_bound
156 to_push = [] 175 to_push = []
157 for v in left_values: 176 for v in left_values:
158 if v >= next: 177 if v >= next:
159 if not to_push: 178 if not to_push:
160 ranges.append((left, next - 1)) 179 ranges.append((left, next - 1))
161 to_push.append(v) 180 to_push.append(v)
162 else: 181 else:
163 ranges.append((left, v)) 182 ranges.append((left, v))
164 left = v + 1 183 left = v + 1
165 if to_push: 184 if to_push:
166 current = range_map[i + 1] 185 current = range_map[i + 1]
167 range_map[i + 1] = (current[0], sort(current[1] + to_push)) 186 range_map[i + 1] = (current[0], sort(current[1] + to_push))
168 TransitionKey.__verify_ranges(ranges) 187 return ranges
188
189 @staticmethod
190 def disjoint_keys(key_set):
191 if not key_set:
192 return []
193 range_map = {}
194 for x in key_set:
195 for r in x.__ranges:
196 if not r[0] in range_map:
197 range_map[r[0]] = []
198 range_map[r[0]].append(r[1])
199 ranges = TransitionKey.__disjoint_keys(range_map)
200 TransitionKey.__verify_ranges(ranges, False)
169 return map(lambda x : TransitionKey.__create([x]), ranges) 201 return map(lambda x : TransitionKey.__create([x]), ranges)
202
203 @staticmethod
204 def __key_from_ranges(invert, ranges):
205 range_map = {}
206 for r in ranges:
207 if not r[0] in range_map:
208 range_map[r[0]] = []
209 range_map[r[0]].append(r[1])
210 ranges = TransitionKey.__disjoint_keys(range_map)
211 ranges = TransitionKey.__merge_ranges(ranges)
212 if invert:
213 ranges = TransitionKey.__invert_ranges(ranges)
214 return TransitionKey.__create(ranges)
215
216 @staticmethod
217 def __merge_ranges(ranges):
218 merged = []
219 last = None
220 for r in ranges:
221 if last == None:
222 last = r
223 elif (last[1] + 1 == r[0] and
224 r != TransitionKey.__unicode_whitespace_bounds and
225 r != TransitionKey.__unicode_literal_bounds):
226 last = (last[0], r[1])
227 else:
228 merged.append(last)
229 last = r
230 if last != None:
231 merged.append(last)
232 return merged
233
234 @staticmethod
235 def __invert_ranges(ranges):
236 inverted = []
237 last = None
238 contains_whitespace = False
239 contains_literal = False
240 for r in ranges:
241 if r == TransitionKey.__unicode_whitespace_bounds:
242 contains_whitespace = True
243 continue
244 if r == TransitionKey.__unicode_literal_bounds:
245 contains_literal = True
246 continue
247 if last == None:
248 if r[0] != TransitionKey.__lower_bound:
249 inverted.append((TransitionKey.__lower_bound, r[0] - 1))
250 elif last[1] + 1 < r[0]:
251 inverted.append((last[1] + 1, r[0] - 1))
252 last = r
253 if last != None and last[1] < TransitionKey.__latin_1_upper_bound:
254 inverted.append((last[1] + 1, TransitionKey.__latin_1_upper_bound))
255 if not contains_whitespace:
256 inverted.append(TransitionKey.__unicode_whitespace_bounds)
257 if not contains_literal:
258 inverted.append(TransitionKey.__unicode_literal_bounds)
259 return inverted
OLDNEW
« no previous file with comments | « tools/lexer_generator/transition_key_test.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698