Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(388)

Side by Side Diff: tools/lexer_generator/transition_keys.py

Issue 66283004: Experimental parser: unique transition key support (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 11 matching lines...) Expand all
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 from string import printable 28 from string import printable
29 29
30 class TransitionKey: 30 class TransitionKey:
31 31
32 __class_bounds = {
33 "latin_1" : (0, 255),
34 "whitespace" : (256, 256),
35 "literal" : (257, 257),
36 }
32 __lower_bound = 0 37 __lower_bound = 0
33 __latin_1_upper_bound = 255 38 __upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]), __class_bounds.item s(), 0)
34 __unicode_whitespace_bounds = (256, 256) 39
35 __unicode_literal_bounds = (257, 257) 40 __cached_keys = {}
36 __upper_bound = 257 41
42 __unique_key_counter = -1
43
44 @staticmethod
45 def __in_latin_1(char):
46 bound = TransitionKey.__class_bounds["latin_1"]
47 return (bound[0] <= char and char <= bound[1])
48
49 @staticmethod
50 def __is_class_range(r):
51 return r[0] == r[1] and not TransitionKey.__in_latin_1(r[0])
52
53 @staticmethod
54 def __is_unique_range(r):
55 return r[0] == r[1] and r[0] < TransitionKey.__lower_bound
37 56
38 @staticmethod 57 @staticmethod
39 def __verify_ranges(ranges, check_merged): 58 def __verify_ranges(ranges, check_merged):
40 assert ranges 59 assert ranges
60 if len(ranges) == 1 and TransitionKey.__is_class_range(ranges[0]):
61 return
41 last = None 62 last = None
42 for r in ranges: 63 for r in ranges:
43 assert TransitionKey.__lower_bound <= r[0] 64 assert TransitionKey.__lower_bound <= r[0]
44 assert r[1] <= TransitionKey.__upper_bound 65 assert r[1] <= TransitionKey.__upper_bound
45 assert r[0] <= r[1] 66 assert r[0] <= r[1]
46 r_is_class = ( 67 r_is_class = TransitionKey.__is_class_range(r)
47 r == TransitionKey.__unicode_whitespace_bounds or
48 r == TransitionKey.__unicode_literal_bounds)
49 if last != None and check_merged: 68 if last != None and check_merged:
50 assert last[1] + 1 < r[0] or r_is_class 69 assert last[1] + 1 < r[0] or r_is_class
51 if (r[0] > TransitionKey.__latin_1_upper_bound or 70 if not TransitionKey.__in_latin_1(r[0]):
52 r[1] > TransitionKey.__latin_1_upper_bound): 71 assert r_is_class
72 if not TransitionKey.__in_latin_1(r[1]):
53 assert r_is_class 73 assert r_is_class
54 last = r 74 last = r
55 75
56 @staticmethod 76 @staticmethod
57 def __create(ranges): 77 def __create(ranges, name = None):
58 TransitionKey.__verify_ranges(ranges, True) 78 if not ranges:
79 assert name == 'epsilon'
80 assert not name in TransitionKey.__cached_keys
81 else:
82 TransitionKey.__verify_ranges(ranges, True)
59 key = TransitionKey() 83 key = TransitionKey()
60 key.__ranges = tuple(ranges) # immutable 84 key.__ranges = tuple(ranges) # immutable
85 key.__name = name
61 key.__cached_hash = None 86 key.__cached_hash = None
62 return key 87 return key
63 88
64 __cached_epsilon = None 89 @staticmethod
90 def __cached_key(name, bounds_getter):
91 if not name in TransitionKey.__cached_keys:
92 bounds = bounds_getter(name)
93 TransitionKey.__cached_keys[name] = TransitionKey.__create(bounds, name)
94 return TransitionKey.__cached_keys[name]
95
65 @staticmethod 96 @staticmethod
66 def epsilon(): 97 def epsilon():
67 if (TransitionKey.__cached_epsilon == None): 98 return TransitionKey.__cached_key("epsilon", lambda name : [])
68 key = TransitionKey()
69 key.__ranges = tuple([])
70 key.__cached_hash = None
71 TransitionKey.__cached_epsilon = key
72 return TransitionKey.__cached_epsilon
73 99
74 __cached_any = None
75 @staticmethod 100 @staticmethod
76 def any(): 101 def any():
77 if (TransitionKey.__cached_any == None): 102 return TransitionKey.__cached_key("any",
78 bounds = [ 103 lambda name : TransitionKey.__class_bounds.values())
79 (TransitionKey.__lower_bound, TransitionKey.__latin_1_upper_bound),
80 TransitionKey.__unicode_whitespace_bounds,
81 TransitionKey.__unicode_literal_bounds,
82 ]
83 TransitionKey.__cached_any = TransitionKey.__create(bounds)
84 return TransitionKey.__cached_any
85 104
86 @staticmethod 105 @staticmethod
87 def single_char(char): 106 def single_char(char):
88 char = ord(char) 107 char = ord(char)
89 assert (TransitionKey.__lower_bound <= char and 108 assert TransitionKey.__in_latin_1(char)
90 char <= TransitionKey.__latin_1_upper_bound)
91 return TransitionKey.__create([(char, char)]) 109 return TransitionKey.__create([(char, char)])
92 110
93 @staticmethod 111 @staticmethod
112 def unique_key(name):
113 def get_bounds(name):
114 bound = TransitionKey.__unique_key_counter
115 TransitionKey.__unique_key_counter -= 1
116 return [(bound, bound)]
117 name = "__" + name
118 return TransitionKey.__cached_key(name, get_bounds)
119
120 @staticmethod
94 def __process_graph(graph, ranges, key_map): 121 def __process_graph(graph, ranges, key_map):
95 key = graph[0] 122 key = graph[0]
96 if key == 'RANGE': 123 if key == 'RANGE':
97 ranges.append((ord(graph[1]), ord(graph[2]))) 124 ranges.append((ord(graph[1]), ord(graph[2])))
98 elif key == 'LITERAL': 125 elif key == 'LITERAL':
99 ranges.append((ord(graph[1]), ord(graph[1]))) 126 ranges.append((ord(graph[1]), ord(graph[1])))
100 elif key == 'CAT': 127 elif key == 'CAT':
101 for x in [graph[1], graph[2]]: 128 for x in [graph[1], graph[2]]:
102 TransitionKey.__process_graph(x, ranges, key_map) 129 TransitionKey.__process_graph(x, ranges, key_map)
103 elif key == 'CHARACTER_CLASS': 130 elif key == 'CHARACTER_CLASS':
104 class_name = graph[1] 131 class_name = graph[1]
105 if class_name == 'ws': 132 if class_name == 'ws':
106 ranges.append(TransitionKey.__unicode_whitespace_bounds) 133 ranges.append(TransitionKey.__class_bounds["whitespace"])
107 elif class_name == 'lit': 134 elif class_name == 'lit':
108 ranges.append(TransitionKey.__unicode_literal_bounds) 135 ranges.append(TransitionKey.__class_bounds["literal"])
109 elif class_name in key_map: 136 elif class_name in key_map:
110 ranges += key_map[class_name].__ranges 137 ranges += key_map[class_name].__ranges
111 else: 138 else:
112 raise Exception("unknown character class [%s]" % graph[1]) 139 raise Exception("unknown character class [%s]" % graph[1])
113 else: 140 else:
114 raise Exception("bad key [%s]" % key) 141 raise Exception("bad key [%s]" % key)
115 142
116 @staticmethod 143 @staticmethod
117 def character_class(graph, key_map): 144 def character_class(graph, key_map):
118 ranges = [] 145 ranges = []
(...skipping 29 matching lines...) Expand all
148 def __eq__(self, other): 175 def __eq__(self, other):
149 return isinstance(other, self.__class__) and self.__ranges == other.__ranges 176 return isinstance(other, self.__class__) and self.__ranges == other.__ranges
150 177
151 __printable_cache = { 178 __printable_cache = {
152 ord('\t') : '\\t', 179 ord('\t') : '\\t',
153 ord('\n') : '\\n', 180 ord('\n') : '\\n',
154 ord('\r') : '\\r', 181 ord('\r') : '\\r',
155 } 182 }
156 183
157 @staticmethod 184 @staticmethod
158 def __print_range(r): 185 def __range_str(r):
186 if TransitionKey.__is_class_range(r):
187 for name, v in TransitionKey.__class_bounds.items():
188 if r == v: return name
189 assert False
159 def to_str(x): 190 def to_str(x):
160 if x <= TransitionKey.__latin_1_upper_bound: 191 assert TransitionKey.__in_latin_1(x)
161 if not x in TransitionKey.__printable_cache: 192 if not x in TransitionKey.__printable_cache:
162 res = "'%s'" % chr(x) if chr(x) in printable else str(x) 193 res = "'%s'" % chr(x) if chr(x) in printable else str(x)
163 TransitionKey.__printable_cache[x] = res 194 TransitionKey.__printable_cache[x] = res
164 return TransitionKey.__printable_cache[x] 195 return TransitionKey.__printable_cache[x]
165 if x == TransitionKey.__unicode_literal_bounds[0]:
166 return "literal"
167 if x == TransitionKey.__unicode_whitespace_bounds[0]:
168 return "whitespace"
169 assert False
170 if r[0] == r[1]: 196 if r[0] == r[1]:
171 return "%s" % to_str(r[0]) 197 return "%s" % to_str(r[0])
172 else: 198 else:
173 return "[%s-%s]" % (to_str(r[0]), to_str(r[1])) 199 return "[%s-%s]" % (to_str(r[0]), to_str(r[1]))
174 200
175 def __str__(self): 201 def __str__(self):
176 if self == self.epsilon(): 202 if self.__name:
177 return "epsilon" 203 return self.__name
178 if self == self.any(): 204 return ", ".join(TransitionKey.__range_str(x) for x in self.__ranges)
179 return "any"
180 return ", ".join(TransitionKey.__print_range(x) for x in self.__ranges)
181 205
182 @staticmethod 206 @staticmethod
183 def __disjoint_keys(range_map): 207 def __disjoint_keys(range_map):
184 sort = lambda x : sorted(set(x)) 208 sort = lambda x : sorted(set(x))
185 range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items())) 209 range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items()))
186 ranges = [] 210 ranges = []
187 upper_bound = TransitionKey.__upper_bound + 1 211 upper_bound = TransitionKey.__upper_bound + 1
188 for i in range(len(range_map)): 212 for i in range(len(range_map)):
189 (left, left_values) = range_map[i] 213 (left, left_values) = range_map[i]
190 next = range_map[i + 1][0] if i != len(range_map) - 1 else upper_bound 214 next = range_map[i + 1][0] if i != len(range_map) - 1 else upper_bound
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
229 ranges = TransitionKey.__merge_ranges(ranges) 253 ranges = TransitionKey.__merge_ranges(ranges)
230 if invert: 254 if invert:
231 ranges = TransitionKey.__invert_ranges(ranges) 255 ranges = TransitionKey.__invert_ranges(ranges)
232 return TransitionKey.__create(ranges) 256 return TransitionKey.__create(ranges)
233 257
234 @staticmethod 258 @staticmethod
235 def __merge_ranges(ranges): 259 def __merge_ranges(ranges):
236 merged = [] 260 merged = []
237 last = None 261 last = None
238 for r in ranges: 262 for r in ranges:
263 assert not TransitionKey.__is_unique_range(r)
239 if last == None: 264 if last == None:
240 last = r 265 last = r
241 elif (last[1] + 1 == r[0] and 266 elif (last[1] + 1 == r[0] and not TransitionKey.__is_class_range(r)):
242 r != TransitionKey.__unicode_whitespace_bounds and
243 r != TransitionKey.__unicode_literal_bounds):
244 last = (last[0], r[1]) 267 last = (last[0], r[1])
245 else: 268 else:
246 merged.append(last) 269 merged.append(last)
247 last = r 270 last = r
248 if last != None: 271 if last != None:
249 merged.append(last) 272 merged.append(last)
250 return merged 273 return merged
251 274
252 @staticmethod 275 @staticmethod
253 def merged_key(keys): 276 def merged_key(keys):
254 f = lambda acc, key: acc + list(key.__ranges) 277 f = lambda acc, key: acc + list(key.__ranges)
255 return TransitionKey.__key_from_ranges(False, reduce(f, keys, [])) 278 return TransitionKey.__key_from_ranges(False, reduce(f, keys, []))
256 279
257 @staticmethod 280 @staticmethod
258 def __invert_ranges(ranges): 281 def __invert_ranges(ranges):
259 inverted = [] 282 inverted = []
260 last = None 283 last = None
261 contains_whitespace = False 284 classes = set(TransitionKey.__class_bounds.values())
262 contains_literal = False 285 classes.remove(TransitionKey.__class_bounds["latin_1"])
263 for r in ranges: 286 for r in ranges:
264 if r == TransitionKey.__unicode_whitespace_bounds: 287 assert not TransitionKey.__is_unique_range(r)
265 contains_whitespace = True 288 if TransitionKey.__is_class_range(r):
266 continue 289 classes.remove(r)
267 if r == TransitionKey.__unicode_literal_bounds:
268 contains_literal = True
269 continue 290 continue
270 if last == None: 291 if last == None:
271 if r[0] != TransitionKey.__lower_bound: 292 if r[0] != TransitionKey.__lower_bound:
272 inverted.append((TransitionKey.__lower_bound, r[0] - 1)) 293 inverted.append((TransitionKey.__lower_bound, r[0] - 1))
273 elif last[1] + 1 < r[0]: 294 elif last[1] + 1 < r[0]:
274 inverted.append((last[1] + 1, r[0] - 1)) 295 inverted.append((last[1] + 1, r[0] - 1))
275 last = r 296 last = r
276 if last != None and last[1] < TransitionKey.__latin_1_upper_bound: 297 upper_bound = TransitionKey.__class_bounds["latin_1"][1]
277 inverted.append((last[1] + 1, TransitionKey.__latin_1_upper_bound)) 298 if last != None and last[1] < upper_bound:
278 if not contains_whitespace: 299 inverted.append((last[1] + 1, upper_bound))
279 inverted.append(TransitionKey.__unicode_whitespace_bounds) 300 inverted += list(classes)
280 if not contains_literal:
281 inverted.append(TransitionKey.__unicode_literal_bounds)
282 return inverted 301 return inverted
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698