OLD | NEW |
1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
4 # met: | 4 # met: |
5 # | 5 # |
6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
(...skipping 12 matching lines...) Expand all Loading... |
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | 27 |
28 from string import printable | 28 from string import printable |
29 | 29 |
30 class TransitionKey: | 30 class TransitionKey: |
31 | 31 |
32 __class_bounds = { | 32 __class_bounds = { |
33 "latin_1" : (1, 255), | 33 'latin_1' : (1, 255), |
34 # These are not "real" ranges; they just need to be separate. | 34 # These are not real ranges; they just need to be separate from any real |
35 "whitespace" : (256, 256), | 35 # ranges. |
36 "literal" : (257, 257), | 36 'whitespace' : (256, 256), |
37 "eos" : (258, 258), | 37 'literal' : (257, 257), |
38 "zero" : (259, 259), | 38 'eos' : (258, 258), |
| 39 'zero' : (259, 259), |
39 } | 40 } |
40 __lower_bound = 1 | 41 __lower_bound = 1 |
41 __upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]), __class_bounds.item
s(), 0) | 42 __upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]), __class_bounds.item
s(), 0) |
42 | 43 |
43 __cached_keys = {} | 44 __cached_keys = {} |
44 | 45 |
45 __unique_key_counter = -1 | 46 __unique_key_counter = -1 |
46 | 47 |
47 @staticmethod | 48 @staticmethod |
48 def __in_latin_1(char): | 49 def __in_latin_1(char): |
49 bound = TransitionKey.__class_bounds["latin_1"] | 50 bound = TransitionKey.__class_bounds['latin_1'] |
50 return (bound[0] <= char and char <= bound[1]) | 51 return (bound[0] <= char and char <= bound[1]) |
51 | 52 |
52 @staticmethod | 53 @staticmethod |
53 def __is_class_range(r): | 54 def __is_class_range(r): |
54 return r[0] == r[1] and not TransitionKey.__in_latin_1(r[0]) | 55 return r[0] == r[1] and not TransitionKey.__in_latin_1(r[0]) |
55 | 56 |
56 @staticmethod | 57 @staticmethod |
57 def __is_unique_range(r): | 58 def __is_unique_range(r): |
58 return r[0] == r[1] and r[0] < TransitionKey.__lower_bound | 59 return r[0] == r[1] and r[0] < TransitionKey.__lower_bound |
59 | 60 |
60 @staticmethod | 61 @staticmethod |
61 def __verify_ranges(ranges, check_merged): | 62 def __verify_ranges(ranges, check_merged): |
62 assert ranges | 63 assert ranges |
63 if len(ranges) == 1 and TransitionKey.__is_class_range(ranges[0]): | 64 if len(ranges) == 1 and TransitionKey.__is_class_range(ranges[0]): |
64 return | 65 return |
65 last = None | 66 last = None |
66 for r in ranges: | 67 for r in ranges: |
67 assert TransitionKey.__lower_bound <= r[0] | 68 assert TransitionKey.__lower_bound <= r[0] |
68 assert r[1] <= TransitionKey.__upper_bound | 69 assert r[1] <= TransitionKey.__upper_bound |
69 assert r[0] <= r[1] | 70 assert r[0] <= r[1] |
70 r_is_class = TransitionKey.__is_class_range(r) | 71 r_is_class = TransitionKey.__is_class_range(r) |
| 72 # Assert that the ranges are in order. |
71 if last != None and check_merged: | 73 if last != None and check_merged: |
72 assert last[1] + 1 < r[0] or r_is_class | 74 assert last[1] + 1 < r[0] or r_is_class |
73 if not TransitionKey.__in_latin_1(r[0]): | 75 if not TransitionKey.__in_latin_1(r[0]): |
74 assert r_is_class | 76 assert r_is_class |
75 if not TransitionKey.__in_latin_1(r[1]): | 77 if not TransitionKey.__in_latin_1(r[1]): |
76 assert r_is_class | 78 assert r_is_class |
77 last = r | 79 last = r |
78 | 80 |
79 @staticmethod | 81 @staticmethod |
80 def __create(ranges, name = None): | 82 def __create(ranges, name = None): |
(...skipping 13 matching lines...) Expand all Loading... |
94 | 96 |
95 @staticmethod | 97 @staticmethod |
96 def __cached_key(name, bounds_getter): | 98 def __cached_key(name, bounds_getter): |
97 if not name in TransitionKey.__cached_keys: | 99 if not name in TransitionKey.__cached_keys: |
98 bounds = bounds_getter(name) | 100 bounds = bounds_getter(name) |
99 TransitionKey.__cached_keys[name] = TransitionKey.__create(bounds, name) | 101 TransitionKey.__cached_keys[name] = TransitionKey.__create(bounds, name) |
100 return TransitionKey.__cached_keys[name] | 102 return TransitionKey.__cached_keys[name] |
101 | 103 |
102 @staticmethod | 104 @staticmethod |
103 def epsilon(): | 105 def epsilon(): |
104 return TransitionKey.__cached_key("epsilon", lambda name : []) | 106 return TransitionKey.__cached_key('epsilon', lambda name : []) |
105 | 107 |
106 @staticmethod | 108 @staticmethod |
107 def any(): | 109 def any(): |
108 return TransitionKey.__cached_key("any", | 110 def bounds_getter(name): |
109 lambda name : TransitionKey.__class_bounds.values()) | 111 bounds = TransitionKey.__class_bounds.values() |
| 112 bounds.sort() |
| 113 return bounds |
| 114 return TransitionKey.__cached_key('any', bounds_getter) |
110 | 115 |
111 @staticmethod | 116 @staticmethod |
112 def single_char(char): | 117 def single_char(char): |
113 char = ord(char) | 118 char = ord(char) |
114 assert TransitionKey.__in_latin_1(char) | 119 assert TransitionKey.__in_latin_1(char) |
115 return TransitionKey.__create([(char, char)]) | 120 return TransitionKey.__create([(char, char)]) |
116 | 121 |
117 @staticmethod | 122 @staticmethod |
118 def unique(name): | 123 def unique(name): |
119 def get_bounds(name): | 124 def get_bounds(name): |
120 bound = TransitionKey.__unique_key_counter | 125 bound = TransitionKey.__unique_key_counter |
121 TransitionKey.__unique_key_counter -= 1 | 126 TransitionKey.__unique_key_counter -= 1 |
122 return [(bound, bound)] | 127 return [(bound, bound)] |
123 name = "__" + name | 128 name = '__' + name |
124 return TransitionKey.__cached_key(name, get_bounds) | 129 return TransitionKey.__cached_key(name, get_bounds) |
125 | 130 |
126 @staticmethod | 131 @staticmethod |
127 def __process_graph(graph, ranges, key_map): | 132 def __process_graph(graph, ranges, key_map): |
128 key = graph[0] | 133 key = graph[0] |
129 if key == 'RANGE': | 134 if key == 'RANGE': |
130 ranges.append((ord(graph[1]), ord(graph[2]))) | 135 ranges.append((ord(graph[1]), ord(graph[2]))) |
131 elif key == 'LITERAL': | 136 elif key == 'LITERAL': |
132 ranges.append((ord(graph[1]), ord(graph[1]))) | 137 ranges.append((ord(graph[1]), ord(graph[1]))) |
133 elif key == 'CAT': | 138 elif key == 'CAT': |
134 for x in [graph[1], graph[2]]: | 139 for x in [graph[1], graph[2]]: |
135 TransitionKey.__process_graph(x, ranges, key_map) | 140 TransitionKey.__process_graph(x, ranges, key_map) |
136 elif key == 'CHARACTER_CLASS': | 141 elif key == 'CHARACTER_CLASS': |
137 class_name = graph[1] | 142 class_name = graph[1] |
138 if class_name == 'ws': | 143 if class_name == 'ws': |
139 ranges.append(TransitionKey.__class_bounds['whitespace']) | 144 ranges.append(TransitionKey.__class_bounds['whitespace']) |
140 elif class_name == 'lit': | 145 elif class_name == 'lit': |
141 ranges.append(TransitionKey.__class_bounds['literal']) | 146 ranges.append(TransitionKey.__class_bounds['literal']) |
142 elif class_name == 'eos': | 147 elif class_name == 'eos': |
143 ranges.append(TransitionKey.__class_bounds['eos']) | 148 ranges.append(TransitionKey.__class_bounds['eos']) |
144 elif class_name == 'zero': | 149 elif class_name == 'zero': |
145 ranges.append(TransitionKey.__class_bounds['zero']) | 150 ranges.append(TransitionKey.__class_bounds['zero']) |
146 elif class_name in key_map: | 151 elif class_name in key_map: |
147 ranges += key_map[class_name].__ranges | 152 ranges += key_map[class_name].__ranges |
148 else: | 153 else: |
149 raise Exception("unknown character class [%s]" % graph[1]) | 154 raise Exception('unknown character class [%s]' % graph[1]) |
150 else: | 155 else: |
151 raise Exception("bad key [%s]" % key) | 156 raise Exception('bad key [%s]' % key) |
152 | 157 |
153 @staticmethod | 158 @staticmethod |
154 def character_class(graph, key_map): | 159 def character_class(graph, key_map): |
155 ranges = [] | 160 ranges = [] |
156 assert graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS' | 161 assert graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS' |
157 invert = graph[0] == 'NOT_CLASS' | 162 invert = graph[0] == 'NOT_CLASS' |
158 TransitionKey.__process_graph(graph[1], ranges, key_map) | 163 TransitionKey.__process_graph(graph[1], ranges, key_map) |
159 return TransitionKey.__key_from_ranges(invert, ranges) | 164 return TransitionKey.__key_from_ranges(invert, ranges) |
160 | 165 |
161 def matches_char(self, char): | 166 def matches_char(self, char): |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
231 def __range_str(r): | 236 def __range_str(r): |
232 if TransitionKey.__is_class_range(r): | 237 if TransitionKey.__is_class_range(r): |
233 return TransitionKey.__class_name(r) | 238 return TransitionKey.__class_name(r) |
234 def to_str(x): | 239 def to_str(x): |
235 assert TransitionKey.__in_latin_1(x) | 240 assert TransitionKey.__in_latin_1(x) |
236 if not x in TransitionKey.__printable_cache: | 241 if not x in TransitionKey.__printable_cache: |
237 res = "'%s'" % chr(x) if chr(x) in printable else str(x) | 242 res = "'%s'" % chr(x) if chr(x) in printable else str(x) |
238 TransitionKey.__printable_cache[x] = res | 243 TransitionKey.__printable_cache[x] = res |
239 return TransitionKey.__printable_cache[x] | 244 return TransitionKey.__printable_cache[x] |
240 if r[0] == r[1]: | 245 if r[0] == r[1]: |
241 return "%s" % to_str(r[0]) | 246 return '%s' % to_str(r[0]) |
242 else: | 247 else: |
243 return "[%s-%s]" % (to_str(r[0]), to_str(r[1])) | 248 return '[%s-%s]' % (to_str(r[0]), to_str(r[1])) |
244 | 249 |
245 def __str__(self): | 250 def __str__(self): |
246 if self.__name: | 251 if self.__name: |
247 return self.__name | 252 return self.__name |
248 return ", ".join(TransitionKey.__range_str(x) for x in self.__ranges) | 253 return ', '.join(TransitionKey.__range_str(x) for x in self.__ranges) |
249 | 254 |
250 @staticmethod | 255 @staticmethod |
251 def __disjoint_keys(range_map): | 256 def __disjoint_keys(range_map): |
252 sort = lambda x : sorted(set(x)) | 257 sort = lambda x : sorted(set(x)) |
253 range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items())) | 258 range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items())) |
254 ranges = [] | 259 ranges = [] |
255 upper_bound = TransitionKey.__upper_bound + 1 | 260 upper_bound = TransitionKey.__upper_bound + 1 |
256 for i in range(len(range_map)): | 261 for i in range(len(range_map)): |
257 (left, left_values) = range_map[i] | 262 (left, left_values) = range_map[i] |
258 next = range_map[i + 1][0] if i != len(range_map) - 1 else upper_bound | 263 next = range_map[i + 1][0] if i != len(range_map) - 1 else upper_bound |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
333 @staticmethod | 338 @staticmethod |
334 def merged_key(keys): | 339 def merged_key(keys): |
335 f = lambda acc, key: acc + list(key.__ranges) | 340 f = lambda acc, key: acc + list(key.__ranges) |
336 return TransitionKey.__key_from_ranges(False, reduce(f, keys, [])) | 341 return TransitionKey.__key_from_ranges(False, reduce(f, keys, [])) |
337 | 342 |
338 @staticmethod | 343 @staticmethod |
339 def __invert_ranges(ranges): | 344 def __invert_ranges(ranges): |
340 inverted = [] | 345 inverted = [] |
341 last = None | 346 last = None |
342 classes = set(TransitionKey.__class_bounds.values()) | 347 classes = set(TransitionKey.__class_bounds.values()) |
343 latin_1 = TransitionKey.__class_bounds["latin_1"] | 348 latin_1 = TransitionKey.__class_bounds['latin_1'] |
344 classes.remove(latin_1) | 349 classes.remove(latin_1) |
345 for r in ranges: | 350 for r in ranges: |
346 assert not TransitionKey.__is_unique_range(r) | 351 assert not TransitionKey.__is_unique_range(r) |
347 if TransitionKey.__is_class_range(r): | 352 if TransitionKey.__is_class_range(r): |
348 classes.remove(r) | 353 classes.remove(r) |
349 continue | 354 continue |
350 if last == None: | 355 if last == None: |
351 if r[0] != TransitionKey.__lower_bound: | 356 if r[0] != TransitionKey.__lower_bound: |
352 inverted.append((TransitionKey.__lower_bound, r[0] - 1)) | 357 inverted.append((TransitionKey.__lower_bound, r[0] - 1)) |
353 elif last[1] + 1 < r[0]: | 358 elif last[1] + 1 < r[0]: |
354 inverted.append((last[1] + 1, r[0] - 1)) | 359 inverted.append((last[1] + 1, r[0] - 1)) |
355 last = r | 360 last = r |
356 upper_bound = latin_1[1] | 361 upper_bound = latin_1[1] |
357 if last == None: | 362 if last == None: |
358 inverted.append(latin_1) | 363 inverted.append(latin_1) |
359 elif last[1] < upper_bound: | 364 elif last[1] < upper_bound: |
360 inverted.append((last[1] + 1, upper_bound)) | 365 inverted.append((last[1] + 1, upper_bound)) |
361 inverted += list(classes) | 366 inverted += list(classes) |
362 return inverted | 367 return inverted |
OLD | NEW |