| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 10 matching lines...) Expand all Loading... |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 from string import printable | 28 from string import printable |
| 29 | 29 |
| 30 class TransitionKey: | 30 class TransitionKey: |
| 31 '''Represents a transition from a state in DFA or NFA to another state. |
| 32 |
| 33 A transition key has a list of character ranges and a list of class ranges |
| 34 (e.g., "whitespace"), defining for which characters the transition |
| 35 happens. When we generate code based on the transition key, the character |
| 36 ranges generate simple checks and the class ranges generate more complicated |
| 37 conditions, e.g., function calls.''' |
| 31 | 38 |
| 32 __class_bounds = { | 39 __class_bounds = { |
| 33 'latin_1' : (1, 255), | 40 'latin_1' : (1, 255), |
| 34 # These are not real ranges; they just need to be separate from any real | 41 # These are not real ranges; they just need to be separate from any real |
| 35 # ranges. | 42 # ranges. |
| 36 'whitespace' : (256, 256), | 43 'whitespace' : (256, 256), |
| 37 'literal' : (257, 257), | 44 'literal' : (257, 257), |
| 38 'eos' : (258, 258), | 45 'eos' : (258, 258), |
| 39 'zero' : (259, 259), | 46 'zero' : (259, 259), |
| 40 } | 47 } |
| 41 __lower_bound = 1 | 48 __lower_bound = 1 |
| 42 __upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]), __class_bounds.item
s(), 0) | 49 __upper_bound = max(__class_bounds.values(), key=lambda item: item[1])[1] |
| 43 | 50 |
| 44 __cached_keys = {} | 51 __cached_keys = {} |
| 45 | 52 |
| 46 __unique_key_counter = -1 | 53 __unique_key_counter = -1 |
| 47 | 54 |
| 48 @staticmethod | 55 @staticmethod |
| 49 def __in_latin_1(char): | 56 def __in_latin_1(char): |
| 50 bound = TransitionKey.__class_bounds['latin_1'] | 57 bound = TransitionKey.__class_bounds['latin_1'] |
| 51 return (bound[0] <= char and char <= bound[1]) | 58 return (bound[0] <= char and char <= bound[1]) |
| 52 | 59 |
| (...skipping 18 matching lines...) Expand all Loading... |
| 71 r_is_class = TransitionKey.__is_class_range(r) | 78 r_is_class = TransitionKey.__is_class_range(r) |
| 72 # Assert that the ranges are in order. | 79 # Assert that the ranges are in order. |
| 73 if last != None and check_merged: | 80 if last != None and check_merged: |
| 74 assert last[1] + 1 < r[0] or r_is_class | 81 assert last[1] + 1 < r[0] or r_is_class |
| 75 if not TransitionKey.__in_latin_1(r[0]): | 82 if not TransitionKey.__in_latin_1(r[0]): |
| 76 assert r_is_class | 83 assert r_is_class |
| 77 if not TransitionKey.__in_latin_1(r[1]): | 84 if not TransitionKey.__in_latin_1(r[1]): |
| 78 assert r_is_class | 85 assert r_is_class |
| 79 last = r | 86 last = r |
| 80 | 87 |
| 81 @staticmethod | |
| 82 def __create(ranges, name = None): | |
| 83 if not ranges: | |
| 84 assert name == 'epsilon' | |
| 85 assert not name in TransitionKey.__cached_keys | |
| 86 else: | |
| 87 TransitionKey.__verify_ranges(ranges, True) | |
| 88 key = TransitionKey() | |
| 89 key.__ranges = tuple(ranges) # immutable | |
| 90 key.__name = name | |
| 91 key.__cached_hash = None | |
| 92 return key | |
| 93 | |
| 94 def __is_unique(self): | 88 def __is_unique(self): |
| 95 return len(self.__ranges) == 1 and self.__is_unique_range(self.__ranges[0]) | 89 return len(self.__ranges) == 1 and self.__is_unique_range(self.__ranges[0]) |
| 96 | 90 |
| 97 @staticmethod | 91 @staticmethod |
| 98 def __cached_key(name, bounds_getter): | 92 def __cached_key(name, bounds_getter): |
| 99 if not name in TransitionKey.__cached_keys: | 93 if not name in TransitionKey.__cached_keys: |
| 100 bounds = bounds_getter(name) | 94 bounds = bounds_getter(name) |
| 101 TransitionKey.__cached_keys[name] = TransitionKey.__create(bounds, name) | 95 TransitionKey.__cached_keys[name] = TransitionKey(bounds, name) |
| 102 return TransitionKey.__cached_keys[name] | 96 return TransitionKey.__cached_keys[name] |
| 103 | 97 |
| 104 @staticmethod | 98 @staticmethod |
| 105 def epsilon(): | 99 def epsilon(): |
| 100 '''Returns a TransitionKey for the epsilon (empty) transition.''' |
| 106 return TransitionKey.__cached_key('epsilon', lambda name : []) | 101 return TransitionKey.__cached_key('epsilon', lambda name : []) |
| 107 | 102 |
| 108 @staticmethod | 103 @staticmethod |
| 109 def any(): | 104 def any(): |
| 105 '''Returns a TransitionKey which matches any character.''' |
| 110 def bounds_getter(name): | 106 def bounds_getter(name): |
| 111 bounds = TransitionKey.__class_bounds.values() | 107 bounds = TransitionKey.__class_bounds.values() |
| 112 bounds.sort() | 108 bounds.sort() |
| 113 return bounds | 109 return bounds |
| 114 return TransitionKey.__cached_key('any', bounds_getter) | 110 return TransitionKey.__cached_key('any', bounds_getter) |
| 115 | 111 |
| 116 @staticmethod | 112 @staticmethod |
| 117 def single_char(char): | 113 def single_char(char): |
| 114 '''Returns a TransitionKey for a single-character transition.''' |
| 118 char = ord(char) | 115 char = ord(char) |
| 119 assert TransitionKey.__in_latin_1(char) | 116 assert TransitionKey.__in_latin_1(char) |
| 120 return TransitionKey.__create([(char, char)]) | 117 return TransitionKey([(char, char)]) |
| 121 | 118 |
| 122 @staticmethod | 119 @staticmethod |
| 123 def unique(name): | 120 def unique(name): |
| 121 '''Returns a unique TransitionKey for the given name (and creates it if it |
| 122 doesn't exist yet). The TransitionKey won't have any real character range, |
| 123 but a newly-created "mock" character range which is separate from all other |
| 124 character ranges.''' |
| 124 def get_bounds(name): | 125 def get_bounds(name): |
| 125 bound = TransitionKey.__unique_key_counter | 126 bound = TransitionKey.__unique_key_counter |
| 126 TransitionKey.__unique_key_counter -= 1 | 127 TransitionKey.__unique_key_counter -= 1 |
| 127 return [(bound, bound)] | 128 return [(bound, bound)] |
| 128 name = '__' + name | 129 name = '__' + name |
| 129 return TransitionKey.__cached_key(name, get_bounds) | 130 return TransitionKey.__cached_key(name, get_bounds) |
| 130 | 131 |
| 131 @staticmethod | 132 @staticmethod |
| 132 def __process_graph(graph, ranges, key_map): | 133 def __process_graph(graph, ranges, key_map): |
| 133 key = graph[0] | 134 key = graph[0] |
| (...skipping 16 matching lines...) Expand all Loading... |
| 150 ranges.append(TransitionKey.__class_bounds['zero']) | 151 ranges.append(TransitionKey.__class_bounds['zero']) |
| 151 elif class_name in key_map: | 152 elif class_name in key_map: |
| 152 ranges += key_map[class_name].__ranges | 153 ranges += key_map[class_name].__ranges |
| 153 else: | 154 else: |
| 154 raise Exception('unknown character class [%s]' % graph[1]) | 155 raise Exception('unknown character class [%s]' % graph[1]) |
| 155 else: | 156 else: |
| 156 raise Exception('bad key [%s]' % key) | 157 raise Exception('bad key [%s]' % key) |
| 157 | 158 |
| 158 @staticmethod | 159 @staticmethod |
| 159 def character_class(graph, key_map): | 160 def character_class(graph, key_map): |
| 161 '''Processes 'graph' (a representation of a character class in the rule |
| 162 file), and constructs a TransitionKey based on it. 'key_map' contains |
| 163 already constructed aliases for character classes (they can be used in the |
| 164 new character class). It is a map from strings (character class names) to |
| 165 TransitionKeys. For example, graph might represent the character class |
| 166 [a-z:digit:] where 'digit' is a previously constructed characte class found |
| 167 in "key_map".''' |
| 160 ranges = [] | 168 ranges = [] |
| 161 assert graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS' | 169 assert graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS' |
| 162 invert = graph[0] == 'NOT_CLASS' | 170 invert = graph[0] == 'NOT_CLASS' |
| 163 TransitionKey.__process_graph(graph[1], ranges, key_map) | 171 TransitionKey.__process_graph(graph[1], ranges, key_map) |
| 164 return TransitionKey.__key_from_ranges(invert, ranges) | 172 return TransitionKey.__key_from_ranges(invert, ranges) |
| 165 | 173 |
| 166 def matches_char(self, char): | 174 def matches_char(self, char): |
| 167 char = ord(char) | 175 char = ord(char) |
| 168 # TODO class checks | 176 # TODO class checks |
| 169 for r in self.__ranges: | 177 for r in self.__ranges: |
| 170 if r[0] <= char and char <= r[1]: return True | 178 if r[0] <= char and char <= r[1]: return True |
| 171 return False | 179 return False |
| 172 | 180 |
| 173 def matches_key(self, key): | 181 def is_superset_of_key(self, key): |
| 182 '''Returns true if 'key' is a sub-key of this TransitionKey.''' |
| 174 assert isinstance(key, self.__class__) | 183 assert isinstance(key, self.__class__) |
| 175 assert key != TransitionKey.epsilon() and not key.__is_unique() | 184 assert key != TransitionKey.epsilon() and not key.__is_unique() |
| 176 assert len(key.__ranges) == 1 | 185 assert len(key.__ranges) == 1 |
| 177 subkey = key.__ranges[0] | 186 subkey = key.__ranges[0] |
| 178 matches = False | 187 matches = False |
| 179 for k in self.__ranges: | 188 for k in self.__ranges: |
| 180 if k[0] <= subkey[0]: | 189 if k[0] <= subkey[0]: |
| 181 assert subkey[1] <= k[1] or subkey[0] > k[1] | 190 assert subkey[1] <= k[1] or subkey[0] > k[1] |
| 182 if subkey[0] < k[0]: | 191 if subkey[0] < k[0]: |
| 183 assert subkey[1] < k[0] | 192 assert subkey[1] < k[0] |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 240 assert TransitionKey.__in_latin_1(x) | 249 assert TransitionKey.__in_latin_1(x) |
| 241 if not x in TransitionKey.__printable_cache: | 250 if not x in TransitionKey.__printable_cache: |
| 242 res = "'%s'" % chr(x) if chr(x) in printable else str(x) | 251 res = "'%s'" % chr(x) if chr(x) in printable else str(x) |
| 243 TransitionKey.__printable_cache[x] = res | 252 TransitionKey.__printable_cache[x] = res |
| 244 return TransitionKey.__printable_cache[x] | 253 return TransitionKey.__printable_cache[x] |
| 245 if r[0] == r[1]: | 254 if r[0] == r[1]: |
| 246 return '%s' % to_str(r[0]) | 255 return '%s' % to_str(r[0]) |
| 247 else: | 256 else: |
| 248 return '[%s-%s]' % (to_str(r[0]), to_str(r[1])) | 257 return '[%s-%s]' % (to_str(r[0]), to_str(r[1])) |
| 249 | 258 |
| 259 def __init__(self, ranges, name = None): |
| 260 if not ranges: |
| 261 assert name == 'epsilon' |
| 262 assert not name in TransitionKey.__cached_keys |
| 263 else: |
| 264 TransitionKey.__verify_ranges(ranges, True) |
| 265 self.__name = name |
| 266 self.__ranges = tuple(ranges) # immutable |
| 267 self.__cached_hash = None |
| 268 |
| 250 def __str__(self): | 269 def __str__(self): |
| 251 if self.__name: | 270 if self.__name: |
| 252 return self.__name | 271 return self.__name |
| 253 return ', '.join(TransitionKey.__range_str(x) for x in self.__ranges) | 272 return ', '.join(TransitionKey.__range_str(x) for x in self.__ranges) |
| 254 | 273 |
| 255 @staticmethod | 274 @staticmethod |
| 256 def __disjoint_keys(range_map): | 275 def __disjoint_keys(range_map): |
| 276 '''Takes a set of possibly overlapping ranges, returns a list of ranges |
| 277 which don't overlap and which cover the same points as the original |
| 278 set. range_map is a map from lower bounds to a list of upper bounds.''' |
| 257 sort = lambda x : sorted(set(x)) | 279 sort = lambda x : sorted(set(x)) |
| 258 range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items())) | 280 range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items())) |
| 259 ranges = [] | 281 ranges = [] |
| 260 upper_bound = TransitionKey.__upper_bound + 1 | 282 upper_bound = TransitionKey.__upper_bound + 1 |
| 261 for i in range(len(range_map)): | 283 for i in range(len(range_map)): |
| 262 (left, left_values) = range_map[i] | 284 (left, left_values) = range_map[i] |
| 263 next = range_map[i + 1][0] if i != len(range_map) - 1 else upper_bound | 285 next = range_map[i + 1][0] if i != len(range_map) - 1 else upper_bound |
| 264 to_push = [] | 286 to_push = [] |
| 265 for v in left_values: | 287 for v in left_values: |
| 266 assert left <= next | 288 assert left <= next |
| (...skipping 20 matching lines...) Expand all Loading... |
| 287 for r in x.__ranges: | 309 for r in x.__ranges: |
| 288 if not r[0] in range_map: | 310 if not r[0] in range_map: |
| 289 range_map[r[0]] = [] | 311 range_map[r[0]] = [] |
| 290 range_map[r[0]].append(r[1]) | 312 range_map[r[0]].append(r[1]) |
| 291 ranges = TransitionKey.__disjoint_keys(range_map) | 313 ranges = TransitionKey.__disjoint_keys(range_map) |
| 292 TransitionKey.__verify_ranges(ranges, False) | 314 TransitionKey.__verify_ranges(ranges, False) |
| 293 return ranges | 315 return ranges |
| 294 | 316 |
| 295 @staticmethod | 317 @staticmethod |
| 296 def disjoint_keys(key_set): | 318 def disjoint_keys(key_set): |
| 319 '''Takes a set of possibly overlapping TransitionKeys, returns a list of |
| 320 TransitionKeys which don't overlap and whose union is the same as the union |
| 321 of the original key_set. key_set is a set of TransitionKeys.''' |
| 297 ranges = TransitionKey.__disjoint_ranges_from_key_set(key_set) | 322 ranges = TransitionKey.__disjoint_ranges_from_key_set(key_set) |
| 298 return map(lambda x : TransitionKey.__create([x]), ranges) | 323 return map(lambda x : TransitionKey([x]), ranges) |
| 299 | 324 |
| 300 @staticmethod | 325 @staticmethod |
| 301 def inverse_key(key_set): | 326 def inverse_key(key_set): |
| 327 '''Returns a TransitionKey which matches represents the inverse of the union |
| 328 of 'key_set'. The TransitionKeys contain a set of character ranges and a set |
| 329 of classes. The character ranges are inverted in relation to the latin_1 |
| 330 character range, and the character classes are inverted in relation to all |
| 331 character classes in __class_bounds.''' |
| 302 ranges = TransitionKey.__disjoint_ranges_from_key_set(key_set) | 332 ranges = TransitionKey.__disjoint_ranges_from_key_set(key_set) |
| 303 inverse = TransitionKey.__invert_ranges(ranges) | 333 inverse = TransitionKey.__invert_ranges(ranges) |
| 304 if not inverse: | 334 if not inverse: |
| 305 return None | 335 return None |
| 306 return TransitionKey.__create(inverse) | 336 return TransitionKey(inverse) |
| 307 | 337 |
| 308 @staticmethod | 338 @staticmethod |
| 309 def __key_from_ranges(invert, ranges): | 339 def __key_from_ranges(invert, ranges): |
| 310 range_map = {} | 340 range_map = {} |
| 311 for r in ranges: | 341 for r in ranges: |
| 312 if not r[0] in range_map: | 342 if not r[0] in range_map: |
| 313 range_map[r[0]] = [] | 343 range_map[r[0]] = [] |
| 314 range_map[r[0]].append(r[1]) | 344 range_map[r[0]].append(r[1]) |
| 315 ranges = TransitionKey.__disjoint_keys(range_map) | 345 ranges = TransitionKey.__disjoint_keys(range_map) |
| 316 ranges = TransitionKey.__merge_ranges(ranges) | 346 ranges = TransitionKey.__merge_ranges(ranges) |
| 317 if invert: | 347 if invert: |
| 318 ranges = TransitionKey.__invert_ranges(ranges) | 348 ranges = TransitionKey.__invert_ranges(ranges) |
| 319 return TransitionKey.__create(ranges) | 349 return TransitionKey(ranges) |
| 320 | 350 |
| 321 @staticmethod | 351 @staticmethod |
| 322 def __merge_ranges(ranges): | 352 def __merge_ranges(ranges): |
| 323 merged = [] | 353 merged = [] |
| 324 last = None | 354 last = None |
| 325 for r in ranges: | 355 for r in ranges: |
| 326 assert not TransitionKey.__is_unique_range(r) | 356 assert not TransitionKey.__is_unique_range(r) |
| 327 if last == None: | 357 if last == None: |
| 328 last = r | 358 last = r |
| 329 elif (last[1] + 1 == r[0] and not TransitionKey.__is_class_range(r)): | 359 elif (last[1] + 1 == r[0] and not TransitionKey.__is_class_range(r)): |
| 330 last = (last[0], r[1]) | 360 last = (last[0], r[1]) |
| 331 else: | 361 else: |
| 332 merged.append(last) | 362 merged.append(last) |
| 333 last = r | 363 last = r |
| 334 if last != None: | 364 if last != None: |
| 335 merged.append(last) | 365 merged.append(last) |
| 336 return merged | 366 return merged |
| 337 | 367 |
| 338 @staticmethod | 368 @staticmethod |
| 339 def merged_key(keys): | 369 def merged_key(keys): |
| 340 f = lambda acc, key: acc + list(key.__ranges) | 370 f = lambda acc, key: acc + list(key.__ranges) |
| 341 return TransitionKey.__key_from_ranges(False, reduce(f, keys, [])) | 371 return TransitionKey.__key_from_ranges(False, reduce(f, keys, [])) |
| 342 | 372 |
| 343 @staticmethod | 373 @staticmethod |
| 344 def __invert_ranges(ranges): | 374 def __invert_ranges(ranges): |
| 345 inverted = [] | 375 inverted = [] |
| 346 last = None | 376 last = None |
| 377 # Extract character classes (as opposed to character ranges) from |
| 378 # __class_bounds. Since latin_1 is the only real character range, we can do |
| 379 # this. |
| 347 classes = set(TransitionKey.__class_bounds.values()) | 380 classes = set(TransitionKey.__class_bounds.values()) |
| 348 latin_1 = TransitionKey.__class_bounds['latin_1'] | 381 latin_1 = TransitionKey.__class_bounds['latin_1'] |
| 349 classes.remove(latin_1) | 382 classes.remove(latin_1) |
| 350 for r in ranges: | 383 for r in ranges: |
| 351 assert not TransitionKey.__is_unique_range(r) | 384 assert not TransitionKey.__is_unique_range(r) |
| 352 if TransitionKey.__is_class_range(r): | 385 if TransitionKey.__is_class_range(r): |
| 353 classes.remove(r) | 386 classes.remove(r) |
| 354 continue | 387 continue |
| 355 if last == None: | 388 if last == None: |
| 356 if r[0] != TransitionKey.__lower_bound: | 389 if r[0] != TransitionKey.__lower_bound: |
| 357 inverted.append((TransitionKey.__lower_bound, r[0] - 1)) | 390 inverted.append((TransitionKey.__lower_bound, r[0] - 1)) |
| 358 elif last[1] + 1 < r[0]: | 391 elif last[1] + 1 < r[0]: |
| 359 inverted.append((last[1] + 1, r[0] - 1)) | 392 inverted.append((last[1] + 1, r[0] - 1)) |
| 360 last = r | 393 last = r |
| 361 upper_bound = latin_1[1] | 394 upper_bound = latin_1[1] |
| 362 if last == None: | 395 if last == None: |
| 363 inverted.append(latin_1) | 396 inverted.append(latin_1) |
| 364 elif last[1] < upper_bound: | 397 elif last[1] < upper_bound: |
| 365 inverted.append((last[1] + 1, upper_bound)) | 398 inverted.append((last[1] + 1, upper_bound)) |
| 366 inverted += list(classes) | 399 inverted += list(classes) |
| 367 return inverted | 400 return inverted |
| OLD | NEW |