| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 11 matching lines...) Expand all Loading... |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 from string import printable | 28 from string import printable |
| 29 | 29 |
| 30 class TransitionKey: | 30 class TransitionKey: |
| 31 | 31 |
| 32 __class_bounds = { |
| 33 "latin_1" : (0, 255), |
| 34 "whitespace" : (256, 256), |
| 35 "literal" : (257, 257), |
| 36 } |
| 32 __lower_bound = 0 | 37 __lower_bound = 0 |
| 33 __latin_1_upper_bound = 255 | 38 __upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]), __class_bounds.item
s(), 0) |
| 34 __unicode_whitespace_bounds = (256, 256) | 39 |
| 35 __unicode_literal_bounds = (257, 257) | 40 __cached_keys = {} |
| 36 __upper_bound = 257 | 41 |
| 42 __unique_key_counter = -1 |
| 43 |
| 44 @staticmethod |
| 45 def __in_latin_1(char): |
| 46 bound = TransitionKey.__class_bounds["latin_1"] |
| 47 return (bound[0] <= char and char <= bound[1]) |
| 48 |
| 49 @staticmethod |
| 50 def __is_class_range(r): |
| 51 return r[0] == r[1] and not TransitionKey.__in_latin_1(r[0]) |
| 52 |
| 53 @staticmethod |
| 54 def __is_unique_range(r): |
| 55 return r[0] == r[1] and r[0] < TransitionKey.__lower_bound |
| 37 | 56 |
| 38 @staticmethod | 57 @staticmethod |
| 39 def __verify_ranges(ranges, check_merged): | 58 def __verify_ranges(ranges, check_merged): |
| 40 assert ranges | 59 assert ranges |
| 60 if len(ranges) == 1 and TransitionKey.__is_class_range(ranges[0]): |
| 61 return |
| 41 last = None | 62 last = None |
| 42 for r in ranges: | 63 for r in ranges: |
| 43 assert TransitionKey.__lower_bound <= r[0] | 64 assert TransitionKey.__lower_bound <= r[0] |
| 44 assert r[1] <= TransitionKey.__upper_bound | 65 assert r[1] <= TransitionKey.__upper_bound |
| 45 assert r[0] <= r[1] | 66 assert r[0] <= r[1] |
| 46 r_is_class = ( | 67 r_is_class = TransitionKey.__is_class_range(r) |
| 47 r == TransitionKey.__unicode_whitespace_bounds or | |
| 48 r == TransitionKey.__unicode_literal_bounds) | |
| 49 if last != None and check_merged: | 68 if last != None and check_merged: |
| 50 assert last[1] + 1 < r[0] or r_is_class | 69 assert last[1] + 1 < r[0] or r_is_class |
| 51 if (r[0] > TransitionKey.__latin_1_upper_bound or | 70 if not TransitionKey.__in_latin_1(r[0]): |
| 52 r[1] > TransitionKey.__latin_1_upper_bound): | 71 assert r_is_class |
| 72 if not TransitionKey.__in_latin_1(r[1]): |
| 53 assert r_is_class | 73 assert r_is_class |
| 54 last = r | 74 last = r |
| 55 | 75 |
| 56 @staticmethod | 76 @staticmethod |
| 57 def __create(ranges): | 77 def __create(ranges, name = None): |
| 58 TransitionKey.__verify_ranges(ranges, True) | 78 if not ranges: |
| 79 assert name == 'epsilon' |
| 80 assert not name in TransitionKey.__cached_keys |
| 81 else: |
| 82 TransitionKey.__verify_ranges(ranges, True) |
| 59 key = TransitionKey() | 83 key = TransitionKey() |
| 60 key.__ranges = tuple(ranges) # immutable | 84 key.__ranges = tuple(ranges) # immutable |
| 85 key.__name = name |
| 61 key.__cached_hash = None | 86 key.__cached_hash = None |
| 62 return key | 87 return key |
| 63 | 88 |
| 64 __cached_epsilon = None | 89 @staticmethod |
| 90 def __cached_key(name, bounds_getter): |
| 91 if not name in TransitionKey.__cached_keys: |
| 92 bounds = bounds_getter(name) |
| 93 TransitionKey.__cached_keys[name] = TransitionKey.__create(bounds, name) |
| 94 return TransitionKey.__cached_keys[name] |
| 95 |
| 65 @staticmethod | 96 @staticmethod |
| 66 def epsilon(): | 97 def epsilon(): |
| 67 if (TransitionKey.__cached_epsilon == None): | 98 return TransitionKey.__cached_key("epsilon", lambda name : []) |
| 68 key = TransitionKey() | |
| 69 key.__ranges = tuple([]) | |
| 70 key.__cached_hash = None | |
| 71 TransitionKey.__cached_epsilon = key | |
| 72 return TransitionKey.__cached_epsilon | |
| 73 | 99 |
| 74 __cached_any = None | |
| 75 @staticmethod | 100 @staticmethod |
| 76 def any(): | 101 def any(): |
| 77 if (TransitionKey.__cached_any == None): | 102 return TransitionKey.__cached_key("any", |
| 78 bounds = [ | 103 lambda name : TransitionKey.__class_bounds.values()) |
| 79 (TransitionKey.__lower_bound, TransitionKey.__latin_1_upper_bound), | |
| 80 TransitionKey.__unicode_whitespace_bounds, | |
| 81 TransitionKey.__unicode_literal_bounds, | |
| 82 ] | |
| 83 TransitionKey.__cached_any = TransitionKey.__create(bounds) | |
| 84 return TransitionKey.__cached_any | |
| 85 | 104 |
| 86 @staticmethod | 105 @staticmethod |
| 87 def single_char(char): | 106 def single_char(char): |
| 88 char = ord(char) | 107 char = ord(char) |
| 89 assert (TransitionKey.__lower_bound <= char and | 108 assert TransitionKey.__in_latin_1(char) |
| 90 char <= TransitionKey.__latin_1_upper_bound) | |
| 91 return TransitionKey.__create([(char, char)]) | 109 return TransitionKey.__create([(char, char)]) |
| 92 | 110 |
| 93 @staticmethod | 111 @staticmethod |
| 112 def unique_key(name): |
| 113 def get_bounds(name): |
| 114 bound = TransitionKey.__unique_key_counter |
| 115 TransitionKey.__unique_key_counter -= 1 |
| 116 return [(bound, bound)] |
| 117 name = "__" + name |
| 118 return TransitionKey.__cached_key(name, get_bounds) |
| 119 |
| 120 @staticmethod |
| 94 def __process_graph(graph, ranges, key_map): | 121 def __process_graph(graph, ranges, key_map): |
| 95 key = graph[0] | 122 key = graph[0] |
| 96 if key == 'RANGE': | 123 if key == 'RANGE': |
| 97 ranges.append((ord(graph[1]), ord(graph[2]))) | 124 ranges.append((ord(graph[1]), ord(graph[2]))) |
| 98 elif key == 'LITERAL': | 125 elif key == 'LITERAL': |
| 99 ranges.append((ord(graph[1]), ord(graph[1]))) | 126 ranges.append((ord(graph[1]), ord(graph[1]))) |
| 100 elif key == 'CAT': | 127 elif key == 'CAT': |
| 101 for x in [graph[1], graph[2]]: | 128 for x in [graph[1], graph[2]]: |
| 102 TransitionKey.__process_graph(x, ranges, key_map) | 129 TransitionKey.__process_graph(x, ranges, key_map) |
| 103 elif key == 'CHARACTER_CLASS': | 130 elif key == 'CHARACTER_CLASS': |
| 104 class_name = graph[1] | 131 class_name = graph[1] |
| 105 if class_name == 'ws': | 132 if class_name == 'ws': |
| 106 ranges.append(TransitionKey.__unicode_whitespace_bounds) | 133 ranges.append(TransitionKey.__class_bounds["whitespace"]) |
| 107 elif class_name == 'lit': | 134 elif class_name == 'lit': |
| 108 ranges.append(TransitionKey.__unicode_literal_bounds) | 135 ranges.append(TransitionKey.__class_bounds["literal"]) |
| 109 elif class_name in key_map: | 136 elif class_name in key_map: |
| 110 ranges += key_map[class_name].__ranges | 137 ranges += key_map[class_name].__ranges |
| 111 else: | 138 else: |
| 112 raise Exception("unknown character class [%s]" % graph[1]) | 139 raise Exception("unknown character class [%s]" % graph[1]) |
| 113 else: | 140 else: |
| 114 raise Exception("bad key [%s]" % key) | 141 raise Exception("bad key [%s]" % key) |
| 115 | 142 |
| 116 @staticmethod | 143 @staticmethod |
| 117 def character_class(graph, key_map): | 144 def character_class(graph, key_map): |
| 118 ranges = [] | 145 ranges = [] |
| (...skipping 29 matching lines...) Expand all Loading... |
| 148 def __eq__(self, other): | 175 def __eq__(self, other): |
| 149 return isinstance(other, self.__class__) and self.__ranges == other.__ranges | 176 return isinstance(other, self.__class__) and self.__ranges == other.__ranges |
| 150 | 177 |
| 151 __printable_cache = { | 178 __printable_cache = { |
| 152 ord('\t') : '\\t', | 179 ord('\t') : '\\t', |
| 153 ord('\n') : '\\n', | 180 ord('\n') : '\\n', |
| 154 ord('\r') : '\\r', | 181 ord('\r') : '\\r', |
| 155 } | 182 } |
| 156 | 183 |
| 157 @staticmethod | 184 @staticmethod |
| 158 def __print_range(r): | 185 def __range_str(r): |
| 186 if TransitionKey.__is_class_range(r): |
| 187 for name, v in TransitionKey.__class_bounds.items(): |
| 188 if r == v: return name |
| 189 assert False |
| 159 def to_str(x): | 190 def to_str(x): |
| 160 if x <= TransitionKey.__latin_1_upper_bound: | 191 assert TransitionKey.__in_latin_1(x) |
| 161 if not x in TransitionKey.__printable_cache: | 192 if not x in TransitionKey.__printable_cache: |
| 162 res = "'%s'" % chr(x) if chr(x) in printable else str(x) | 193 res = "'%s'" % chr(x) if chr(x) in printable else str(x) |
| 163 TransitionKey.__printable_cache[x] = res | 194 TransitionKey.__printable_cache[x] = res |
| 164 return TransitionKey.__printable_cache[x] | 195 return TransitionKey.__printable_cache[x] |
| 165 if x == TransitionKey.__unicode_literal_bounds[0]: | |
| 166 return "literal" | |
| 167 if x == TransitionKey.__unicode_whitespace_bounds[0]: | |
| 168 return "whitespace" | |
| 169 assert False | |
| 170 if r[0] == r[1]: | 196 if r[0] == r[1]: |
| 171 return "%s" % to_str(r[0]) | 197 return "%s" % to_str(r[0]) |
| 172 else: | 198 else: |
| 173 return "[%s-%s]" % (to_str(r[0]), to_str(r[1])) | 199 return "[%s-%s]" % (to_str(r[0]), to_str(r[1])) |
| 174 | 200 |
| 175 def __str__(self): | 201 def __str__(self): |
| 176 if self == self.epsilon(): | 202 if self.__name: |
| 177 return "epsilon" | 203 return self.__name |
| 178 if self == self.any(): | 204 return ", ".join(TransitionKey.__range_str(x) for x in self.__ranges) |
| 179 return "any" | |
| 180 return ", ".join(TransitionKey.__print_range(x) for x in self.__ranges) | |
| 181 | 205 |
| 182 @staticmethod | 206 @staticmethod |
| 183 def __disjoint_keys(range_map): | 207 def __disjoint_keys(range_map): |
| 184 sort = lambda x : sorted(set(x)) | 208 sort = lambda x : sorted(set(x)) |
| 185 range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items())) | 209 range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items())) |
| 186 ranges = [] | 210 ranges = [] |
| 187 upper_bound = TransitionKey.__upper_bound + 1 | 211 upper_bound = TransitionKey.__upper_bound + 1 |
| 188 for i in range(len(range_map)): | 212 for i in range(len(range_map)): |
| 189 (left, left_values) = range_map[i] | 213 (left, left_values) = range_map[i] |
| 190 next = range_map[i + 1][0] if i != len(range_map) - 1 else upper_bound | 214 next = range_map[i + 1][0] if i != len(range_map) - 1 else upper_bound |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 229 ranges = TransitionKey.__merge_ranges(ranges) | 253 ranges = TransitionKey.__merge_ranges(ranges) |
| 230 if invert: | 254 if invert: |
| 231 ranges = TransitionKey.__invert_ranges(ranges) | 255 ranges = TransitionKey.__invert_ranges(ranges) |
| 232 return TransitionKey.__create(ranges) | 256 return TransitionKey.__create(ranges) |
| 233 | 257 |
| 234 @staticmethod | 258 @staticmethod |
| 235 def __merge_ranges(ranges): | 259 def __merge_ranges(ranges): |
| 236 merged = [] | 260 merged = [] |
| 237 last = None | 261 last = None |
| 238 for r in ranges: | 262 for r in ranges: |
| 263 assert not TransitionKey.__is_unique_range(r) |
| 239 if last == None: | 264 if last == None: |
| 240 last = r | 265 last = r |
| 241 elif (last[1] + 1 == r[0] and | 266 elif (last[1] + 1 == r[0] and not TransitionKey.__is_class_range(r)): |
| 242 r != TransitionKey.__unicode_whitespace_bounds and | |
| 243 r != TransitionKey.__unicode_literal_bounds): | |
| 244 last = (last[0], r[1]) | 267 last = (last[0], r[1]) |
| 245 else: | 268 else: |
| 246 merged.append(last) | 269 merged.append(last) |
| 247 last = r | 270 last = r |
| 248 if last != None: | 271 if last != None: |
| 249 merged.append(last) | 272 merged.append(last) |
| 250 return merged | 273 return merged |
| 251 | 274 |
| 252 @staticmethod | 275 @staticmethod |
| 253 def merged_key(keys): | 276 def merged_key(keys): |
| 254 f = lambda acc, key: acc + list(key.__ranges) | 277 f = lambda acc, key: acc + list(key.__ranges) |
| 255 return TransitionKey.__key_from_ranges(False, reduce(f, keys, [])) | 278 return TransitionKey.__key_from_ranges(False, reduce(f, keys, [])) |
| 256 | 279 |
| 257 @staticmethod | 280 @staticmethod |
| 258 def __invert_ranges(ranges): | 281 def __invert_ranges(ranges): |
| 259 inverted = [] | 282 inverted = [] |
| 260 last = None | 283 last = None |
| 261 contains_whitespace = False | 284 classes = set(TransitionKey.__class_bounds.values()) |
| 262 contains_literal = False | 285 classes.remove(TransitionKey.__class_bounds["latin_1"]) |
| 263 for r in ranges: | 286 for r in ranges: |
| 264 if r == TransitionKey.__unicode_whitespace_bounds: | 287 assert not TransitionKey.__is_unique_range(r) |
| 265 contains_whitespace = True | 288 if TransitionKey.__is_class_range(r): |
| 266 continue | 289 classes.remove(r) |
| 267 if r == TransitionKey.__unicode_literal_bounds: | |
| 268 contains_literal = True | |
| 269 continue | 290 continue |
| 270 if last == None: | 291 if last == None: |
| 271 if r[0] != TransitionKey.__lower_bound: | 292 if r[0] != TransitionKey.__lower_bound: |
| 272 inverted.append((TransitionKey.__lower_bound, r[0] - 1)) | 293 inverted.append((TransitionKey.__lower_bound, r[0] - 1)) |
| 273 elif last[1] + 1 < r[0]: | 294 elif last[1] + 1 < r[0]: |
| 274 inverted.append((last[1] + 1, r[0] - 1)) | 295 inverted.append((last[1] + 1, r[0] - 1)) |
| 275 last = r | 296 last = r |
| 276 if last != None and last[1] < TransitionKey.__latin_1_upper_bound: | 297 upper_bound = TransitionKey.__class_bounds["latin_1"][1] |
| 277 inverted.append((last[1] + 1, TransitionKey.__latin_1_upper_bound)) | 298 if last != None and last[1] < upper_bound: |
| 278 if not contains_whitespace: | 299 inverted.append((last[1] + 1, upper_bound)) |
| 279 inverted.append(TransitionKey.__unicode_whitespace_bounds) | 300 inverted += list(classes) |
| 280 if not contains_literal: | |
| 281 inverted.append(TransitionKey.__unicode_literal_bounds) | |
| 282 return inverted | 301 return inverted |
| OLD | NEW |