| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| 11 # with the distribution. | 11 # with the distribution. |
| 12 # * Neither the name of Google Inc. nor the names of its | 12 # * Neither the name of Google Inc. nor the names of its |
| 13 # contributors may be used to endorse or promote products derived | 13 # contributors may be used to endorse or promote products derived |
| 14 # from this software without specific prior written permission. | 14 # from this software without specific prior written permission. |
| 15 # | 15 # |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 |
| 27 from string import printable | 28 from string import printable |
| 28 | 29 |
| 29 class TransitionKey: | 30 class TransitionKey: |
| 30 | 31 |
| 31 __lower_bound = 0 | 32 __lower_bound = 0 |
| 32 __latin_1_upper_bound = 255 | 33 __latin_1_upper_bound = 255 |
| 33 __unicode_whitespace_bounds = (256, 256) | 34 __unicode_whitespace_bounds = (256, 256) |
| 34 __unicode_literal_bounds = (257, 257) | 35 __unicode_literal_bounds = (257, 257) |
| 35 __upper_bound = 257 | 36 __upper_bound = 257 |
| 36 | 37 |
| 37 @staticmethod | 38 @staticmethod |
| 38 def __verify_ranges(ranges): | 39 def __verify_ranges(ranges, check_merged): |
| 39 last = (TransitionKey.__lower_bound - 1, TransitionKey.__lower_bound - 1) | 40 assert ranges |
| 41 last = None |
| 40 for r in ranges: | 42 for r in ranges: |
| 41 assert TransitionKey.__lower_bound <= r[0] | 43 assert TransitionKey.__lower_bound <= r[0] |
| 42 assert r[1] <= TransitionKey.__upper_bound | 44 assert r[1] <= TransitionKey.__upper_bound |
| 43 assert r[0] <= r[1] | 45 assert r[0] <= r[1] |
| 44 assert last[1] < r[0] | 46 r_is_class = ( |
| 45 # TODO check classes are always disjoint points | 47 r == TransitionKey.__unicode_whitespace_bounds or |
| 48 r == TransitionKey.__unicode_literal_bounds) |
| 49 if last != None and check_merged: |
| 50 assert last[1] + 1 < r[0] or r_is_class |
| 51 if (r[0] > TransitionKey.__latin_1_upper_bound or |
| 52 r[1] > TransitionKey.__latin_1_upper_bound): |
| 53 assert r_is_class |
| 46 last = r | 54 last = r |
| 47 | 55 |
| 48 @staticmethod | 56 @staticmethod |
| 49 def __create(ranges): | 57 def __create(ranges): |
| 50 TransitionKey.__verify_ranges(ranges) | 58 TransitionKey.__verify_ranges(ranges, True) |
| 51 key = TransitionKey() | 59 key = TransitionKey() |
| 52 key.__ranges = tuple(ranges) # immutable | 60 key.__ranges = tuple(ranges) # immutable |
| 53 key.__cached_hash = None | 61 key.__cached_hash = None |
| 54 return key | 62 return key |
| 55 | 63 |
| 56 __cached_epsilon = None | 64 __cached_epsilon = None |
| 57 @staticmethod | 65 @staticmethod |
| 58 def epsilon(): | 66 def epsilon(): |
| 59 if (TransitionKey.__cached_epsilon == None): | 67 if (TransitionKey.__cached_epsilon == None): |
| 60 TransitionKey.__cached_epsilon = TransitionKey.__create([]) | 68 key = TransitionKey() |
| 69 key.__ranges = tuple([]) |
| 70 key.__cached_hash = None |
| 71 TransitionKey.__cached_epsilon = key |
| 61 return TransitionKey.__cached_epsilon | 72 return TransitionKey.__cached_epsilon |
| 62 | 73 |
| 63 __cached_any = None | 74 __cached_any = None |
| 64 @staticmethod | 75 @staticmethod |
| 65 def any(): | 76 def any(): |
| 66 if (TransitionKey.__cached_any == None): | 77 if (TransitionKey.__cached_any == None): |
| 67 bounds = [ | 78 bounds = [ |
| 68 (TransitionKey.__lower_bound, TransitionKey.__latin_1_upper_bound), | 79 (TransitionKey.__lower_bound, TransitionKey.__latin_1_upper_bound), |
| 69 TransitionKey.__unicode_whitespace_bounds, | 80 TransitionKey.__unicode_whitespace_bounds, |
| 70 TransitionKey.__unicode_literal_bounds, | 81 TransitionKey.__unicode_literal_bounds, |
| 71 ] | 82 ] |
| 72 TransitionKey.__cached_any = TransitionKey.__create(bounds) | 83 TransitionKey.__cached_any = TransitionKey.__create(bounds) |
| 73 return TransitionKey.__cached_any | 84 return TransitionKey.__cached_any |
| 74 | 85 |
| 75 @staticmethod | 86 @staticmethod |
| 76 def single_char(char): | 87 def single_char(char): |
| 77 char = ord(char) | 88 char = ord(char) |
| 78 assert (TransitionKey.__lower_bound <= char and | 89 assert (TransitionKey.__lower_bound <= char and |
| 79 char <= TransitionKey.__latin_1_upper_bound) | 90 char <= TransitionKey.__latin_1_upper_bound) |
| 80 return TransitionKey.__create([(char, char)]) | 91 return TransitionKey.__create([(char, char)]) |
| 81 | 92 |
| 82 @staticmethod | 93 @staticmethod |
| 94 def __process_graph(graph, ranges): |
| 95 key = graph[0] |
| 96 if key == 'RANGE': |
| 97 ranges.append((ord(graph[1]), ord(graph[2]))) |
| 98 elif key == 'LITERAL': |
| 99 ranges.append((ord(graph[1]), ord(graph[1]))) |
| 100 elif key == 'CAT': |
| 101 for x in [graph[1], graph[2]]: |
| 102 TransitionKey.__process_graph(x, ranges) |
| 103 else: |
| 104 assert False, "bad key %s" % key |
| 105 |
| 106 @staticmethod |
| 83 def character_class(invert, graph): | 107 def character_class(invert, graph): |
| 84 # TODO | 108 ranges = [] |
| 85 return TransitionKey.__create([(129, 129)]) | 109 TransitionKey.__process_graph(graph, ranges) |
| 110 return TransitionKey.__key_from_ranges(invert, ranges) |
| 86 | 111 |
| 87 def matches_char(self, char): | 112 def matches_char(self, char): |
| 88 char = ord(char) | 113 char = ord(char) |
| 89 # TODO class checks | 114 # TODO class checks |
| 90 for r in self.__ranges: | 115 for r in self.__ranges: |
| 91 if r[0] <= char and char <= r[1]: return True | 116 if r[0] <= char and char <= r[1]: return True |
| 92 return False | 117 return False |
| 93 | 118 |
| 94 def matches_key(self, key): | 119 def matches_key(self, key): |
| 95 assert isinstance(key, self.__class__) | 120 assert isinstance(key, self.__class__) |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 132 return "[%s-%s]" % (to_str(r[0]), to_str(r[1])) | 157 return "[%s-%s]" % (to_str(r[0]), to_str(r[1])) |
| 133 | 158 |
| 134 def __str__(self): | 159 def __str__(self): |
| 135 if self == self.epsilon(): | 160 if self == self.epsilon(): |
| 136 return "epsilon" | 161 return "epsilon" |
| 137 if self == self.any(): | 162 if self == self.any(): |
| 138 return "any" | 163 return "any" |
| 139 return ", ".join(TransitionKey.__print_range(x) for x in self.__ranges) | 164 return ", ".join(TransitionKey.__print_range(x) for x in self.__ranges) |
| 140 | 165 |
| 141 @staticmethod | 166 @staticmethod |
| 142 def disjoint_keys(key_set): | 167 def __disjoint_keys(range_map): |
| 143 range_map = {} | |
| 144 for x in key_set: | |
| 145 for r in x.__ranges: | |
| 146 if not r[0] in range_map: | |
| 147 range_map[r[0]] = [] | |
| 148 range_map[r[0]].append(r[1]) | |
| 149 sort = lambda x : sorted(set(x)) | 168 sort = lambda x : sorted(set(x)) |
| 150 range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items())) | 169 range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items())) |
| 151 ranges = [] | 170 ranges = [] |
| 152 upper_bound = TransitionKey.__upper_bound + 1 | 171 upper_bound = TransitionKey.__upper_bound + 1 |
| 153 for i in range(len(range_map)): | 172 for i in range(len(range_map)): |
| 154 (left, left_values) = range_map[i] | 173 (left, left_values) = range_map[i] |
| 155 next = range_map[i + 1][0] if i != len(range_map) - 1 else upper_bound | 174 next = range_map[i + 1][0] if i != len(range_map) - 1 else upper_bound |
| 156 to_push = [] | 175 to_push = [] |
| 157 for v in left_values: | 176 for v in left_values: |
| 158 if v >= next: | 177 if v >= next: |
| 159 if not to_push: | 178 if not to_push: |
| 160 ranges.append((left, next - 1)) | 179 ranges.append((left, next - 1)) |
| 161 to_push.append(v) | 180 to_push.append(v) |
| 162 else: | 181 else: |
| 163 ranges.append((left, v)) | 182 ranges.append((left, v)) |
| 164 left = v + 1 | 183 left = v + 1 |
| 165 if to_push: | 184 if to_push: |
| 166 current = range_map[i + 1] | 185 current = range_map[i + 1] |
| 167 range_map[i + 1] = (current[0], sort(current[1] + to_push)) | 186 range_map[i + 1] = (current[0], sort(current[1] + to_push)) |
| 168 TransitionKey.__verify_ranges(ranges) | 187 return ranges |
| 188 |
| 189 @staticmethod |
| 190 def disjoint_keys(key_set): |
| 191 if not key_set: |
| 192 return [] |
| 193 range_map = {} |
| 194 for x in key_set: |
| 195 for r in x.__ranges: |
| 196 if not r[0] in range_map: |
| 197 range_map[r[0]] = [] |
| 198 range_map[r[0]].append(r[1]) |
| 199 ranges = TransitionKey.__disjoint_keys(range_map) |
| 200 TransitionKey.__verify_ranges(ranges, False) |
| 169 return map(lambda x : TransitionKey.__create([x]), ranges) | 201 return map(lambda x : TransitionKey.__create([x]), ranges) |
| 202 |
| 203 @staticmethod |
| 204 def __key_from_ranges(invert, ranges): |
| 205 range_map = {} |
| 206 for r in ranges: |
| 207 if not r[0] in range_map: |
| 208 range_map[r[0]] = [] |
| 209 range_map[r[0]].append(r[1]) |
| 210 ranges = TransitionKey.__disjoint_keys(range_map) |
| 211 ranges = TransitionKey.__merge_ranges(ranges) |
| 212 if invert: |
| 213 ranges = TransitionKey.__invert_ranges(ranges) |
| 214 return TransitionKey.__create(ranges) |
| 215 |
| 216 @staticmethod |
| 217 def __merge_ranges(ranges): |
| 218 merged = [] |
| 219 last = None |
| 220 for r in ranges: |
| 221 if last == None: |
| 222 last = r |
| 223 elif (last[1] + 1 == r[0] and |
| 224 r != TransitionKey.__unicode_whitespace_bounds and |
| 225 r != TransitionKey.__unicode_literal_bounds): |
| 226 last = (last[0], r[1]) |
| 227 else: |
| 228 merged.append(last) |
| 229 last = r |
| 230 if last != None: |
| 231 merged.append(last) |
| 232 return merged |
| 233 |
| 234 @staticmethod |
| 235 def __invert_ranges(ranges): |
| 236 inverted = [] |
| 237 last = None |
| 238 contains_whitespace = False |
| 239 contains_literal = False |
| 240 for r in ranges: |
| 241 if r == TransitionKey.__unicode_whitespace_bounds: |
| 242 contains_whitespace = True |
| 243 continue |
| 244 if r == TransitionKey.__unicode_literal_bounds: |
| 245 contains_literal = True |
| 246 continue |
| 247 if last == None: |
| 248 if r[0] != TransitionKey.__lower_bound: |
| 249 inverted.append((TransitionKey.__lower_bound, r[0] - 1)) |
| 250 elif last[1] + 1 < r[0]: |
| 251 inverted.append((last[1] + 1, r[0] - 1)) |
| 252 last = r |
| 253 if last != None and last[1] < TransitionKey.__latin_1_upper_bound: |
| 254 inverted.append((last[1] + 1, TransitionKey.__latin_1_upper_bound)) |
| 255 if not contains_whitespace: |
| 256 inverted.append(TransitionKey.__unicode_whitespace_bounds) |
| 257 if not contains_literal: |
| 258 inverted.append(TransitionKey.__unicode_literal_bounds) |
| 259 return inverted |
| OLD | NEW |