| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 22 matching lines...) Expand all Loading... |
| 33 A transition key has a list of character ranges and a list of class ranges | 33 A transition key has a list of character ranges and a list of class ranges |
| 34 (e.g., "whitespace"), defining for which characters the transition | 34 (e.g., "whitespace"), defining for which characters the transition |
| 35 happens. When we generate code based on the transition key, the character | 35 happens. When we generate code based on the transition key, the character |
| 36 ranges generate simple checks and the class ranges generate more complicated | 36 ranges generate simple checks and the class ranges generate more complicated |
| 37 conditions, e.g., function calls.''' | 37 conditions, e.g., function calls.''' |
| 38 | 38 |
# Bounds used to classify characters. 'latin_1' is the one real character
# range; every other entry is a single-value marker placed just above it so
# that it cannot collide with an actual character range.
# NOTE(review): two keys are spelled 'non_latin1_...' while the others use
# 'non_latin_1_...'. The strings are kept exactly as they are because they are
# looked up by name at runtime; confirm whether the difference is intended.
__class_bounds = {
  'latin_1': (1, 255),
  # These are not real ranges; they just need to be separate from any real
  # ranges.
  'non_latin_1_whitespace': (256, 256),
  'non_latin_1_letter': (257, 257),
  'non_latin1_identifier_part_not_letter': (258, 258),
  'non_latin1_line_terminator': (259, 259),
  'eos': (260, 260),
  'zero': (261, 261),
}
# Smallest lower bound and largest upper bound over all entries above. Only
# the outermost iterable of each generator expression names __class_bounds,
# so both lines also evaluate correctly inside a class body.
__lower_bound = min(bounds[0] for bounds in __class_bounds.values())
__upper_bound = max(bounds[1] for bounds in __class_bounds.values())

# NOTE(review): presumably a memo of already-built keys; the code that reads
# and fills it is outside this excerpt.
__cached_keys = {}

# NOTE(review): starts at -1; how it is advanced is not visible here.
__unique_key_counter = -1
| 55 | 56 |
# Named character classes. Each one lists its Latin-1 code point ranges
# explicitly and ends with the marker range from __class_bounds that stands
# for the members of the class outside Latin-1.
__predefined_ranges = {
  'whitespace': [
    (9, 9),      # character tabulation
    (11, 12),    # line tabulation, form feed
    (32, 32),    # space
    (133, 133),  # next line (NEL)
    (160, 160),  # no-break space
    __class_bounds['non_latin_1_whitespace'],
  ],
  'letter': [
    (65, 90),    # A-Z
    (97, 122),   # a-z
    (170, 170),  # feminine ordinal indicator
    (181, 181),  # micro sign
    (186, 186),  # masculine ordinal indicator
    (192, 214),  # Latin-1 letters up to, but excluding, 215 (multiplication sign)
    (216, 246),  # Latin-1 letters up to, but excluding, 247 (division sign)
    (248, 255),
    __class_bounds['non_latin_1_letter'],
  ],
  'line_terminator': [
    (10, 10),    # line feed
    (13, 13),    # carriage return
    __class_bounds['non_latin1_line_terminator'],
  ],
  'identifier_part_not_letter': [
    (48, 57),    # 0-9
    (95, 95),    # underscore
    __class_bounds['non_latin1_identifier_part_not_letter'],
  ],
}
| 72 |
@staticmethod
def __in_latin_1(char):
  '''Returns whether the character code 'char' lies within the 'latin_1'
  bounds, both ends included.'''
  lower, upper = TransitionKey.__class_bounds['latin_1']
  return lower <= char <= upper
| 60 | 77 |
@staticmethod
def __is_class_range(r):
  '''Returns whether the range 'r' is a class marker: a single-value range
  whose value lies outside the 'latin_1' bounds.'''
  low, high = r[0], r[1]
  if low != high:
    # Markers always cover exactly one value.
    return False
  return not TransitionKey.__in_latin_1(low)
| 64 | 81 |
| 65 @staticmethod | 82 @staticmethod |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 133 key = graph[0] | 150 key = graph[0] |
| 134 if key == 'RANGE': | 151 if key == 'RANGE': |
| 135 ranges.append((ord(graph[1]), ord(graph[2]))) | 152 ranges.append((ord(graph[1]), ord(graph[2]))) |
| 136 elif key == 'LITERAL': | 153 elif key == 'LITERAL': |
| 137 ranges.append((ord(graph[1]), ord(graph[1]))) | 154 ranges.append((ord(graph[1]), ord(graph[1]))) |
| 138 elif key == 'CAT': | 155 elif key == 'CAT': |
| 139 for x in [graph[1], graph[2]]: | 156 for x in [graph[1], graph[2]]: |
| 140 TransitionKey.__process_graph(x, ranges, key_map) | 157 TransitionKey.__process_graph(x, ranges, key_map) |
| 141 elif key == 'CHARACTER_CLASS': | 158 elif key == 'CHARACTER_CLASS': |
| 142 class_name = graph[1] | 159 class_name = graph[1] |
| 143 if class_name in TransitionKey.__class_bounds.keys(): | 160 if class_name in TransitionKey.__class_bounds: |
| 161 if class_name in key_map: |
| 162 assert (key_map[class_name] == |
| 163 TransitionKey([TransitionKey.__class_bounds[class_name]])) |
| 144 ranges.append(TransitionKey.__class_bounds[class_name]) | 164 ranges.append(TransitionKey.__class_bounds[class_name]) |
| 165 elif class_name in TransitionKey.__predefined_ranges: |
| 166 if class_name in key_map: |
| 167 assert (key_map[class_name] == |
| 168 TransitionKey(TransitionKey.__predefined_ranges[class_name])) |
| 169 ranges += TransitionKey.__predefined_ranges[class_name] |
| 145 elif class_name in key_map: | 170 elif class_name in key_map: |
| 146 ranges += key_map[class_name].__ranges | 171 ranges += key_map[class_name].__ranges |
| 147 else: | 172 else: |
| 148 raise Exception('unknown character class [%s]' % graph[1]) | 173 raise Exception('unknown character class [%s]' % graph[1]) |
| 149 else: | 174 else: |
| 150 raise Exception('bad key [%s]' % key) | 175 raise Exception('bad key [%s]' % key) |
| 151 | 176 |
| 152 @staticmethod | 177 @staticmethod |
| 153 def character_class(graph, key_map): | 178 def character_class(graph, key_map): |
| 154 '''Processes 'graph' (a representation of a character class in the rule | 179 '''Processes 'graph' (a representation of a character class in the rule |
| (...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 389 elif last[1] + 1 < r[0]: | 414 elif last[1] + 1 < r[0]: |
| 390 inverted.append((last[1] + 1, r[0] - 1)) | 415 inverted.append((last[1] + 1, r[0] - 1)) |
| 391 last = r | 416 last = r |
| 392 upper_bound = latin_1[1] | 417 upper_bound = latin_1[1] |
| 393 if last == None: | 418 if last == None: |
| 394 inverted.append(latin_1) | 419 inverted.append(latin_1) |
| 395 elif last[1] < upper_bound: | 420 elif last[1] < upper_bound: |
| 396 inverted.append((last[1] + 1, upper_bound)) | 421 inverted.append((last[1] + 1, upper_bound)) |
| 397 inverted += list(classes) | 422 inverted += list(classes) |
| 398 return inverted | 423 return inverted |
| OLD | NEW |