Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(64)

Side by Side Diff: tools/lexer_generator/transition_keys.py

Issue 80263003: Experimental parser: split classes into latin1 and non latin1 (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 22 matching lines...) Expand all
33 A transition key has a list of character ranges and a list of class ranges 33 A transition key has a list of character ranges and a list of class ranges
34 (e.g., "whitespace"), defining for which characters the transition 34 (e.g., "whitespace"), defining for which characters the transition
35 happens. When we generate code based on the transition key, the character 35 happens. When we generate code based on the transition key, the character
36 ranges generate simple checks and the class ranges generate more complicated 36 ranges generate simple checks and the class ranges generate more complicated
37 conditions, e.g., function calls.''' 37 conditions, e.g., function calls.'''
38 38
39 __class_bounds = { 39 __class_bounds = {
40 'latin_1' : (1, 255), 40 'latin_1' : (1, 255),
41 # These are not real ranges; they just need to be separate from any real 41 # These are not real ranges; they just need to be separate from any real
42 # ranges. 42 # ranges.
43 'whitespace' : (256, 256), 43 'non_latin_1_whitespace' : (256, 256),
44 'letter' : (257, 257), 44 'non_latin_1_letter' : (257, 257),
45 'identifier_part_not_letter' : (258, 258), 45 'non_latin1_identifier_part_not_letter' : (258, 258),
46 'eos' : (259, 259), 46 'non_latin1_line_terminator' : (259, 259),
47 'zero' : (260, 260), 47 'eos' : (260, 260),
48 'zero' : (261, 261),
48 } 49 }
49 __lower_bound = 1 50 __lower_bound = min(__class_bounds.values(), key=lambda item: item[0])[0]
50 __upper_bound = max(__class_bounds.values(), key=lambda item: item[1])[1] 51 __upper_bound = max(__class_bounds.values(), key=lambda item: item[1])[1]
51 52
52 __cached_keys = {} 53 __cached_keys = {}
53 54
54 __unique_key_counter = -1 55 __unique_key_counter = -1
55 56
57 __predefined_ranges = {
58 'whitespace' : [
59 (9, 9), (11, 12), (32, 32), (133, 133), (160, 160),
60 __class_bounds['non_latin_1_whitespace']],
61 'letter' : [
62 (65, 90), (97, 122), (170, 170), (181, 181),
63 (186, 186), (192, 214), (216, 246), (248, 255),
64 __class_bounds['non_latin_1_letter']],
65 'line_terminator' : [
66 (10, 10), (13, 13),
67 __class_bounds['non_latin1_line_terminator']],
68 'identifier_part_not_letter' : [
69 (48, 57), (95, 95),
70 __class_bounds['non_latin1_identifier_part_not_letter']],
71 }
72
56 @staticmethod 73 @staticmethod
57 def __in_latin_1(char): 74 def __in_latin_1(char):
58 bound = TransitionKey.__class_bounds['latin_1'] 75 bound = TransitionKey.__class_bounds['latin_1']
59 return (bound[0] <= char and char <= bound[1]) 76 return (bound[0] <= char and char <= bound[1])
60 77
61 @staticmethod 78 @staticmethod
62 def __is_class_range(r): 79 def __is_class_range(r):
63 return r[0] == r[1] and not TransitionKey.__in_latin_1(r[0]) 80 return r[0] == r[1] and not TransitionKey.__in_latin_1(r[0])
64 81
65 @staticmethod 82 @staticmethod
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
133 key = graph[0] 150 key = graph[0]
134 if key == 'RANGE': 151 if key == 'RANGE':
135 ranges.append((ord(graph[1]), ord(graph[2]))) 152 ranges.append((ord(graph[1]), ord(graph[2])))
136 elif key == 'LITERAL': 153 elif key == 'LITERAL':
137 ranges.append((ord(graph[1]), ord(graph[1]))) 154 ranges.append((ord(graph[1]), ord(graph[1])))
138 elif key == 'CAT': 155 elif key == 'CAT':
139 for x in [graph[1], graph[2]]: 156 for x in [graph[1], graph[2]]:
140 TransitionKey.__process_graph(x, ranges, key_map) 157 TransitionKey.__process_graph(x, ranges, key_map)
141 elif key == 'CHARACTER_CLASS': 158 elif key == 'CHARACTER_CLASS':
142 class_name = graph[1] 159 class_name = graph[1]
143 if class_name in TransitionKey.__class_bounds.keys(): 160 if class_name in TransitionKey.__class_bounds:
161 if class_name in key_map:
162 assert (key_map[class_name] ==
163 TransitionKey([TransitionKey.__class_bounds[class_name]]))
144 ranges.append(TransitionKey.__class_bounds[class_name]) 164 ranges.append(TransitionKey.__class_bounds[class_name])
165 elif class_name in TransitionKey.__predefined_ranges:
166 if class_name in key_map:
167 assert (key_map[class_name] ==
168 TransitionKey(TransitionKey.__predefined_ranges[class_name]))
169 ranges += TransitionKey.__predefined_ranges[class_name]
145 elif class_name in key_map: 170 elif class_name in key_map:
146 ranges += key_map[class_name].__ranges 171 ranges += key_map[class_name].__ranges
147 else: 172 else:
148 raise Exception('unknown character class [%s]' % graph[1]) 173 raise Exception('unknown character class [%s]' % graph[1])
149 else: 174 else:
150 raise Exception('bad key [%s]' % key) 175 raise Exception('bad key [%s]' % key)
151 176
152 @staticmethod 177 @staticmethod
153 def character_class(graph, key_map): 178 def character_class(graph, key_map):
154 '''Processes 'graph' (a representation of a character class in the rule 179 '''Processes 'graph' (a representation of a character class in the rule
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after
389 elif last[1] + 1 < r[0]: 414 elif last[1] + 1 < r[0]:
390 inverted.append((last[1] + 1, r[0] - 1)) 415 inverted.append((last[1] + 1, r[0] - 1))
391 last = r 416 last = r
392 upper_bound = latin_1[1] 417 upper_bound = latin_1[1]
393 if last == None: 418 if last == None:
394 inverted.append(latin_1) 419 inverted.append(latin_1)
395 elif last[1] < upper_bound: 420 elif last[1] < upper_bound:
396 inverted.append((last[1] + 1, upper_bound)) 421 inverted.append((last[1] + 1, upper_bound))
397 inverted += list(classes) 422 inverted += list(classes)
398 return inverted 423 return inverted
OLD | NEW
« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698