Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(56)

Unified Diff: tools/lexer_generator/transition_keys.py

Issue 80263003: Experimental parser: split classes into latin1 and non latin1 (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index 5fda7affc2e98b86b0b6b95409da7ca1d861bb9e..ca99b74f01e24eab3ed0e9bb2b4e021c3c930284 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -40,19 +40,36 @@ class TransitionKey:
'latin_1' : (1, 255),
# These are not real ranges; they just need to be separate from any real
# ranges.
- 'whitespace' : (256, 256),
- 'letter' : (257, 257),
- 'identifier_part_not_letter' : (258, 258),
- 'eos' : (259, 259),
- 'zero' : (260, 260),
+ 'non_latin_1_whitespace' : (256, 256),
+ 'non_latin_1_letter' : (257, 257),
+ 'non_latin1_identifier_part_not_letter' : (258, 258),
+ 'non_latin1_line_terminator' : (259, 259),
+ 'eos' : (260, 260),
+ 'zero' : (261, 261),
}
- __lower_bound = 1
+ __lower_bound = min(__class_bounds.values(), key=lambda item: item[0])[0]
__upper_bound = max(__class_bounds.values(), key=lambda item: item[1])[1]
__cached_keys = {}
__unique_key_counter = -1
+ __predefined_ranges = {
+ 'whitespace' : [
+ (9, 9), (11, 12), (32, 32), (133, 133), (160, 160),
+ __class_bounds['non_latin_1_whitespace']],
+ 'letter' : [
+ (65, 90), (97, 122), (170, 170), (181, 181),
+ (186, 186), (192, 214), (216, 246), (248, 255),
+ __class_bounds['non_latin_1_letter']],
+ 'line_terminator' : [
+ (10, 10), (13, 13),
+ __class_bounds['non_latin1_line_terminator']],
+ 'identifier_part_not_letter' : [
+ (48, 57), (95, 95),
+ __class_bounds['non_latin1_identifier_part_not_letter']],
+ }
+
@staticmethod
def __in_latin_1(char):
bound = TransitionKey.__class_bounds['latin_1']
@@ -140,8 +157,16 @@ class TransitionKey:
TransitionKey.__process_graph(x, ranges, key_map)
elif key == 'CHARACTER_CLASS':
class_name = graph[1]
- if class_name in TransitionKey.__class_bounds.keys():
+ if class_name in TransitionKey.__class_bounds:
+ if class_name in key_map:
+ assert (key_map[class_name] ==
+ TransitionKey([TransitionKey.__class_bounds[class_name]]))
ranges.append(TransitionKey.__class_bounds[class_name])
+ elif class_name in TransitionKey.__predefined_ranges:
+ if class_name in key_map:
+ assert (key_map[class_name] ==
+ TransitionKey(TransitionKey.__predefined_ranges[class_name]))
+ ranges += TransitionKey.__predefined_ranges[class_name]
elif class_name in key_map:
ranges += key_map[class_name].__ranges
else:
« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698