Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(69)

Side by Side Diff: tools/lexer_generator/transition_keys.py

Issue 156313003: Experimental parser: cleanup transition_keys a bit (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 30 matching lines...) Expand all
41 Utf16Encoding() 41 Utf16Encoding()
42 Utf8Encoding() 42 Utf8Encoding()
43 return KeyEncoding.__encodings[name] 43 return KeyEncoding.__encodings[name]
44 44
45 def __init__(self, name, primary_range, named_ranges, predefined_ranges): 45 def __init__(self, name, primary_range, named_ranges, predefined_ranges):
46 assert not name in KeyEncoding.__encodings 46 assert not name in KeyEncoding.__encodings
47 assert primary_range[0] <= primary_range[1] 47 assert primary_range[0] <= primary_range[1]
48 KeyEncoding.__encodings[name] = self 48 KeyEncoding.__encodings[name] = self
49 self.__name = name 49 self.__name = name
50 self.__primary_range = primary_range 50 self.__primary_range = primary_range
51 self.__primary_range_component = Term(
52 'NUMERIC_RANGE_KEY', primary_range[0], primary_range[1])
53 self.__lower_bound = primary_range[0] 51 self.__lower_bound = primary_range[0]
54 self.__upper_bound = primary_range[1] 52 self.__upper_bound = primary_range[1]
53 self.__primary_range_component = self.numeric_range_term(primary_range[0],
54 primary_range[1])
55 self.__named_ranges = { 55 self.__named_ranges = {
56 k : Term('NAMED_RANGE_KEY', k) for k in named_ranges } 56 k : Term('NAMED_RANGE_KEY', k) for k in named_ranges }
57 def f(v): 57 def f(v):
58 if len(v) == 2: 58 if len(v) == 2:
59 assert primary_range[0] <= v[0] and v[1] <= primary_range[1] 59 return self.numeric_range_term(v[0], v[1])
60 return Term('NUMERIC_RANGE_KEY', v[0], v[1])
61 elif len(v) == 1: 60 elif len(v) == 1:
62 assert v[0] in self.__named_ranges 61 assert v[0] in self.__named_ranges
63 return self.__named_ranges[v[0]] 62 return self.__named_ranges[v[0]]
64 else: 63 else:
65 raise Exception() 64 raise Exception()
66 self.__predefined_ranges = { 65 self.__predefined_ranges = {
67 k : map(f, v) for k, v in predefined_ranges.iteritems() } 66 k : map(f, v) for k, v in predefined_ranges.iteritems() }
68 67
69 def name(self): 68 def name(self):
70 return self.__name 69 return self.__name
(...skipping 24 matching lines...) Expand all
95 ranges = self.__predefined_ranges 94 ranges = self.__predefined_ranges
96 return None if not name in ranges else iter(ranges[name]) 95 return None if not name in ranges else iter(ranges[name])
97 96
98 def __primary_range_iter(self): 97 def __primary_range_iter(self):
99 yield self.__primary_range_component 98 yield self.__primary_range_component
100 99
101 def all_components_iter(self): 100 def all_components_iter(self):
102 return chain(self.__primary_range_iter(), self.__named_ranges.itervalues()) 101 return chain(self.__primary_range_iter(), self.__named_ranges.itervalues())
103 102
104 def is_primary_range(self, r): 103 def is_primary_range(self, r):
105 assert self.lower_bound() <= r[0] and r[1] <= self.upper_bound() 104 assert len(r) == 2
106 primary_range = self.__primary_range 105 return self.in_primary_range(r[0], r[1])
107 if (primary_range[0] <= r[0] and r[1] <= primary_range[1]):
108 return True
109 assert r[0] == r[1]
110 return False
111 106
112 def in_primary_range(self, c): 107 def in_primary_range(self, a, b):
113 return self.is_primary_range((c, c)) 108 return self.lower_bound() <= a and b <= self.upper_bound()
109
110 def numeric_range_term(self, a, b):
111 assert type(a) == IntType and type(b) == IntType
112 assert self.in_primary_range(a, b)
113 return Term('NUMERIC_RANGE_KEY', a, b)
114 114
115 class TransitionKey(object): 115 class TransitionKey(object):
116 '''Represents a transition from a state in DFA or NFA to another state. 116 '''Represents a transition from a state in DFA or NFA to another state.
117 117
118 A transition key has a list of character ranges and a list of class ranges 118 A transition key has a list of character ranges and a list of class ranges
119 (e.g., "whitespace"), defining for which characters the transition 119 (e.g., "whitespace"), defining for which characters the transition
120 happens. When we generate code based on the transition key, the character 120 happens. When we generate code based on the transition key, the character
121 ranges generate simple checks and the class ranges generate more complicated 121 ranges generate simple checks and the class ranges generate more complicated
122 conditions, e.g., function calls.''' 122 conditions, e.g., function calls.'''
123 123
(...skipping 30 matching lines...) Expand all
154 lambda : encoding.all_components_iter()) 154 lambda : encoding.all_components_iter())
155 155
156 @staticmethod 156 @staticmethod
157 def single_char(encoding, char): 157 def single_char(encoding, char):
158 '''Returns a TransitionKey for a single-character transition.''' 158 '''Returns a TransitionKey for a single-character transition.'''
159 return TransitionKey.range(encoding, char, char) 159 return TransitionKey.range(encoding, char, char)
160 160
161 @staticmethod 161 @staticmethod
162 def range(encoding, a, b): 162 def range(encoding, a, b):
163 '''Returns a TransitionKey for a single-character transition.''' 163 '''Returns a TransitionKey for a single-character transition.'''
164 assert type(a) == IntType and type(b) == IntType 164 return TransitionKey(encoding, encoding.numeric_range_term(a, b))
165 return TransitionKey(encoding, Term("NUMERIC_RANGE_KEY", a, b))
166 165
167 @staticmethod 166 @staticmethod
168 def unique(term): # TODO(dcarney): rename 167 def unique(term): # TODO(dcarney): rename
169 '''Returns a unique TransitionKey for the given name (and creates it if it 168 '''Returns a unique TransitionKey for the given name (and creates it if it
170 doesn't exist yet). The TransitionKey won't have any real character range, 169 doesn't exist yet). The TransitionKey won't have any real character range,
171 but a newly-created "mock" character range which is separate from all other 170 but a newly-created "mock" character range which is separate from all other
172 character ranges.''' 171 character ranges.'''
173 return TransitionKey(None, Term("TERM_KEY", term)) 172 return TransitionKey(None, Term("TERM_KEY", term))
174 173
175 @staticmethod 174 @staticmethod
176 def __process_term(encoding, term, components, key_map): 175 def __process_term(encoding, term, components, key_map):
177 key = term.name() 176 key = term.name()
178 args = term.args() 177 args = term.args()
179 if key == 'RANGE': 178 if key == 'RANGE':
180 components.append(Term('NUMERIC_RANGE_KEY', args[0], args[1])) 179 components.append(encoding.numeric_range_term(args[0], args[1]))
181 elif key == 'LITERAL': 180 elif key == 'LITERAL':
182 components += map(lambda x : Term('NUMERIC_RANGE_KEY', x, x), args) 181 components += map(lambda x : encoding.numeric_range_term(x, x), args)
183 elif key == 'CAT': 182 elif key == 'CAT':
184 for x in args: 183 for x in args:
185 TransitionKey.__process_term(encoding, x, components, key_map) 184 TransitionKey.__process_term(encoding, x, components, key_map)
186 elif key == 'CHARACTER_CLASS': 185 elif key == 'CHARACTER_CLASS':
187 class_name = args[0] 186 class_name = args[0]
188 if encoding.named_range(class_name): 187 if encoding.named_range(class_name):
189 c = encoding.named_range(class_name) 188 c = encoding.named_range(class_name)
190 if class_name in key_map: 189 if class_name in key_map:
191 assert key_map[class_name] == TransitionKey(encoding, c) 190 assert key_map[class_name] == TransitionKey(encoding, c)
192 components.append(c) 191 components.append(c)
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after
338 elif component.name() == 'NAMED_RANGE_KEY': 337 elif component.name() == 'NAMED_RANGE_KEY':
339 return component.args()[0] 338 return component.args()[0]
340 elif component.name() == 'EPSILON_KEY': 339 elif component.name() == 'EPSILON_KEY':
341 return 'epsilon' 340 return 'epsilon'
342 elif component.name() == 'OMEGA_KEY': 341 elif component.name() == 'OMEGA_KEY':
343 return 'omega' 342 return 'omega'
344 elif component.name() != 'NUMERIC_RANGE_KEY': 343 elif component.name() != 'NUMERIC_RANGE_KEY':
345 raise Exception('unprintable %s' % component) 344 raise Exception('unprintable %s' % component)
346 r = component.args() 345 r = component.args()
347 def to_str(x): 346 def to_str(x):
348 assert not encoding or encoding.in_primary_range(x) 347 assert not encoding or encoding.in_primary_range(x, x)
349 if x > 127: 348 if x > 127:
350 return str(x) 349 return str(x)
351 if not x in TransitionKey.__printable_cache: 350 if not x in TransitionKey.__printable_cache:
352 res = "'%s'" % chr(x) if chr(x) in printable else str(x) 351 res = "'%s'" % chr(x) if chr(x) in printable else str(x)
353 TransitionKey.__printable_cache[x] = res 352 TransitionKey.__printable_cache[x] = res
354 return TransitionKey.__printable_cache[x] 353 return TransitionKey.__printable_cache[x]
355 if r[0] == r[1]: 354 if r[0] == r[1]:
356 return '%s' % to_str(r[0]) 355 return '%s' % to_str(r[0])
357 else: 356 else:
358 return '[%s-%s]' % (to_str(r[0]), to_str(r[1])) 357 return '[%s-%s]' % (to_str(r[0]), to_str(r[1]))
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after
512 other_keys.add(x) 511 other_keys.add(x)
513 continue 512 continue
514 (start, end) = x.args() 513 (start, end) = x.args()
515 if not start in range_map: 514 if not start in range_map:
516 range_map[start] = [] 515 range_map[start] = []
517 range_map[start].append(end) 516 range_map[start].append(end)
518 ranges = TransitionKey.__disjoint_keys(encoding, range_map) 517 ranges = TransitionKey.__disjoint_keys(encoding, range_map)
519 if merge_ranges: 518 if merge_ranges:
520 ranges = TransitionKey.__merge_ranges(encoding, ranges) 519 ranges = TransitionKey.__merge_ranges(encoding, ranges)
521 other_keys = sorted(other_keys, cmp=TransitionKey.__component_compare) 520 other_keys = sorted(other_keys, cmp=TransitionKey.__component_compare)
522 range_terms = map(lambda x : Term('NUMERIC_RANGE_KEY', x[0], x[1]), ranges) 521 range_terms = map(
522 lambda x : encoding.numeric_range_term(x[0], x[1]), ranges)
523 return chain(iter(range_terms), iter(other_keys)) 523 return chain(iter(range_terms), iter(other_keys))
524 524
525 @staticmethod 525 @staticmethod
526 def __key_from_components(encoding, invert, components): 526 def __key_from_components(encoding, invert, components):
527 components = TransitionKey.__disjoint_components(encoding, components, True) 527 components = TransitionKey.__disjoint_components(encoding, components, True)
528 if invert: 528 if invert:
529 components = TransitionKey.__invert_components(encoding, components) 529 components = TransitionKey.__invert_components(encoding, components)
530 return None if not components else TransitionKey(encoding, components) 530 return None if not components else TransitionKey(encoding, components)
531 531
532 @staticmethod 532 @staticmethod
(...skipping 20 matching lines...) Expand all
553 encoding, True, TransitionKey.__flatten_keys(keys)) 553 encoding, True, TransitionKey.__flatten_keys(keys))
554 554
555 @staticmethod 555 @staticmethod
556 def merged_key(encoding, keys): 556 def merged_key(encoding, keys):
557 return TransitionKey(encoding, 557 return TransitionKey(encoding,
558 TransitionKey.__disjoint_components_from_keys(encoding, keys, True)) 558 TransitionKey.__disjoint_components_from_keys(encoding, keys, True))
559 559
560 @staticmethod 560 @staticmethod
561 def __invert_components(encoding, components): 561 def __invert_components(encoding, components):
562 def key(x, y): 562 def key(x, y):
563 return Term('NUMERIC_RANGE_KEY', x, y) 563 return encoding.numeric_range_term(x, y)
564 last = None 564 last = None
565 classes = set(encoding.named_range_value_iter()) 565 classes = set(encoding.named_range_value_iter())
566 for c in components: 566 for c in components:
567 if c in classes: 567 if c in classes:
568 classes.remove(c) 568 classes.remove(c)
569 continue 569 continue
570 assert c.name() == 'NUMERIC_RANGE_KEY', 'unencoded key not invertible' 570 assert c.name() == 'NUMERIC_RANGE_KEY', 'unencoded key not invertible'
571 r = c.args() 571 r = c.args()
572 assert encoding.is_primary_range(r) 572 assert encoding.is_primary_range(r)
573 if last == None: 573 if last == None:
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
643 { 643 {
644 'whitespace': 644 'whitespace':
645 [(9, 9), (11, 12), (32, 32), ('non_primary_whitespace',)], 645 [(9, 9), (11, 12), (32, 32), ('non_primary_whitespace',)],
646 'letter': 646 'letter':
647 [(65, 90), (97, 122), ('non_primary_letter',)], 647 [(65, 90), (97, 122), ('non_primary_letter',)],
648 'line_terminator': 648 'line_terminator':
649 [(10, 10), (13, 13), ('non_primary_line_terminator',)], 649 [(10, 10), (13, 13), ('non_primary_line_terminator',)],
650 'identifier_part_not_letter': 650 'identifier_part_not_letter':
651 [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)], 651 [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)],
652 }) 652 })
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698