| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 30 matching lines...) Expand all Loading... |
| 41 Utf16Encoding() | 41 Utf16Encoding() |
| 42 Utf8Encoding() | 42 Utf8Encoding() |
| 43 return KeyEncoding.__encodings[name] | 43 return KeyEncoding.__encodings[name] |
| 44 | 44 |
| 45 def __init__(self, name, primary_range, named_ranges, predefined_ranges): | 45 def __init__(self, name, primary_range, named_ranges, predefined_ranges): |
| 46 assert not name in KeyEncoding.__encodings | 46 assert not name in KeyEncoding.__encodings |
| 47 assert primary_range[0] <= primary_range[1] | 47 assert primary_range[0] <= primary_range[1] |
| 48 KeyEncoding.__encodings[name] = self | 48 KeyEncoding.__encodings[name] = self |
| 49 self.__name = name | 49 self.__name = name |
| 50 self.__primary_range = primary_range | 50 self.__primary_range = primary_range |
| 51 self.__primary_range_component = Term( | |
| 52 'NUMERIC_RANGE_KEY', primary_range[0], primary_range[1]) | |
| 53 self.__lower_bound = primary_range[0] | 51 self.__lower_bound = primary_range[0] |
| 54 self.__upper_bound = primary_range[1] | 52 self.__upper_bound = primary_range[1] |
| 53 self.__primary_range_component = self.numeric_range_term(primary_range[0], |
| 54 primary_range[1]) |
| 55 self.__named_ranges = { | 55 self.__named_ranges = { |
| 56 k : Term('NAMED_RANGE_KEY', k) for k in named_ranges } | 56 k : Term('NAMED_RANGE_KEY', k) for k in named_ranges } |
| 57 def f(v): | 57 def f(v): |
| 58 if len(v) == 2: | 58 if len(v) == 2: |
| 59 assert primary_range[0] <= v[0] and v[1] <= primary_range[1] | 59 return self.numeric_range_term(v[0], v[1]) |
| 60 return Term('NUMERIC_RANGE_KEY', v[0], v[1]) | |
| 61 elif len(v) == 1: | 60 elif len(v) == 1: |
| 62 assert v[0] in self.__named_ranges | 61 assert v[0] in self.__named_ranges |
| 63 return self.__named_ranges[v[0]] | 62 return self.__named_ranges[v[0]] |
| 64 else: | 63 else: |
| 65 raise Exception() | 64 raise Exception() |
| 66 self.__predefined_ranges = { | 65 self.__predefined_ranges = { |
| 67 k : map(f, v) for k, v in predefined_ranges.iteritems() } | 66 k : map(f, v) for k, v in predefined_ranges.iteritems() } |
| 68 | 67 |
| 69 def name(self): | 68 def name(self): |
| 70 return self.__name | 69 return self.__name |
| (...skipping 24 matching lines...) Expand all Loading... |
| 95 ranges = self.__predefined_ranges | 94 ranges = self.__predefined_ranges |
| 96 return None if not name in ranges else iter(ranges[name]) | 95 return None if not name in ranges else iter(ranges[name]) |
| 97 | 96 |
| 98 def __primary_range_iter(self): | 97 def __primary_range_iter(self): |
| 99 yield self.__primary_range_component | 98 yield self.__primary_range_component |
| 100 | 99 |
| 101 def all_components_iter(self): | 100 def all_components_iter(self): |
| 102 return chain(self.__primary_range_iter(), self.__named_ranges.itervalues()) | 101 return chain(self.__primary_range_iter(), self.__named_ranges.itervalues()) |
| 103 | 102 |
| 104 def is_primary_range(self, r): | 103 def is_primary_range(self, r): |
| 105 assert self.lower_bound() <= r[0] and r[1] <= self.upper_bound() | 104 assert len(r) == 2 |
| 106 primary_range = self.__primary_range | 105 return self.in_primary_range(r[0], r[1]) |
| 107 if (primary_range[0] <= r[0] and r[1] <= primary_range[1]): | |
| 108 return True | |
| 109 assert r[0] == r[1] | |
| 110 return False | |
| 111 | 106 |
| 112 def in_primary_range(self, c): | 107 def in_primary_range(self, a, b): |
| 113 return self.is_primary_range((c, c)) | 108 return self.lower_bound() <= a and b <= self.upper_bound() |
| 109 |
| 110 def numeric_range_term(self, a, b): |
| 111 assert type(a) == IntType and type(b) == IntType |
| 112 assert self.in_primary_range(a, b) |
| 113 return Term('NUMERIC_RANGE_KEY', a, b) |
| 114 | 114 |
| 115 class TransitionKey(object): | 115 class TransitionKey(object): |
| 116 '''Represents a transition from a state in DFA or NFA to another state. | 116 '''Represents a transition from a state in DFA or NFA to another state. |
| 117 | 117 |
| 118 A transition key has a list of character ranges and a list of class ranges | 118 A transition key has a list of character ranges and a list of class ranges |
| 119 (e.g., "whitespace"), defining for which characters the transition | 119 (e.g., "whitespace"), defining for which characters the transition |
| 120 happens. When we generate code based on the transition key, the character | 120 happens. When we generate code based on the transition key, the character |
| 121 ranges generate simple checks and the class ranges generate more complicated | 121 ranges generate simple checks and the class ranges generate more complicated |
| 122 conditions, e.g., function calls.''' | 122 conditions, e.g., function calls.''' |
| 123 | 123 |
| (...skipping 30 matching lines...) Expand all Loading... |
| 154 lambda : encoding.all_components_iter()) | 154 lambda : encoding.all_components_iter()) |
| 155 | 155 |
| 156 @staticmethod | 156 @staticmethod |
| 157 def single_char(encoding, char): | 157 def single_char(encoding, char): |
| 158 '''Returns a TransitionKey for a single-character transition.''' | 158 '''Returns a TransitionKey for a single-character transition.''' |
| 159 return TransitionKey.range(encoding, char, char) | 159 return TransitionKey.range(encoding, char, char) |
| 160 | 160 |
| 161 @staticmethod | 161 @staticmethod |
| 162 def range(encoding, a, b): | 162 def range(encoding, a, b): |
| 163 '''Returns a TransitionKey for a single-character transition.''' | 163 '''Returns a TransitionKey for a single-character transition.''' |
| 164 assert type(a) == IntType and type(b) == IntType | 164 return TransitionKey(encoding, encoding.numeric_range_term(a, b)) |
| 165 return TransitionKey(encoding, Term("NUMERIC_RANGE_KEY", a, b)) | |
| 166 | 165 |
| 167 @staticmethod | 166 @staticmethod |
| 168 def unique(term): # TODO(dcarney): rename | 167 def unique(term): # TODO(dcarney): rename |
| 169 '''Returns a unique TransitionKey for the given name (and creates it if it | 168 '''Returns a unique TransitionKey for the given name (and creates it if it |
| 170 doesn't exist yet). The TransitionKey won't have any real character range, | 169 doesn't exist yet). The TransitionKey won't have any real character range, |
| 171 but a newly-created "mock" character range which is separate from all other | 170 but a newly-created "mock" character range which is separate from all other |
| 172 character ranges.''' | 171 character ranges.''' |
| 173 return TransitionKey(None, Term("TERM_KEY", term)) | 172 return TransitionKey(None, Term("TERM_KEY", term)) |
| 174 | 173 |
| 175 @staticmethod | 174 @staticmethod |
| 176 def __process_term(encoding, term, components, key_map): | 175 def __process_term(encoding, term, components, key_map): |
| 177 key = term.name() | 176 key = term.name() |
| 178 args = term.args() | 177 args = term.args() |
| 179 if key == 'RANGE': | 178 if key == 'RANGE': |
| 180 components.append(Term('NUMERIC_RANGE_KEY', args[0], args[1])) | 179 components.append(encoding.numeric_range_term(args[0], args[1])) |
| 181 elif key == 'LITERAL': | 180 elif key == 'LITERAL': |
| 182 components += map(lambda x : Term('NUMERIC_RANGE_KEY', x, x), args) | 181 components += map(lambda x : encoding.numeric_range_term(x, x), args) |
| 183 elif key == 'CAT': | 182 elif key == 'CAT': |
| 184 for x in args: | 183 for x in args: |
| 185 TransitionKey.__process_term(encoding, x, components, key_map) | 184 TransitionKey.__process_term(encoding, x, components, key_map) |
| 186 elif key == 'CHARACTER_CLASS': | 185 elif key == 'CHARACTER_CLASS': |
| 187 class_name = args[0] | 186 class_name = args[0] |
| 188 if encoding.named_range(class_name): | 187 if encoding.named_range(class_name): |
| 189 c = encoding.named_range(class_name) | 188 c = encoding.named_range(class_name) |
| 190 if class_name in key_map: | 189 if class_name in key_map: |
| 191 assert key_map[class_name] == TransitionKey(encoding, c) | 190 assert key_map[class_name] == TransitionKey(encoding, c) |
| 192 components.append(c) | 191 components.append(c) |
| (...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 338 elif component.name() == 'NAMED_RANGE_KEY': | 337 elif component.name() == 'NAMED_RANGE_KEY': |
| 339 return component.args()[0] | 338 return component.args()[0] |
| 340 elif component.name() == 'EPSILON_KEY': | 339 elif component.name() == 'EPSILON_KEY': |
| 341 return 'epsilon' | 340 return 'epsilon' |
| 342 elif component.name() == 'OMEGA_KEY': | 341 elif component.name() == 'OMEGA_KEY': |
| 343 return 'omega' | 342 return 'omega' |
| 344 elif component.name() != 'NUMERIC_RANGE_KEY': | 343 elif component.name() != 'NUMERIC_RANGE_KEY': |
| 345 raise Exception('unprintable %s' % component) | 344 raise Exception('unprintable %s' % component) |
| 346 r = component.args() | 345 r = component.args() |
| 347 def to_str(x): | 346 def to_str(x): |
| 348 assert not encoding or encoding.in_primary_range(x) | 347 assert not encoding or encoding.in_primary_range(x, x) |
| 349 if x > 127: | 348 if x > 127: |
| 350 return str(x) | 349 return str(x) |
| 351 if not x in TransitionKey.__printable_cache: | 350 if not x in TransitionKey.__printable_cache: |
| 352 res = "'%s'" % chr(x) if chr(x) in printable else str(x) | 351 res = "'%s'" % chr(x) if chr(x) in printable else str(x) |
| 353 TransitionKey.__printable_cache[x] = res | 352 TransitionKey.__printable_cache[x] = res |
| 354 return TransitionKey.__printable_cache[x] | 353 return TransitionKey.__printable_cache[x] |
| 355 if r[0] == r[1]: | 354 if r[0] == r[1]: |
| 356 return '%s' % to_str(r[0]) | 355 return '%s' % to_str(r[0]) |
| 357 else: | 356 else: |
| 358 return '[%s-%s]' % (to_str(r[0]), to_str(r[1])) | 357 return '[%s-%s]' % (to_str(r[0]), to_str(r[1])) |
| (...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 512 other_keys.add(x) | 511 other_keys.add(x) |
| 513 continue | 512 continue |
| 514 (start, end) = x.args() | 513 (start, end) = x.args() |
| 515 if not start in range_map: | 514 if not start in range_map: |
| 516 range_map[start] = [] | 515 range_map[start] = [] |
| 517 range_map[start].append(end) | 516 range_map[start].append(end) |
| 518 ranges = TransitionKey.__disjoint_keys(encoding, range_map) | 517 ranges = TransitionKey.__disjoint_keys(encoding, range_map) |
| 519 if merge_ranges: | 518 if merge_ranges: |
| 520 ranges = TransitionKey.__merge_ranges(encoding, ranges) | 519 ranges = TransitionKey.__merge_ranges(encoding, ranges) |
| 521 other_keys = sorted(other_keys, cmp=TransitionKey.__component_compare) | 520 other_keys = sorted(other_keys, cmp=TransitionKey.__component_compare) |
| 522 range_terms = map(lambda x : Term('NUMERIC_RANGE_KEY', x[0], x[1]), ranges) | 521 range_terms = map( |
| 522 lambda x : encoding.numeric_range_term(x[0], x[1]), ranges) |
| 523 return chain(iter(range_terms), iter(other_keys)) | 523 return chain(iter(range_terms), iter(other_keys)) |
| 524 | 524 |
| 525 @staticmethod | 525 @staticmethod |
| 526 def __key_from_components(encoding, invert, components): | 526 def __key_from_components(encoding, invert, components): |
| 527 components = TransitionKey.__disjoint_components(encoding, components, True) | 527 components = TransitionKey.__disjoint_components(encoding, components, True) |
| 528 if invert: | 528 if invert: |
| 529 components = TransitionKey.__invert_components(encoding, components) | 529 components = TransitionKey.__invert_components(encoding, components) |
| 530 return None if not components else TransitionKey(encoding, components) | 530 return None if not components else TransitionKey(encoding, components) |
| 531 | 531 |
| 532 @staticmethod | 532 @staticmethod |
| (...skipping 20 matching lines...) Expand all Loading... |
| 553 encoding, True, TransitionKey.__flatten_keys(keys)) | 553 encoding, True, TransitionKey.__flatten_keys(keys)) |
| 554 | 554 |
| 555 @staticmethod | 555 @staticmethod |
| 556 def merged_key(encoding, keys): | 556 def merged_key(encoding, keys): |
| 557 return TransitionKey(encoding, | 557 return TransitionKey(encoding, |
| 558 TransitionKey.__disjoint_components_from_keys(encoding, keys, True)) | 558 TransitionKey.__disjoint_components_from_keys(encoding, keys, True)) |
| 559 | 559 |
| 560 @staticmethod | 560 @staticmethod |
| 561 def __invert_components(encoding, components): | 561 def __invert_components(encoding, components): |
| 562 def key(x, y): | 562 def key(x, y): |
| 563 return Term('NUMERIC_RANGE_KEY', x, y) | 563 return encoding.numeric_range_term(x, y) |
| 564 last = None | 564 last = None |
| 565 classes = set(encoding.named_range_value_iter()) | 565 classes = set(encoding.named_range_value_iter()) |
| 566 for c in components: | 566 for c in components: |
| 567 if c in classes: | 567 if c in classes: |
| 568 classes.remove(c) | 568 classes.remove(c) |
| 569 continue | 569 continue |
| 570 assert c.name() == 'NUMERIC_RANGE_KEY', 'unencoded key not invertible' | 570 assert c.name() == 'NUMERIC_RANGE_KEY', 'unencoded key not invertible' |
| 571 r = c.args() | 571 r = c.args() |
| 572 assert encoding.is_primary_range(r) | 572 assert encoding.is_primary_range(r) |
| 573 if last == None: | 573 if last == None: |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 643 { | 643 { |
| 644 'whitespace': | 644 'whitespace': |
| 645 [(9, 9), (11, 12), (32, 32), ('non_primary_whitespace',)], | 645 [(9, 9), (11, 12), (32, 32), ('non_primary_whitespace',)], |
| 646 'letter': | 646 'letter': |
| 647 [(65, 90), (97, 122), ('non_primary_letter',)], | 647 [(65, 90), (97, 122), ('non_primary_letter',)], |
| 648 'line_terminator': | 648 'line_terminator': |
| 649 [(10, 10), (13, 13), ('non_primary_line_terminator',)], | 649 [(10, 10), (13, 13), ('non_primary_line_terminator',)], |
| 650 'identifier_part_not_letter': | 650 'identifier_part_not_letter': |
| 651 [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)], | 651 [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)], |
| 652 }) | 652 }) |
| OLD | NEW |