| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| 11 # with the distribution. | 11 # with the distribution. |
| 12 # * Neither the name of Google Inc. nor the names of its | 12 # * Neither the name of Google Inc. nor the names of its |
| 13 # contributors may be used to endorse or promote products derived | 13 # contributors may be used to endorse or promote products derived |
| 14 # from this software without specific prior written permission. | 14 # from this software without specific prior written permission. |
| 15 # | 15 # |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 from types import IntType | |
| 29 from itertools import chain | 28 from itertools import chain |
| 29 from encoding import KeyEncoding |
| 30 from action import Term | 30 from action import Term |
| 31 from string import printable | |
| 32 | |
| 33 class KeyEncoding(object): | |
| 34 | |
| 35 __encodings = {} | |
| 36 | |
| 37 @staticmethod | |
| 38 def get(name): | |
| 39 if not KeyEncoding.__encodings: | |
| 40 Latin1Encoding() | |
| 41 Utf16Encoding() | |
| 42 Utf8Encoding() | |
| 43 return KeyEncoding.__encodings[name] | |
| 44 | |
| 45 def __init__(self, name, primary_range, named_ranges, predefined_ranges): | |
| 46 assert not name in KeyEncoding.__encodings | |
| 47 assert primary_range[0] <= primary_range[1] | |
| 48 KeyEncoding.__encodings[name] = self | |
| 49 self.__name = name | |
| 50 self.__primary_range = primary_range | |
| 51 self.__lower_bound = primary_range[0] | |
| 52 self.__upper_bound = primary_range[1] | |
| 53 self.__primary_range_component = self.numeric_range_term(primary_range[0], | |
| 54 primary_range[1]) | |
| 55 self.__named_ranges = { | |
| 56 k : Term('NAMED_RANGE_KEY', k) for k in named_ranges } | |
| 57 def f(v): | |
| 58 if len(v) == 2: | |
| 59 return self.numeric_range_term(v[0], v[1]) | |
| 60 elif len(v) == 1: | |
| 61 assert v[0] in self.__named_ranges | |
| 62 return self.__named_ranges[v[0]] | |
| 63 else: | |
| 64 raise Exception() | |
| 65 self.__predefined_ranges = { | |
| 66 k : map(f, v) for k, v in predefined_ranges.iteritems() } | |
| 67 | |
| 68 def name(self): | |
| 69 return self.__name | |
| 70 | |
| 71 def lower_bound(self): | |
| 72 return self.__lower_bound | |
| 73 | |
| 74 def upper_bound(self): | |
| 75 return self.__upper_bound | |
| 76 | |
| 77 def primary_range(self): | |
| 78 return self.__primary_range | |
| 79 | |
| 80 def named_range(self, name): | |
| 81 ranges = self.__named_ranges | |
| 82 return Term.empty_term() if not name in ranges else ranges[name] | |
| 83 | |
| 84 def named_range_iter(self): | |
| 85 return self.__named_range.iteritems() | |
| 86 | |
| 87 def named_range_key_iter(self): | |
| 88 return self.__named_ranges.iterkeys() | |
| 89 | |
| 90 def named_range_value_iter(self): | |
| 91 return self.__named_ranges.itervalues() | |
| 92 | |
| 93 def predefined_range_iter(self, name): | |
| 94 ranges = self.__predefined_ranges | |
| 95 return None if not name in ranges else iter(ranges[name]) | |
| 96 | |
| 97 def __primary_range_iter(self): | |
| 98 yield self.__primary_range_component | |
| 99 | |
| 100 def all_components_iter(self): | |
| 101 return chain(self.__primary_range_iter(), self.__named_ranges.itervalues()) | |
| 102 | |
| 103 def is_primary_range(self, r): | |
| 104 assert len(r) == 2 | |
| 105 return self.in_primary_range(r[0], r[1]) | |
| 106 | |
| 107 def in_primary_range(self, a, b): | |
| 108 return self.lower_bound() <= a and b <= self.upper_bound() | |
| 109 | |
| 110 def numeric_range_term(self, a, b): | |
| 111 assert type(a) == IntType and type(b) == IntType | |
| 112 assert self.in_primary_range(a, b) | |
| 113 return Term('NUMERIC_RANGE_KEY', a, b) | |
| 114 | 31 |
| 115 class TransitionKey(object): | 32 class TransitionKey(object): |
| 116 '''Represents a transition from a state in DFA or NFA to another state. | 33 '''Represents a transition from a state in DFA or NFA to another state. |
| 117 | 34 |
| 118 A transition key has a list of character ranges and a list of class ranges | 35 A transition key has a list of character ranges and a list of class ranges |
| 119 (e.g., "whitespace"), defining for which characters the transition | 36 (e.g., "whitespace"), defining for which characters the transition |
| 120 happens. When we generate code based on the transition key, the character | 37 happens. When we generate code based on the transition key, the character |
| 121 ranges generate simple checks and the class ranges generate more complicated | 38 ranges generate simple checks and the class ranges generate more complicated |
| 122 conditions, e.g., function calls.''' | 39 conditions, e.g., function calls.''' |
| 123 | 40 |
| (...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 292 | 209 |
| 293 def __hash__(self): | 210 def __hash__(self): |
| 294 return hash(self.__term) | 211 return hash(self.__term) |
| 295 | 212 |
| 296 def __ne__(self, other): | 213 def __ne__(self, other): |
| 297 return not self == other | 214 return not self == other |
| 298 | 215 |
| 299 def __eq__(self, other): | 216 def __eq__(self, other): |
| 300 return isinstance(other, TransitionKey) and self.__term == other.__term | 217 return isinstance(other, TransitionKey) and self.__term == other.__term |
| 301 | 218 |
| 302 @staticmethod | |
| 303 def __class_name(encoding, r): | |
| 304 for name, v in encoding.class_range_iter(): | |
| 305 if r == v: | |
| 306 return name | |
| 307 assert False | |
| 308 | |
| 309 @staticmethod | |
| 310 def __unique_name(r): | |
| 311 for name, v in TransitionKey.__cached_keys['no_encoding'].items(): | |
| 312 if v.__ranges and r == v.__ranges[0]: | |
| 313 return name[2:] | |
| 314 assert False | |
| 315 | |
| 316 def range_iter(self, encoding): | 219 def range_iter(self, encoding): |
| 317 for c in self.__flatten(): | 220 for c in self.__flatten(): |
| 318 if c.name() == 'NUMERIC_RANGE_KEY': | 221 if c.name() == 'NUMERIC_RANGE_KEY': |
| 319 yield ('PRIMARY_RANGE', (c.args()[0], c.args()[1])) | 222 yield ('PRIMARY_RANGE', (c.args()[0], c.args()[1])) |
| 320 elif c.name() == 'NAMED_RANGE_KEY': | 223 elif c.name() == 'NAMED_RANGE_KEY': |
| 321 yield ('CLASS', c.args()[0]) | 224 yield ('CLASS', c.args()[0]) |
| 322 elif c.name() == 'TERM_KEY': | 225 elif c.name() == 'TERM_KEY': |
| 323 yield ('UNIQUE', c.args()[0]) | 226 yield ('UNIQUE', c.args()[0]) |
| 324 else: | 227 else: |
| 325 assert False, 'unimplemented %s' % c | 228 assert False, 'unimplemented %s' % c |
| 326 | 229 |
| 327 __printable_cache = { | |
| 328 ord('\t') : '\\t', | |
| 329 ord('\n') : '\\n', | |
| 330 ord('\r') : '\\r', | |
| 331 } | |
| 332 | |
| 333 @staticmethod | 230 @staticmethod |
| 334 def __component_str(encoding, component): | 231 def __component_str(encoding, component): |
| 335 if component.name() == 'TERM_KEY': | 232 if component.name() == 'TERM_KEY': |
| 336 return component.args()[0] | 233 return component.args()[0] |
| 337 elif component.name() == 'NAMED_RANGE_KEY': | 234 elif component.name() == 'NAMED_RANGE_KEY': |
| 338 return component.args()[0] | 235 return component.args()[0] |
| 339 elif component.name() == 'EPSILON_KEY': | 236 elif component.name() == 'EPSILON_KEY': |
| 340 return 'epsilon' | 237 return 'epsilon' |
| 341 elif component.name() == 'OMEGA_KEY': | 238 elif component.name() == 'OMEGA_KEY': |
| 342 return 'omega' | 239 return 'omega' |
| 343 elif component.name() != 'NUMERIC_RANGE_KEY': | 240 elif component.name() == 'NUMERIC_RANGE_KEY': |
| 344 raise Exception('unprintable %s' % component) | 241 r = component.args() |
| 345 r = component.args() | 242 to_str = lambda x: KeyEncoding.to_str(encoding, x) |
| 346 def to_str(x): | 243 if r[0] == r[1]: |
| 347 assert not encoding or encoding.in_primary_range(x, x) | 244 return '%s' % to_str(r[0]) |
| 348 if x > 127: | |
| 349 return str(x) | |
| 350 if not x in TransitionKey.__printable_cache: | |
| 351 res = "'%s'" % chr(x) if chr(x) in printable else str(x) | |
| 352 TransitionKey.__printable_cache[x] = res | |
| 353 return TransitionKey.__printable_cache[x] | |
| 354 if r[0] == r[1]: | |
| 355 return '%s' % to_str(r[0]) | |
| 356 else: | |
| 357 return '[%s-%s]' % (to_str(r[0]), to_str(r[1])) | 245 return '[%s-%s]' % (to_str(r[0]), to_str(r[1])) |
| 246 raise Exception('unprintable %s' % component) |
| 358 | 247 |
| 359 def __flatten(self): | 248 def __flatten(self): |
| 360 return self.__flatten_components([self.__term]) | 249 return self.__flatten_components([self.__term]) |
| 361 | 250 |
| 362 @staticmethod | 251 @staticmethod |
| 363 def __flatten_components(components): | 252 def __flatten_components(components): |
| 364 for component in components: | 253 for component in components: |
| 365 if component.name() != 'COMPOSITE_KEY': | 254 if component.name() != 'COMPOSITE_KEY': |
| 366 yield component | 255 yield component |
| 367 else: | 256 else: |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 409 (a, b, sign) = (b, a, 1) # normalize ordering so that a0 < b0 | 298 (a, b, sign) = (b, a, 1) # normalize ordering so that a0 < b0 |
| 410 assert a[0] < b[0] | 299 assert a[0] < b[0] |
| 411 if b[1] <= a[1]: # subset | 300 if b[1] <= a[1]: # subset |
| 412 return 3 * sign | 301 return 3 * sign |
| 413 if a[1] < b[0]: # non overlap | 302 if a[1] < b[0]: # non overlap |
| 414 return 2 * sign | 303 return 2 * sign |
| 415 return 5 * sign # partial overlap | 304 return 5 * sign # partial overlap |
| 416 | 305 |
| 417 @staticmethod | 306 @staticmethod |
| 418 def __construct(encoding, components): | 307 def __construct(encoding, components): |
| 308 if isinstance(components, Term): |
| 309 components = [components] |
| 419 is_unique = False | 310 is_unique = False |
| 420 acc = [] | 311 acc = [] |
| 421 last = Term.empty_term() | 312 last = Term.empty_term() |
| 422 for current in TransitionKey.__flatten_components(components): | 313 for current in TransitionKey.__flatten_components(components): |
| 423 name = current.name() | 314 name = current.name() |
| 424 if last: | 315 if last: |
| 425 assert name != 'EPSILON_KEY' and name != 'OMEGA_KEY', 'cannot merge' | 316 assert name != 'EPSILON_KEY' and name != 'OMEGA_KEY', 'cannot merge' |
| 426 c = TransitionKey.__component_compare(last, current) | 317 c = TransitionKey.__component_compare(last, current) |
| 427 assert c == -1 or c == -2, 'bad order %s %s' % (str(last), str(current)) | 318 assert c == -1 or c == -2, 'bad order %s %s' % (str(last), str(current)) |
| 428 if name == 'EPSILON_KEY' or name == 'OMEGA_KEY' or name == 'TERM_KEY': | 319 if name == 'EPSILON_KEY' or name == 'OMEGA_KEY' or name == 'TERM_KEY': |
| 429 pass | 320 pass |
| 430 elif name == 'NUMERIC_RANGE_KEY': | 321 elif name == 'NUMERIC_RANGE_KEY': |
| 431 assert encoding.is_primary_range(current.args()) | 322 assert encoding.is_primary_range(current.args()) |
| 432 if last.name() == 'NUMERIC_RANGE_KEY': | 323 if last.name() == 'NUMERIC_RANGE_KEY': |
| 433 assert last.args()[1] + 1 < current.args()[0], 'ranges must be merged' | 324 assert last.args()[1] + 1 < current.args()[0], 'ranges must be merged' |
| 434 elif name == 'NAMED_RANGE_KEY': | 325 elif name == 'NAMED_RANGE_KEY': |
| 435 assert encoding.named_range(current.args()[0]) | 326 assert encoding.named_range(current.args()[0]) |
| 436 else: | 327 else: |
| 437 raise Exception('illegal component %s' % str(current)) | 328 raise Exception('illegal component %s' % str(current)) |
| 438 acc.append(current) | 329 acc.append(current) |
| 439 last = current | 330 last = current |
| 440 assert acc, "must have components" | 331 assert acc, "must have components" |
| 441 return acc[0] if len(acc) == 1 else Term('COMPOSITE_KEY', *acc) | 332 return acc[0] if len(acc) == 1 else Term('COMPOSITE_KEY', *acc) |
| 442 | 333 |
| 443 def __init__(self, encoding, components): | 334 def __init__(self, encoding, components): |
| 444 if isinstance(components, Term): | |
| 445 components = [components] | |
| 446 self.__term = TransitionKey.__construct(encoding, components) | 335 self.__term = TransitionKey.__construct(encoding, components) |
| 447 self.__cached_hash = None | 336 self.__cached_hash = None |
| 448 | 337 |
| 449 def to_string(self, encoding): | 338 def to_string(self, encoding): |
| 450 return ', '.join(map(lambda x : TransitionKey.__component_str(encoding, x), | 339 return ', '.join(map(lambda x : TransitionKey.__component_str(encoding, x), |
| 451 self.__flatten())) | 340 self.__flatten())) |
| 452 | 341 |
| 453 def __str__(self): | 342 def __str__(self): |
| 454 return self.to_string(None) | 343 return self.to_string(None) |
| 455 | 344 |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 552 return TransitionKey.__key_from_components( | 441 return TransitionKey.__key_from_components( |
| 553 encoding, True, TransitionKey.__flatten_keys(keys)) | 442 encoding, True, TransitionKey.__flatten_keys(keys)) |
| 554 | 443 |
| 555 @staticmethod | 444 @staticmethod |
| 556 def merged_key(encoding, keys): | 445 def merged_key(encoding, keys): |
| 557 return TransitionKey(encoding, | 446 return TransitionKey(encoding, |
| 558 TransitionKey.__disjoint_components_from_keys(encoding, keys, True)) | 447 TransitionKey.__disjoint_components_from_keys(encoding, keys, True)) |
| 559 | 448 |
| 560 @staticmethod | 449 @staticmethod |
| 561 def __invert_components(encoding, components): | 450 def __invert_components(encoding, components): |
| 562 def key(x, y): | 451 key = lambda x, y: encoding.numeric_range_term(x, y) |
| 563 return encoding.numeric_range_term(x, y) | |
| 564 last = None | 452 last = None |
| 565 classes = set(encoding.named_range_value_iter()) | 453 classes = set(encoding.named_range_value_iter()) |
| 566 for c in components: | 454 for c in components: |
| 567 if c in classes: | 455 if c in classes: |
| 568 classes.remove(c) | 456 classes.remove(c) |
| 569 continue | 457 continue |
| 570 assert c.name() == 'NUMERIC_RANGE_KEY', 'unencoded key not invertible' | 458 assert c.name() == 'NUMERIC_RANGE_KEY', 'unencoded key not invertible' |
| 571 r = c.args() | 459 r = c.args() |
| 572 assert encoding.is_primary_range(r) | 460 assert encoding.is_primary_range(r) |
| 573 if last == None: | 461 if last == None: |
| 574 if r[0] != encoding.lower_bound(): | 462 if r[0] != encoding.lower_bound(): |
| 575 yield key(encoding.lower_bound(), r[0] - 1) | 463 yield key(encoding.lower_bound(), r[0] - 1) |
| 576 elif last[1] + 1 < r[0]: | 464 elif last[1] + 1 < r[0]: |
| 577 yield key(last[1] + 1, r[0] - 1) | 465 yield key(last[1] + 1, r[0] - 1) |
| 578 last = r | 466 last = r |
| 579 upper_bound = encoding.primary_range()[1] | 467 upper_bound = encoding.primary_range()[1] |
| 580 if last == None: | 468 if last == None: |
| 581 r = encoding.primary_range() | 469 r = encoding.primary_range() |
| 582 yield key(r[0], r[1]) | 470 yield key(r[0], r[1]) |
| 583 elif last[1] < upper_bound: | 471 elif last[1] < upper_bound: |
| 584 yield key(last[1] + 1, upper_bound) | 472 yield key(last[1] + 1, upper_bound) |
| 585 for c in sorted(classes, TransitionKey.__component_compare): | 473 for c in sorted(classes, TransitionKey.__component_compare): |
| 586 yield c | 474 yield c |
| 587 | |
| 588 class Latin1Encoding(KeyEncoding): | |
| 589 | |
| 590 def __init__(self): | |
| 591 super(Latin1Encoding, self).__init__( | |
| 592 'latin1', | |
| 593 (0, 255), | |
| 594 [], | |
| 595 { | |
| 596 'whitespace': | |
| 597 [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160)], | |
| 598 'letter': | |
| 599 [(65, 90), (97, 122), (170, 170), (181, 181), | |
| 600 (186, 186), (192, 214), (216, 246), (248, 255)], | |
| 601 'line_terminator': | |
| 602 [(10, 10), (13, 13)], | |
| 603 'identifier_part_not_letter': | |
| 604 [(48, 57), (95, 95)] | |
| 605 }) | |
| 606 | |
| 607 class Utf16Encoding(KeyEncoding): | |
| 608 | |
| 609 def __init__(self): | |
| 610 super(Utf16Encoding, self).__init__( | |
| 611 'utf16', | |
| 612 (0, 255), | |
| 613 ['non_primary_whitespace', | |
| 614 'non_primary_letter', | |
| 615 'non_primary_identifier_part_not_letter', | |
| 616 'non_primary_line_terminator', | |
| 617 'non_primary_everything_else'], | |
| 618 { | |
| 619 'whitespace': | |
| 620 [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160), | |
| 621 ('non_primary_whitespace',)], | |
| 622 'letter': | |
| 623 [(65, 90), (97, 122), (170, 170), (181, 181), | |
| 624 (186, 186), (192, 214), (216, 246), (248, 255), | |
| 625 ('non_primary_letter',)], | |
| 626 'line_terminator': | |
| 627 [(10, 10), (13, 13), ('non_primary_line_terminator',)], | |
| 628 'identifier_part_not_letter': | |
| 629 [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)], | |
| 630 }) | |
| 631 | |
| 632 class Utf8Encoding(KeyEncoding): | |
| 633 | |
| 634 def __init__(self): | |
| 635 super(Utf8Encoding, self).__init__( | |
| 636 'utf8', | |
| 637 (0, 127), | |
| 638 ['non_primary_whitespace', | |
| 639 'non_primary_letter', | |
| 640 'non_primary_identifier_part_not_letter', | |
| 641 'non_primary_line_terminator', | |
| 642 'non_primary_everything_else'], | |
| 643 { | |
| 644 'whitespace': | |
| 645 [(9, 9), (11, 12), (32, 32), ('non_primary_whitespace',)], | |
| 646 'letter': | |
| 647 [(65, 90), (97, 122), ('non_primary_letter',)], | |
| 648 'line_terminator': | |
| 649 [(10, 10), (13, 13), ('non_primary_line_terminator',)], | |
| 650 'identifier_part_not_letter': | |
| 651 [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)], | |
| 652 }) | |
| OLD | NEW |