| OLD | NEW |
| (Empty) |
| 1 # Copyright 2014 the V8 project authors. All rights reserved. | |
| 2 # Redistribution and use in source and binary forms, with or without | |
| 3 # modification, are permitted provided that the following conditions are | |
| 4 # met: | |
| 5 # | |
| 6 # * Redistributions of source code must retain the above copyright | |
| 7 # notice, this list of conditions and the following disclaimer. | |
| 8 # * Redistributions in binary form must reproduce the above | |
| 9 # copyright notice, this list of conditions and the following | |
| 10 # disclaimer in the documentation and/or other materials provided | |
| 11 # with the distribution. | |
| 12 # * Neither the name of Google Inc. nor the names of its | |
| 13 # contributors may be used to endorse or promote products derived | |
| 14 # from this software without specific prior written permission. | |
| 15 # | |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 27 | |
| 28 from types import IntType | |
| 29 from itertools import chain | |
| 30 from string import printable | |
| 31 from action import Term | |
| 32 | |
class KeyEncoding(object):
  """Description of an encoding's key space, plus a registry of encodings.

  Every instance registers itself under its name in a class-level table when
  it is constructed. `KeyEncoding.get` looks an encoding up by name and, the
  first time it is called, instantiates Latin1Encoding, Utf16Encoding and
  Utf8Encoding so that they are registered.

  An encoding is described by:
    * a primary range: a (lower, upper) pair of ints, both bounds inclusive;
    * named ranges: each name is mapped to a NAMED_RANGE_KEY term;
    * predefined ranges: each class name is mapped to a list of terms.
  """

  # Registry: encoding name -> KeyEncoding instance (filled in by __init__).
  __encodings = {}

  # Memo for to_str: code point -> display string. Pre-seeded so that tab,
  # newline and carriage return are shown as escapes rather than raw.
  __printable_cache = {
    ord('\t') : '\\t',
    ord('\n') : '\\n',
    ord('\r') : '\\r',
  }

  @staticmethod
  def to_str(encoding, x):
    """Return a display string for the integer key x.

    Keys above 127, and keys whose character is not in string.printable, are
    rendered as their decimal value. Tab, newline and carriage return are
    rendered as two-character escapes. Any other key is rendered as the
    character itself.

    Args:
      encoding: a KeyEncoding, or a falsy value to skip the range check.
      x: the integer key to render.
    """
    assert not encoding or encoding.in_primary_range(x, x)
    if x > 127:
      return str(x)
    cache = KeyEncoding.__printable_cache
    if x not in cache:
      char = chr(x)
      cache[x] = char if char in printable else str(x)
    return cache[x]

  @staticmethod
  def get(name):
    """Return the registered encoding called name.

    The three known encodings are created on first use; each constructor
    registers its instance as a side effect.

    Raises:
      KeyError: if no encoding is registered under name.
    """
    if not KeyEncoding.__encodings:
      Latin1Encoding()
      Utf16Encoding()
      Utf8Encoding()
    return KeyEncoding.__encodings[name]

  def __init__(self, name, primary_range, named_ranges, predefined_ranges):
    """Build the encoding and register it under name.

    Args:
      name: registry key; must not already be registered.
      primary_range: (lower, upper) pair of ints with lower <= upper.
      named_ranges: iterable of range names.
      predefined_ranges: dict mapping a class name to a list of tuples. A
        2-tuple (a, b) is a numeric range inside the primary range; a
        1-tuple (range_name,) refers to one of named_ranges.

    Raises:
      Exception: if a tuple in predefined_ranges has any other length.
    """
    assert name not in KeyEncoding.__encodings
    assert primary_range[0] <= primary_range[1]
    KeyEncoding.__encodings[name] = self
    self.__name = name
    self.__primary_range = primary_range
    # The bounds must be set before numeric_range_term is used below, since
    # it validates its arguments against them.
    self.__lower_bound = primary_range[0]
    self.__upper_bound = primary_range[1]
    self.__primary_range_component = self.numeric_range_term(
        primary_range[0], primary_range[1])
    self.__named_ranges = {
      k : Term('NAMED_RANGE_KEY', k) for k in named_ranges }

    def to_term(v):
      # Translate one predefined-range tuple into its term.
      if len(v) == 2:
        return self.numeric_range_term(v[0], v[1])
      elif len(v) == 1:
        assert v[0] in self.__named_ranges
        return self.__named_ranges[v[0]]
      raise Exception('bad args %s' % str(v))

    # Real lists (not lazy map objects) so predefined_range_iter can hand out
    # a fresh iterator on every call, on any Python version.
    self.__predefined_ranges = {
      k : [to_term(x) for x in v] for k, v in predefined_ranges.items() }

  def name(self):
    """Return the name this encoding was registered under."""
    return self.__name

  def lower_bound(self):
    """Return the inclusive lower bound of the primary range."""
    return self.__lower_bound

  def upper_bound(self):
    """Return the inclusive upper bound of the primary range."""
    return self.__upper_bound

  def primary_range(self):
    """Return the primary range exactly as passed to the constructor."""
    return self.__primary_range

  def named_range(self, name):
    """Return the term for the named range, or Term.empty_term() if unknown."""
    ranges = self.__named_ranges
    return Term.empty_term() if name not in ranges else ranges[name]

  def named_range_iter(self):
    """Return an iterator over (name, term) pairs of the named ranges."""
    # Fixed: this used to read the never-assigned attribute `__named_range`
    # (missing trailing "s") and raised AttributeError on every call.
    return iter(self.__named_ranges.items())

  def named_range_key_iter(self):
    """Return an iterator over the names of the named ranges."""
    return iter(self.__named_ranges)

  def named_range_value_iter(self):
    """Return an iterator over the terms of the named ranges."""
    return iter(self.__named_ranges.values())

  def predefined_range_iter(self, name):
    """Return an iterator over the terms of a predefined class.

    Returns None (not an empty iterator) when name is not a predefined class.
    """
    ranges = self.__predefined_ranges
    return None if name not in ranges else iter(ranges[name])

  def __primary_range_iter(self):
    # Single-element generator so the primary range can be chained with the
    # named ranges in all_components_iter.
    yield self.__primary_range_component

  def all_components_iter(self):
    """Iterate over the primary range term, then every named range term."""
    return chain(self.__primary_range_iter(), self.__named_ranges.values())

  def is_primary_range(self, r):
    """Return True if the pair r = (a, b) passes in_primary_range(a, b)."""
    assert len(r) == 2
    return self.in_primary_range(r[0], r[1])

  def in_primary_range(self, a, b):
    """Return True if lower_bound() <= a and b <= upper_bound().

    Note that a <= b is not checked here.
    """
    return self.lower_bound() <= a and b <= self.upper_bound()

  def numeric_range_term(self, a, b):
    """Return a NUMERIC_RANGE_KEY term for the range (a, b).

    Both bounds must be plain ints that pass in_primary_range.
    """
    # Exact type check, equivalent to the former `type(a) == IntType` on
    # Python 2 (where IntType is int); subclasses such as bool are rejected.
    assert type(a) is int and type(b) is int
    assert self.in_primary_range(a, b)
    return Term('NUMERIC_RANGE_KEY', a, b)
| 129 | |
class Latin1Encoding(KeyEncoding):
  """Encoding registered as 'latin1', with primary range (0, 255).

  It declares no named ranges, so every predefined class consists of numeric
  (low, high) pairs only.
  """

  def __init__(self):
    # Predefined character classes, each a list of (low, high) pairs.
    classes = {
      'whitespace': [
        (9, 9), (11, 12), (32, 32), (133, 133), (160, 160),
      ],
      'letter': [
        (65, 90), (97, 122), (170, 170), (181, 181),
        (186, 186), (192, 214), (216, 246), (248, 255),
      ],
      'line_terminator': [
        (10, 10), (13, 13),
      ],
      'identifier_part_not_letter': [
        (48, 57), (95, 95),
      ],
    }
    super(Latin1Encoding, self).__init__(
        name='latin1',
        primary_range=(0, 255),
        named_ranges=[],
        predefined_ranges=classes)
| 148 | |
class Utf16Encoding(KeyEncoding):
  """Encoding registered as 'utf16', with primary range (0, 255).

  Besides numeric (low, high) pairs, each predefined class also includes one
  of the named 'non_primary_*' ranges, written as a 1-tuple.
  """

  def __init__(self):
    # Names of the ranges declared in addition to the primary range.
    range_names = [
      'non_primary_whitespace',
      'non_primary_letter',
      'non_primary_identifier_part_not_letter',
      'non_primary_line_terminator',
      'non_primary_everything_else',
    ]
    # Predefined character classes: (low, high) pairs followed by a 1-tuple
    # naming the matching entry of range_names.
    classes = {
      'whitespace': [
        (9, 9), (11, 12), (32, 32), (133, 133), (160, 160),
        ('non_primary_whitespace',),
      ],
      'letter': [
        (65, 90), (97, 122), (170, 170), (181, 181),
        (186, 186), (192, 214), (216, 246), (248, 255),
        ('non_primary_letter',),
      ],
      'line_terminator': [
        (10, 10), (13, 13),
        ('non_primary_line_terminator',),
      ],
      'identifier_part_not_letter': [
        (48, 57), (95, 95),
        ('non_primary_identifier_part_not_letter',),
      ],
    }
    super(Utf16Encoding, self).__init__(
        name='utf16',
        primary_range=(0, 255),
        named_ranges=range_names,
        predefined_ranges=classes)
| 173 | |
class Utf8Encoding(KeyEncoding):
  """Encoding registered as 'utf8', with primary range (0, 127).

  Besides numeric (low, high) pairs, each predefined class also includes one
  of the named 'non_primary_*' ranges, written as a 1-tuple.
  """

  def __init__(self):
    # Names of the ranges declared in addition to the primary range.
    range_names = [
      'non_primary_whitespace',
      'non_primary_letter',
      'non_primary_identifier_part_not_letter',
      'non_primary_line_terminator',
      'non_primary_everything_else',
    ]
    # Predefined character classes: (low, high) pairs followed by a 1-tuple
    # naming the matching entry of range_names.
    classes = {
      'whitespace': [
        (9, 9), (11, 12), (32, 32),
        ('non_primary_whitespace',),
      ],
      'letter': [
        (65, 90), (97, 122),
        ('non_primary_letter',),
      ],
      'line_terminator': [
        (10, 10), (13, 13),
        ('non_primary_line_terminator',),
      ],
      'identifier_part_not_letter': [
        (48, 57), (95, 95),
        ('non_primary_identifier_part_not_letter',),
      ],
    }
    super(Utf8Encoding, self).__init__(
        name='utf8',
        primary_range=(0, 127),
        named_ranges=range_names,
        predefined_ranges=classes)
| OLD | NEW |