Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(220)

Side by Side Diff: tools/lexer_generator/encoding.py

Issue 159753009: Experimental parser: break KeyEncoding off into its own file (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tools/lexer_generator/transition_keys.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are
4 # met:
5 #
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission.
15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 from types import IntType
29 from itertools import chain
30 from string import printable
31 from action import Term
32
class KeyEncoding(object):
  """Base class and name-keyed registry for lexer key encodings.

  An encoding has a primary numeric range (inclusive lower and upper bound),
  a set of named ranges, and a table of predefined ranges.  Each predefined
  range is a list of terms, where a term is either a numeric sub-range of the
  primary range or one of the encoding's named ranges.

  Constructing an instance registers it under its name; use
  KeyEncoding.get(name) to look an encoding up.
  """

  # All constructed encodings, keyed by name.  Filled in by __init__.
  __encodings = {}

  # Memoized display strings for values <= 127, pre-seeded with the escapes
  # that to_str should print symbolically instead of as raw characters.
  __printable_cache = {
    ord('\t') : '\\t',
    ord('\n') : '\\n',
    ord('\r') : '\\r',
  }

  @staticmethod
  def to_str(encoding, x):
    """Return a display string for the numeric key value x.

    Values above 127 are rendered as decimal numbers.  Other values are
    rendered as a quoted character when the character is in
    string.printable, and as a decimal number otherwise (tab, newline and
    carriage return use the escaped forms from the pre-seeded cache).

    Args:
      encoding: a KeyEncoding used only to sanity-check x, or a false value
        to skip the check.
      x: integer key value.
    """
    assert not encoding or encoding.in_primary_range(x, x)
    if x > 127:
      return str(x)
    cache = KeyEncoding.__printable_cache
    if x not in cache:
      char = chr(x)
      cache[x] = "'%s'" % char if char in printable else str(x)
    return cache[x]

  @staticmethod
  def get(name):
    """Return the encoding registered under name.

    The built-in encodings are instantiated (and thereby registered) on the
    first call, i.e. while the registry is still empty.

    Raises:
      KeyError: if no encoding is registered under name.
    """
    if not KeyEncoding.__encodings:
      Latin1Encoding()
      Utf16Encoding()
      Utf8Encoding()
    return KeyEncoding.__encodings[name]

  def __init__(self, name, primary_range, named_ranges, predefined_ranges):
    """Build the encoding and register it under name.

    Args:
      name: registry key; must not already be registered.
      primary_range: (lower, upper) pair of inclusive integer bounds.
      named_ranges: iterable of named-range identifiers.
      predefined_ranges: dict mapping a predefined range name to a list of
        tuples; a 2-tuple (a, b) denotes a numeric range and a 1-tuple
        (name,) refers to an entry of named_ranges.

    Raises:
      Exception: if a predefined range entry has neither one nor two items.
    """
    assert name not in KeyEncoding.__encodings
    assert primary_range[0] <= primary_range[1]
    KeyEncoding.__encodings[name] = self
    self.__name = name
    self.__primary_range = primary_range
    # The bounds must be set before numeric_range_term is called below,
    # because it validates its arguments against them.
    self.__lower_bound = primary_range[0]
    self.__upper_bound = primary_range[1]
    self.__primary_range_component = self.numeric_range_term(
        primary_range[0], primary_range[1])
    self.__named_ranges = {
        k: Term('NAMED_RANGE_KEY', k) for k in named_ranges}

    def to_term(v):
      # Translate one predefined-range entry into its term.
      if len(v) == 2:
        return self.numeric_range_term(v[0], v[1])
      if len(v) == 1:
        assert v[0] in self.__named_ranges
        return self.__named_ranges[v[0]]
      raise Exception('bad args %s' % str(v))

    # Store real lists so each predefined range can be iterated repeatedly.
    self.__predefined_ranges = {
        k: [to_term(v) for v in entries]
        for k, entries in predefined_ranges.items()}

  def name(self):
    """Return the name this encoding was registered under."""
    return self.__name

  def lower_bound(self):
    """Return the inclusive lower bound of the primary range."""
    return self.__lower_bound

  def upper_bound(self):
    """Return the inclusive upper bound of the primary range."""
    return self.__upper_bound

  def primary_range(self):
    """Return the primary range exactly as it was passed to __init__."""
    return self.__primary_range

  def named_range(self, name):
    """Return the term for the named range, or Term.empty_term() if unknown."""
    ranges = self.__named_ranges
    return ranges[name] if name in ranges else Term.empty_term()

  def named_range_iter(self):
    """Return an iterator over (name, term) pairs of the named ranges."""
    # Fixed: this used to read the non-existent attribute __named_range and
    # therefore raised AttributeError on every call.
    return iter(self.__named_ranges.items())

  def named_range_key_iter(self):
    """Return an iterator over the names of the named ranges."""
    return iter(self.__named_ranges)

  def named_range_value_iter(self):
    """Return an iterator over the terms of the named ranges."""
    return iter(self.__named_ranges.values())

  def predefined_range_iter(self, name):
    """Return an iterator over the terms of a predefined range.

    Returns None (not an empty iterator) when name is not a predefined range.
    """
    ranges = self.__predefined_ranges
    return iter(ranges[name]) if name in ranges else None

  def __primary_range_iter(self):
    # Single-element generator so the primary range term can be chained.
    yield self.__primary_range_component

  def all_components_iter(self):
    """Iterate over the primary range term, then all named range terms."""
    return chain(self.__primary_range_iter(), self.__named_ranges.values())

  def is_primary_range(self, r):
    """Return whether the pair r = (a, b) lies within the primary range."""
    assert len(r) == 2
    return self.in_primary_range(r[0], r[1])

  def in_primary_range(self, a, b):
    """Return whether lower_bound() <= a and b <= upper_bound()."""
    return self.lower_bound() <= a and b <= self.upper_bound()

  def numeric_range_term(self, a, b):
    """Return the NUMERIC_RANGE_KEY term for a..b.

    Both bounds must be plain ints inside the primary range.
    """
    # Exact type comparison is intentional: it keeps the original check,
    # which does not accept int subclasses.
    assert type(a) == IntType and type(b) == IntType
    assert self.in_primary_range(a, b)
    return Term('NUMERIC_RANGE_KEY', a, b)
129
class Latin1Encoding(KeyEncoding):
  """Encoding registered as 'latin1'.

  The primary range is 0..255 and there are no named ranges, so every
  predefined range is given purely as numeric (low, high) pairs.
  """

  def __init__(self):
    primary_range = (0, 255)
    named_ranges = []
    predefined_ranges = {
      'whitespace':
        [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160)],
      'letter':
        [(65, 90), (97, 122), (170, 170), (181, 181),
         (186, 186), (192, 214), (216, 246), (248, 255)],
      'line_terminator':
        [(10, 10), (13, 13)],
      'identifier_part_not_letter':
        [(48, 57), (95, 95)]
    }
    super(Latin1Encoding, self).__init__(
        'latin1', primary_range, named_ranges, predefined_ranges)
148
class Utf16Encoding(KeyEncoding):
  """Encoding registered as 'utf16'.

  The primary range is 0..255.  Each predefined range lists its numeric
  (low, high) pairs inside the primary range and ends with a 1-tuple that
  refers to the matching 'non_primary_*' named range.
  """

  def __init__(self):
    primary_range = (0, 255)
    # 'non_primary_everything_else' is declared as a named range but is not
    # referenced by any of the predefined ranges below.
    named_ranges = [
      'non_primary_whitespace',
      'non_primary_letter',
      'non_primary_identifier_part_not_letter',
      'non_primary_line_terminator',
      'non_primary_everything_else',
    ]
    predefined_ranges = {
      'whitespace':
        [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160),
         ('non_primary_whitespace',)],
      'letter':
        [(65, 90), (97, 122), (170, 170), (181, 181),
         (186, 186), (192, 214), (216, 246), (248, 255),
         ('non_primary_letter',)],
      'line_terminator':
        [(10, 10), (13, 13), ('non_primary_line_terminator',)],
      'identifier_part_not_letter':
        [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)],
    }
    super(Utf16Encoding, self).__init__(
        'utf16', primary_range, named_ranges, predefined_ranges)
173
class Utf8Encoding(KeyEncoding):
  """Encoding registered as 'utf8'.

  The primary range is 0..127.  Each predefined range lists its numeric
  (low, high) pairs inside the primary range and ends with a 1-tuple that
  refers to the matching 'non_primary_*' named range.
  """

  def __init__(self):
    primary_range = (0, 127)
    # 'non_primary_everything_else' is declared as a named range but is not
    # referenced by any of the predefined ranges below.
    named_ranges = [
      'non_primary_whitespace',
      'non_primary_letter',
      'non_primary_identifier_part_not_letter',
      'non_primary_line_terminator',
      'non_primary_everything_else',
    ]
    predefined_ranges = {
      'whitespace':
        [(9, 9), (11, 12), (32, 32), ('non_primary_whitespace',)],
      'letter':
        [(65, 90), (97, 122), ('non_primary_letter',)],
      'line_terminator':
        [(10, 10), (13, 13), ('non_primary_line_terminator',)],
      'identifier_part_not_letter':
        [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)],
    }
    super(Utf8Encoding, self).__init__(
        'utf8', primary_range, named_ranges, predefined_ranges)
OLDNEW
« no previous file with comments | « no previous file | tools/lexer_generator/transition_keys.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698