OLD | NEW |
---|---|
(Empty) | |
1 # Copyright (C) 2013 Google Inc. All rights reserved. | |
2 # | |
3 # Redistribution and use in source and binary forms, with or without | |
4 # modification, are permitted provided that the following conditions are | |
5 # met: | |
6 # | |
7 # * Redistributions of source code must retain the above copyright | |
8 # notice, this list of conditions and the following disclaimer. | |
9 # * Redistributions in binary form must reproduce the above | |
10 # copyright notice, this list of conditions and the following disclaimer | |
11 # in the documentation and/or other materials provided with the | |
12 # distribution. | |
13 # * Neither the name of Google Inc. nor the names of its | |
14 # contributors may be used to endorse or promote products derived from | |
15 # this software without specific prior written permission. | |
16 # | |
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | |
29 """Parser for Blink IDL. | |
30 | |
31 The parser uses the PLY (Python Lex-Yacc) library to build a set of parsing | |
32 rules which understand the Blink dialect of Web IDL. | |
33 It derives from a standard Web IDL parser, overriding rules where Blink IDL | |
34 differs syntactically or semantically from the base parser, or where the base | |
35 parser diverges from the Web IDL standard. | |
36 | |
37 Web IDL: | |
38 http://www.w3.org/TR/WebIDL/ | |
39 Web IDL Grammar: | |
40 http://www.w3.org/TR/WebIDL/#idl-grammar | |
41 PLY: | |
42 http://www.dabeaz.com/ply/ | |
43 """ | |
44 | |
45 # Disable check for line length and Member as Function due to how grammar rules | |
46 # are defined with PLY | |
47 # | |
48 # pylint: disable=R0201 | |
49 # pylint: disable=C0301 | |
50 # | |
51 # Disable attribute validation, as lint can't import parent class to check | |
52 # pylint: disable=E1101 | |
haraken
2013/07/16 14:17:51
Is this comment helpful?
Nils Barth (inactive)
2013/07/17 12:05:09
(As above.) Yes, quiets pylint error.
| |
53 | |
54 import os.path | |
55 import sys | |
56 | |
57 # PLY is in Chromium src/third_party/ply | |
58 module_path, module_name = os.path.split(__file__) | |
59 third_party = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir) | |
60 sys.path.append(third_party) | |
haraken
2013/07/16 14:17:51
Instead of writing the relative path here, let's p
Nils Barth (inactive)
2013/07/17 12:05:09
(See separate response.)
| |
61 from ply import yacc | |
62 | |
63 # Base parser is in Chromium src/tools/idl_parser | |
64 tools_dir = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir, os.pardir, 'tools') | |
65 sys.path.append(tools_dir) | |
haraken
2013/07/16 14:17:51
Ditto.
Nils Barth (inactive)
2013/07/17 12:05:09
(Ditto.)
| |
66 # Don't change case of ListFromConcat, for consistency with base parser | |
haraken
2013/07/16 14:17:51
Nit: I'd remove this comment.
Nils Barth (inactive)
2013/07/17 12:05:09
Done.
| |
67 from idl_parser.idl_parser import IDLParser, ListFromConcat | |
68 # Change function name, due to different Chromium/Blink convention | |
haraken
2013/07/16 14:17:51
Ditto.
Nils Barth (inactive)
2013/07/17 12:05:09
Done.
| |
69 from idl_parser.idl_parser import ParseFile as parse_file | |
haraken
2013/07/16 14:17:51
Looks like parse_file is unused.
Nils Barth (inactive)
2013/07/17 12:05:09
parse_file is used in idl_reader (it’s a simple ut
| |
70 | |
71 from blink_idl_lexer import BlinkIDLLexer | |
72 | |
73 # We ignore comments, but base parser preserves them | |
74 # FIXME: Upstream: comments should be removed in base parser | |
# Names of grammar rules that are filtered out of this parser's rule listing
# (each is looked up as 'p_' + name).
REMOVED_RULES = [
    'Comments',  # [0.1]
    'CommentsRest',  # [0.2]
]
78 | |
79 | |
80 class BlinkIDLParser(IDLParser): | |
81 # Below are grammar rules used by yacc, given by functions named p_<RULE>. | |
82 # * The docstring is the production rule in BNF (grammar). | |
83 # * The body is the yacc action (semantics). | |
84 # Reference: | |
85 # http://www.dabeaz.com/ply/ply.html#ply_nn23 | |
86 # | |
87 # Review of yacc: | |
haraken
2013/07/16 14:17:51
Great summary! This is the easiest yacc tutorial I
Nils Barth (inactive)
2013/07/17 12:05:09
Thanks! (#^_^#)
| |
88 # Yacc parses a token stream, internally producing a Concrete Syntax Tree | |
89 # (CST), where each node corresponds to a production rule in the grammar. | |
90 # At each node, it runs an action, which is usually "produce a node in the | |
91 # Abstract Syntax Tree (AST)" or "ignore this node" (for nodes in the CST | |
92 # that aren't included in the AST, since only needed for parsing). | |
93 # | |
94 # The rules use pseudo-variables; in PLY syntax: | |
95 # p[0] is the left side: assign return value to p[0] instead of returning, | |
96 # p[1] ... p[n] are the right side: the values can be accessed, and they | |
97 # can be modified. | |
98 # (In yacc these are $$ and $1 ... $n.) | |
99 # | |
100 # The rules can look cryptic at first, but there are a few standard | |
101 # transforms from the CST to AST. With these in mind, the actions should | |
102 # be reasonably legible. | |
103 # | |
104 # * Ignore production | |
105 # Discard this branch. Primarily used when one alternative is empty. | |
106 # | |
107 # Sample code: | |
108 # if len(p) > 1: | |
109 # p[0] = ... | |
110 # # Note no assignment if len(p) == 1 | |
111 # | |
112 # * Eliminate singleton production | |
113 # Discard this node in the CST, pass the next level down up the tree. | |
114 # Used to ignore productions only necessary for parsing, but not needed | |
115 # in the AST. | |
116 # | |
117 # Sample code: | |
118 # p[0] = p[1] | |
119 # | |
120 # * Build node | |
121 # The key type of rule. In this parser, produces object of class IDLNode. | |
122 # There are several helper functions; see base idl_parser.py for | |
123 # definitions and more examples of use. | |
124 # | |
125 # Sample code: | |
126 # # Build node of type NodeType, with value p[1], and children. | |
127 # p[0] = self.BuildProduction('NodeType', p, 1, children) | |
128 # | |
129 # # Build named node of type NodeType, with name and value p[1]. | |
haraken
2013/07/16 14:17:51
I'm just curious: What's the difference between a
Nils Barth (inactive)
2013/07/17 12:05:09
A named node also has the attribute ‘NAME’ set; th
| |
130 # # (children optional) | |
131 # p[0] = self.BuildNamed('NodeType', p, 1) | |
132 # | |
133 # # Make a list | |
134 # # Used if one node has several children. | |
135 # children = ListFromConcat(p[2], p[3]) | |
136 # p[0] = self.BuildProduction('NodeType', p, 1, children) | |
137 # | |
138 # # Also used to collapse the right-associative tree | |
139 # # produced by parsing a list back into a single list. | |
140 # """Foos : Foo Foos | |
141 # |""" | |
142 # if len(p) > 1: | |
143 # p[0] = ListFromConcat(p[1], p[2]) | |
144 # | |
145 # # Add children. | |
146 # # Primarily used to add attributes, produced via BuildTrue. | |
147 # # p_StaticAttribute | |
148 # """StaticAttribute : STATIC Attribute""" | |
149 # p[2].AddChildren(self.BuildTrue('STATIC')) | |
150 # p[0] = p[2] | |
151 # | |
haraken
2013/07/16 14:17:51
You might want to add an explanation for self.Buil
Nils Barth (inactive)
2013/07/17 12:05:09
Got it, done. (Also BuildTrue.)
| |
152 # Numbering scheme for the rules is: | |
153 # [1] for Web IDL spec (or additions in base parser) | |
154 # These should all be upstreamed to the base parser. | |
155 # [b1] for Blink IDL changes (overrides Web IDL) | |
156 # [b1.1] for Blink IDL additions, auxiliary rules for [b1] | |
haraken
2013/07/16 14:17:51
The [X] numbering is fragile. As far as I see the
Nils Barth (inactive)
2013/07/17 12:05:09
This is for consistency with the base Pepper parse
haraken
2013/07/21 14:31:50
Makes sense.
| |
157 | |
158 # [0] Override grammar, since we strip comments | |
159 # (not in Web IDL) | |
160 # FIXME: Upstream | |
def p_Top(self, p):
    """Top : Definitions"""
    # Singleton production: pass the Definitions value straight up.
    p[0] = p[1]
164 | |
165 # [3] Override action, since we distinguish callbacks | |
166 # FIXME: Upstream | |
def p_CallbackOrInterface(self, p):
    """CallbackOrInterface : CALLBACK CallbackRestOrInterface
                           | Interface"""
    if len(p) > 2:
        # CALLBACK alternative: mark the node with a CALLBACK attribute and
        # pass it up.
        p[2].AddChildren(self.BuildTrue('CALLBACK'))
        p[0] = p[2]
    else:
        # Plain Interface: pass through unchanged.
        p[0] = p[1]
175 | |
176 # [b27] Add strings, more 'Literal' productions | |
177 # 'Literal's needed because integers and strings are both internally strings | |
def p_ConstValue(self, p):
    """ConstValue : BooleanLiteral
                  | FloatLiteral
                  | IntegerLiteral
                  | StringLiteral
                  | null"""
    # Standard is (no 'string', fewer 'Literal's):
    # ConstValue : BooleanLiteral
    #            | FloatLiteral
    #            | integer
    #            | NULL
    #
    # Every alternative is a single symbol, so just pass its value up.
    p[0] = p[1]
190 | |
191 # [b27.1] | |
def p_IntegerLiteral(self, p):
    """IntegerLiteral : integer"""
    # Produce two attributes: TYPE marks the literal as an integer, NAME
    # carries the token text.
    p[0] = ListFromConcat(self.BuildAttribute('TYPE', 'integer'),
                          self.BuildAttribute('NAME', p[1]))
196 | |
197 # [b27.2] | |
def p_StringLiteral(self, p):
    """StringLiteral : string"""
    # Produce two attributes: TYPE marks the literal as a DOMString, NAME
    # carries the token text.
    p[0] = ListFromConcat(self.BuildAttribute('TYPE', 'DOMString'),
                          self.BuildAttribute('NAME', p[1]))
202 | |
203 # [b30] Add StaticAttribute | |
def p_AttributeOrOperation(self, p):
    """AttributeOrOperation : STRINGIFIER StringifierAttributeOrOperation
                            | Attribute
                            | StaticAttribute
                            | Operation"""
    # Standard is (no StaticAttribute):
    # AttributeOrOperation : STRINGIFIER StringifierAttributeOrOperation
    #                      | Attribute
    #                      | Operation
    if len(p) > 2:
        # STRINGIFIER alternative: pass up the node that follows the keyword.
        # NOTE(review): no STRINGIFIER attribute is added here; per the
        # review discussion this is handled in the base parser -- confirm.
        p[0] = p[2]
    else:
        p[0] = p[1]
217 | |
218 # [b30.1] | |
def p_StaticAttribute(self, p):
    """StaticAttribute : STATIC Attribute"""
    # Mark the Attribute node with a STATIC attribute, then pass it up.
    p[2].AddChildren(self.BuildTrue('STATIC'))
    p[0] = p[2]
223 | |
224 # [b47] | |
def p_ExceptionMember(self, p):
    """ExceptionMember : Const
                       | ExceptionField
                       | Attribute
                       | ExceptionFieldToString"""
    # Standard is (no Attribute, no ExceptionFieldToString):
    # ExceptionMember : Const
    #                 | ExceptionField
    #
    # Every alternative is a single symbol, so just pass its value up.
    p[0] = p[1]
234 | |
235 # [b47.1] | |
def p_ExceptionFieldToString(self, p):
    """ExceptionFieldToString : Type identifier '(' ')' ';'"""
    # Needed to handle:
    # // Override in a Mozilla compatible format
    # [NotEnumerable] DOMString toString();
    #
    # NOTE(review): per the review discussion, operations inside exceptions
    # are not part of the Web IDL spec, hence this dedicated rule -- confirm
    # against the current spec before relying on it.
    #
    # The node is named after the identifier (p[2]); the Type (p[1]) is
    # passed as its children.
    p[0] = self.BuildNamed('ExceptionFieldToString', p, 2, p[1])
242 | |
243 # Extended attributes | |
244 # [b49] Override base parser: remove comment field, since comments stripped | |
haraken
2013/07/16 14:17:51
I'm just curious: Would you elaborate on why we ne
Nils Barth (inactive)
2013/07/17 12:05:09
The Pepper IDL parser assumes that each interface
| |
245 # FIXME: Upstream | |
def p_ExtendedAttributeList(self, p):
    """ExtendedAttributeList : '[' ExtendedAttribute ExtendedAttributes ']'
                             | '[' ']'
                             | """
    # Only the non-empty bracketed form yields a value; for '[' ']' and for
    # the empty production p[0] is left unassigned.
    if len(p) > 3:
        items = ListFromConcat(p[2], p[3])
        # Assign the ExtAttributes node directly.  p[0] is the output slot
        # and holds nothing useful before assignment, so it must not be read
        # as an input here.
        p[0] = self.BuildProduction('ExtAttributes', p, 1, items)
haraken
2013/07/16 14:17:51
I'm just curious: Why isn't this "p[0] = attribs"
Nils Barth (inactive)
2013/07/17 12:05:09
Good catch! Fixed (and actually don’t need auxilia
| |
254 | |
255 # [b50] Allow optional trailing comma | |
haraken
2013/07/16 14:17:51
Haven't you already removed all trailing commas fr
Nils Barth (inactive)
2013/07/17 12:05:09
(See separate reply.)
Nils Barth (inactive)
2013/07/17 12:07:11
I removed them, but people kept putting them back.
haraken
2013/07/21 14:31:50
Thanks, fixing the spec side sounds reasonable to
Nils Barth (inactive)
2013/07/22 06:32:01
I've added a link to the bug too:
https://www.w3.o
| |
def p_ExtendedAttributes(self, p):
    """ExtendedAttributes : ',' ExtendedAttribute ExtendedAttributes
                          | ','
                          |"""
    # A lone trailing ',' (len(p) == 2) and the empty production contribute
    # nothing; only the recursive alternative extends the list.
    if len(p) > 2:
        p[0] = ListFromConcat(p[2], p[3])
262 | |
263 # [b51] Add ExtendedAttributeIdentAndOrIdent | |
def p_ExtendedAttribute(self, p):
    """ExtendedAttribute : ExtendedAttributeNoArgs
                         | ExtendedAttributeArgList
                         | ExtendedAttributeIdent
                         | ExtendedAttributeIdentAndOrIdent
                         | ExtendedAttributeNamedArgList"""
    # Every alternative is a single symbol, so just pass its value up.
    p[0] = p[1]
271 | |
272 # [59] | |
273 # FIXME: Upstream UnionType | |
def p_UnionType(self, p):
    """UnionType : '(' UnionMemberType OR UnionMemberType UnionMemberTypes ')'"""
    # Gather the two mandatory members plus any further ones into one flat
    # list of children for the UnionType node.
    members = ListFromConcat(p[2], p[4], p[5])
    p[0] = self.BuildProduction('UnionType', p, 1, members)
278 | |
279 # [60] | |
def p_UnionMemberType(self, p):
    """UnionMemberType : NonAnyType
                       | UnionType TypeSuffix
                       | ANY '[' ']' TypeSuffix"""
    if len(p) == 2:
        # NonAnyType: pass through.
        p[0] = p[1]
    elif len(p) == 3:
        # UnionType TypeSuffix: combine the type with its suffix.
        p[0] = ListFromConcat(p[1], p[2])
    else:
        # ANY '[' ']' TypeSuffix: build an 'Any' node and combine it with
        # the trailing suffix (p[4]).
        p[0] = ListFromConcat(self.BuildProduction('Any', p, 1), p[4])
290 | |
291 # [61] | |
def p_UnionMemberTypes(self, p):
    """UnionMemberTypes : OR UnionMemberType UnionMemberTypes
                        |"""
    # Collapse the right-recursive list into a single flat list; the empty
    # production leaves p[0] unassigned.
    if len(p) > 1:
        p[0] = ListFromConcat(p[2], p[3])
297 | |
298 # [70] Override base parser to remove non-standard sized array | |
299 # FIXME: Upstream | |
def p_TypeSuffix(self, p):
    """TypeSuffix : '[' ']' TypeSuffix
                  | '?' TypeSuffixStartingWithArray
                  |"""
    # The alternatives are told apart by length; they are mutually
    # exclusive, hence if/elif.  The empty production leaves p[0] unassigned.
    if len(p) == 4:
        # '[' ']' TypeSuffix: Array node wrapping the remaining suffix.
        p[0] = self.BuildProduction('Array', p, 1, p[3])
    elif len(p) == 3:
        # '?' ...: NULLABLE attribute followed by the remaining suffix.
        p[0] = ListFromConcat(self.BuildTrue('NULLABLE'), p[2])
309 | |
310 # [b76.1] | |
def p_ExtendedAttributeIdentAndOrIdent(self, p):
    """ExtendedAttributeIdentAndOrIdent : identifier '=' identifier '&' identifier
                                        | identifier '=' identifier '|' identifier"""
    # The right-hand side is stored as one VALUE string: the two identifiers
    # joined by the operator token between them (p[3] + p[4] + p[5]).
    value = self.BuildAttribute('VALUE', p[3] + p[4] + p[5])
    # The left-hand identifier (p[1]) becomes the node's name.
    p[0] = self.BuildNamed('ExtAttribute', p, 1, value)
haraken
2013/07/16 14:17:51
I'm just curious: Where is the identifier of p[1]
Nils Barth (inactive)
2013/07/17 12:05:09
p[1] is recorded in the name: that’s what BuildNam
| |
316 | |
def __dir__(self):
    """Return attribute names, minus the p_<RULE> methods in REMOVED_RULES.

    Hiding these names from the listing keeps yacc from parsing them as
    grammar rules.

    Raises:
        KeyError: if 'p_' + rule is not among the collected names.
    """
    # FIXME: Upstream
    # Set union rather than ``keys() + dir(...)``: dict.keys() is only a list
    # on Python 2, so the concatenation fails on Python 3.
    keys = set(self.__dict__) | set(dir(self.__class__))
    for rule in REMOVED_RULES:
        keys.remove('p_' + rule)
    return list(keys)
324 | |
def __init__(self, lexer=None, verbose=False, debug=False, mute_error=False):
    """Set up the lexer, build the yacc parser, and reset error state.

    Args:
        lexer: lexer to use; a new BlinkIDLLexer is created if falsy.
        verbose: stored as self.verbose.
        debug: passed to yacc.yacc and stored as self.parse_debug.
        mute_error: stored as self.mute_error.
    """
    # NOTE(review): the base class __init__ is not called; the attributes
    # below are set directly -- presumably mirroring the base initializer,
    # confirm against idl_parser.py.
    lexer = lexer or BlinkIDLLexer()
    self.lexer = lexer
    self.tokens = lexer.KnownTokens()
    # Using SLR (instead of LALR) generates the table faster,
    # but produces the same output. This is ok b/c Web IDL (and Blink IDL)
    # is an LL(1) grammar, so SLR can parse it.
    self.yaccobj = yacc.yacc(module=self, debug=debug, method='SLR')
    self.parse_debug = debug
    self.verbose = verbose
    self.mute_error = mute_error
    # Error/warning bookkeeping starts out empty.
    self._parse_errors = 0
    self._parse_warnings = 0
    self._last_error_msg = None
    self._last_error_lineno = 0
    self._last_error_pos = 0
341 | |
342 | |
343 # If run by itself, attempt to build the parser | |
if __name__ == '__main__':
    # Constructing the parser runs yacc.yacc, i.e. builds the parse tables.
    parser = BlinkIDLParser()
OLD | NEW |