Chromium Code Reviews| Index: Source/bindings/scripts/blink_idl_parser.py |
| diff --git a/Source/bindings/scripts/blink_idl_parser.py b/Source/bindings/scripts/blink_idl_parser.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..6fb95511e22bc695d8d64d072ea3b19e7c0b56fa |
| --- /dev/null |
| +++ b/Source/bindings/scripts/blink_idl_parser.py |
| @@ -0,0 +1,345 @@ |
| +# Copyright (C) 2013 Google Inc. All rights reserved. |
| +# |
| +# Redistribution and use in source and binary forms, with or without |
| +# modification, are permitted provided that the following conditions are |
| +# met: |
| +# |
| +# * Redistributions of source code must retain the above copyright |
| +# notice, this list of conditions and the following disclaimer. |
| +# * Redistributions in binary form must reproduce the above |
| +# copyright notice, this list of conditions and the following disclaimer |
| +# in the documentation and/or other materials provided with the |
| +# distribution. |
| +# * Neither the name of Google Inc. nor the names of its |
| +# contributors may be used to endorse or promote products derived from |
| +# this software without specific prior written permission. |
| +# |
| +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| + |
| +"""Parser for Blink IDL. |
| + |
| +The parser uses the PLY (Python Lex-Yacc) library to build a set of parsing |
| +rules which understand the Blink dialect of Web IDL. |
| +It derives from a standard Web IDL parser, overriding rules where Blink IDL |
| +differs syntactically or semantically from the base parser, or where the base |
| +parser diverges from the Web IDL standard. |
| + |
| +Web IDL: |
| + http://www.w3.org/TR/WebIDL/ |
| +Web IDL Grammar: |
| + http://www.w3.org/TR/WebIDL/#idl-grammar |
| +PLY: |
| + http://www.dabeaz.com/ply/ |
| +""" |
| + |
| +# Disable check for line length and Member as Function due to how grammar rules |
| +# are defined with PLY |
| +# |
| +# pylint: disable=R0201 |
| +# pylint: disable=C0301 |
| +# |
| +# Disable attribute validation, as lint can't import parent class to check |
| +# pylint: disable=E1101 |
|
haraken
2013/07/16 14:17:51
Is this comment helpful?
Nils Barth (inactive)
2013/07/17 12:05:09
(As above.) Yes, quiets pylint error.
|
| + |
| +import os.path |
| +import sys |
| + |
| +# PLY is in Chromium src/third_party/ply |
| +module_path, module_name = os.path.split(__file__) |
| +third_party = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir) |
| +sys.path.append(third_party) |
|
haraken
2013/07/16 14:17:51
Instead of writing the relative path here, let's p
Nils Barth (inactive)
2013/07/17 12:05:09
(See separate response.)
|
| +from ply import yacc |
| + |
| +# Base parser is in Chromium src/tools/idl_parser |
| +tools_dir = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir, os.pardir, 'tools') |
| +sys.path.append(tools_dir) |
|
haraken
2013/07/16 14:17:51
Ditto.
Nils Barth (inactive)
2013/07/17 12:05:09
(Ditto.)
|
| +# Don't change case of ListFromConcat, for consistency with base parser |
|
haraken
2013/07/16 14:17:51
Nit: I'd remove this comment.
Nils Barth (inactive)
2013/07/17 12:05:09
Done.
|
| +from idl_parser.idl_parser import IDLParser, ListFromConcat |
| +# Change function name, due to different Chromium/Blink convention |
|
haraken
2013/07/16 14:17:51
Ditto.
Nils Barth (inactive)
2013/07/17 12:05:09
Done.
|
| +from idl_parser.idl_parser import ParseFile as parse_file |
|
haraken
2013/07/16 14:17:51
Looks like parse_file is unused.
Nils Barth (inactive)
2013/07/17 12:05:09
parse_file is used in idl_reader (it’s a simple ut
|
| + |
| +from blink_idl_lexer import BlinkIDLLexer |
| + |
| +# We ignore comments, but base parser preserves them |
| +# FIXME: Upstream: comments should be removed in base parser |
| +# Each entry is a rule name without the 'p_' prefix; BlinkIDLParser.__dir__ |
| +# drops the matching 'p_<RULE>' name from its listing. |
| +REMOVED_RULES = ['Comments', # [0.1] |
| + 'CommentsRest', # [0.2] |
| + ] |
| + |
| + |
| +class BlinkIDLParser(IDLParser): |
| + # Below are grammar rules used by yacc, given by functions named p_<RULE>. |
| + # * The docstring is the production rule in BNF (grammar). |
| + # * The body is the yacc action (semantics). |
| + # Reference: |
| + # http://www.dabeaz.com/ply/ply.html#ply_nn23 |
| + # |
| + # Review of yacc: |
|
haraken
2013/07/16 14:17:51
Great summary! This is the easiest yacc tutorial I
Nils Barth (inactive)
2013/07/17 12:05:09
Thanks! (#^_^#)
|
| + # Yacc parses a token stream, internally producing a Concrete Syntax Tree |
| + # (CST), where each node corresponds to a production rule in the grammar. |
| + # At each node, it runs an action, which is usually "produce a node in the |
| + # Abstract Syntax Tree (AST)" or "ignore this node" (for nodes in the CST |
| + # that aren't included in the AST, since they are only needed for parsing). |
| + # |
| + # The rules use pseudo-variables; in PLY syntax: |
| + # p[0] is the left side: assign return value to p[0] instead of returning, |
| + # p[1] ... p[n] are the right side: the values can be accessed, and they |
| + # can be modified. |
| + # (In yacc these are $$ and $1 ... $n.) |
| + # |
| + # The rules can look cryptic at first, but there are a few standard |
| + # transforms from the CST to AST. With these in mind, the actions should |
| + # be reasonably legible. |
| + # |
| + # * Ignore production |
| + # Discard this branch. Primarily used when one alternative is empty. |
| + # |
| + # Sample code: |
| + # if len(p) > 1: |
| + # p[0] = ... |
| + # # Note no assignment if len(p) == 1 |
| + # |
| + # * Eliminate singleton production |
| + # Discard this node in the CST and pass its single child's value up the tree. |
| + # Used to ignore productions only necessary for parsing, but not needed |
| + # in the AST. |
| + # |
| + # Sample code: |
| + # p[0] = p[1] |
| + # |
| + # * Build node |
| + # The key type of rule. In this parser, it produces an object of class IDLNode. |
| + # There are several helper functions; see base idl_parser.py for |
| + # definitions and more examples of use. |
| + # |
| + # Sample code: |
| + # # Build node of type NodeType, with value p[1], and children. |
| + # p[0] = self.BuildProduction('NodeType', p, 1, children) |
| + # |
| + # # Build named node of type NodeType, with name and value p[1]. |
|
haraken
2013/07/16 14:17:51
I'm just curious: What's the difference between a
Nils Barth (inactive)
2013/07/17 12:05:09
A named node also has the attribute ‘NAME’ set; th
|
| + # # (children optional) |
| + # p[0] = self.BuildNamed('NodeType', p, 1) |
| + # |
| + # # Make a list |
| + # # Used if one node has several children. |
| + # children = ListFromConcat(p[2], p[3]) |
| + # p[0] = self.BuildProduction('NodeType', p, 1, children) |
| + # |
| + # # Also used to collapse the right-associative tree |
| + # # produced by parsing a list back into a single list. |
| + # """Foos : Foo Foos |
| + # |""" |
| + # if len(p) > 1: |
| + # p[0] = ListFromConcat(p[1], p[2]) |
| + # |
| + # # Add children. |
| + # # Primarily used to add attributes, produced via BuildTrue. |
| + # # p_StaticAttribute |
| + # """StaticAttribute : STATIC Attribute""" |
| + # p[2].AddChildren(self.BuildTrue('STATIC')) |
| + # p[0] = p[2] |
| + # |
|
haraken
2013/07/16 14:17:51
You might want to add an explanation for self.Buil
Nils Barth (inactive)
2013/07/17 12:05:09
Got it, done. (Also BuildTrue.)
|
| + # Numbering scheme for the rules is: |
| + # [1] for Web IDL spec (or additions in base parser) |
| + # These should all be upstreamed to the base parser. |
| + # [b1] for Blink IDL changes (overrides Web IDL) |
| + # [b1.1] for Blink IDL additions, auxiliary rules for [b1] |
|
haraken
2013/07/16 14:17:51
The [X] numbering is fragile. As far as I see the
Nils Barth (inactive)
2013/07/17 12:05:09
This is for consistency with the base Pepper parse
haraken
2013/07/21 14:31:50
Makes sense.
|
| + |
| + # [0] Override grammar, since we strip comments |
| + # (not in Web IDL) |
| + # FIXME: Upstream |
| + def p_Top(self, p): |
| + """Top : Definitions""" |
| + # Singleton production: hand the Definitions value up unchanged. |
| + p[0] = p[1] |
| + |
| + # [3] Override action, since we distinguish callbacks |
| + # FIXME: Upstream |
| + def p_CallbackOrInterface(self, p): |
| + """CallbackOrInterface : CALLBACK CallbackRestOrInterface |
| + | Interface""" |
| + if len(p) > 2: |
| + p[2].AddChildren(self.BuildTrue('CALLBACK')) |
| + p[0] = p[2] |
| + else: |
| + p[0] = p[1] |
| + |
| + # [b27] Add strings, more 'Literal' productions |
| + # 'Literal's needed because integers and strings are both internally strings |
| + def p_ConstValue(self, p): |
| + """ConstValue : BooleanLiteral |
| + | FloatLiteral |
| + | IntegerLiteral |
| + | StringLiteral |
| + | null""" |
| + # Standard is (no 'string', fewer 'Literal's): |
| + # ConstValue : BooleanLiteral |
| + # | FloatLiteral |
| + # | integer |
| + # | NULL |
| + # Every alternative is a single symbol, so its value is passed up as-is. |
| + p[0] = p[1] |
| + |
| + # [b27.1] |
| + def p_IntegerLiteral(self, p): |
| + """IntegerLiteral : integer""" |
| + p[0] = ListFromConcat(self.BuildAttribute('TYPE', 'integer'), |
| + self.BuildAttribute('NAME', p[1])) |
| + |
| + # [b27.2] |
| + def p_StringLiteral(self, p): |
| + """StringLiteral : string""" |
| + p[0] = ListFromConcat(self.BuildAttribute('TYPE', 'DOMString'), |
| + self.BuildAttribute('NAME', p[1])) |
| + |
| + # [b30] Add StaticAttribute |
| + def p_AttributeOrOperation(self, p): |
| + """AttributeOrOperation : STRINGIFIER StringifierAttributeOrOperation |
| + | Attribute |
| + | StaticAttribute |
| + | Operation""" |
| + # Standard is (no StaticAttribute): |
| + # AttributeOrOperation : STRINGIFIER StringifierAttributeOrOperation |
| + # | Attribute |
| + # | Operation |
| + # Only the STRINGIFIER alternative has two symbols; its node is the second |
| + # one. All other alternatives are single symbols passed up unchanged. |
| + if len(p) > 2: |
| + p[0] = p[2] |

haraken
2013/07/16 14:17:51
Don't you need to add p[2].AddChildren(self.BuildT
Nils Barth (inactive)
2013/07/17 12:05:09
That’s handled in the base parser:
https://code.go
haraken
2013/07/21 14:31:50
Ah, got it. But it looks inconsistent. Given that
Nils Barth (inactive)
2013/07/22 06:32:01
Good point, that would be clearer: just AddChildre
|
| + else: |
| + p[0] = p[1] |
| + |
| + # [b30.1] |
| + def p_StaticAttribute(self, p): |
| + """StaticAttribute : STATIC Attribute""" |
| + p[2].AddChildren(self.BuildTrue('STATIC')) |
| + p[0] = p[2] |
| + |
| + # [b47] |
| + def p_ExceptionMember(self, p): |
| + """ExceptionMember : Const |
| + | ExceptionField |
| + | Attribute |
| + | ExceptionFieldToString""" |
| + # Standard is (no Attribute, no ExceptionFieldToString): |
| + # ExceptionMember : Const |
| + # | ExceptionField |
| + # Every alternative is a single symbol, so its value is passed up as-is. |
| + p[0] = p[1] |
| + |
| + # [b47.1] |
| + def p_ExceptionFieldToString(self, p): |
| + """ExceptionFieldToString : Type identifier '(' ')' ';'""" |
| + # Needed to handle: |
| + # // Override in a Mozilla compatible format |
| + # [NotEnumerable] DOMString toString(); |

haraken
2013/07/16 14:17:51
Why do we need to handle toString() specially.
Nils Barth (inactive)
2013/07/17 12:05:09
Operations in Exceptions are not in the Web IDL sp
haraken
2013/07/21 14:31:50
Looks good, thanks!
Nils Barth (inactive)
2013/07/22 06:32:01
Looking into this more, this “toString()” function
|
| + # Named node taken from the identifier at p[2]; the Type at p[1] is passed |
| + # as the children argument (see BuildNamed in the base parser to confirm). |
| + p[0] = self.BuildNamed('ExceptionFieldToString', p, 2, p[1]) |
| + |
| + # Extended attributes |
| + # [b49] Override base parser: remove comment field, since comments stripped |
|
haraken
2013/07/16 14:17:51
I'm just curious: Would you elaborate on why we ne
Nils Barth (inactive)
2013/07/17 12:05:09
The Pepper IDL parser assumes that each interface
|
| + # FIXME: Upstream |
| + def p_ExtendedAttributeList(self, p): |
| + """ExtendedAttributeList : '[' ExtendedAttribute ExtendedAttributes ']' |
| + | '[' ']' |
| + | """ |
| + if len(p) > 3: |
| + items = ListFromConcat(p[2], p[3]) |
| + attribs = self.BuildProduction('ExtAttributes', p, 1, items) |
| + p[0] = ListFromConcat(p[0], attribs) |
|
haraken
2013/07/16 14:17:51
I'm just curious: Why isn't this "p[0] = attribs"
Nils Barth (inactive)
2013/07/17 12:05:09
Good catch! Fixed (and actually don’t need auxilia
|
| + |
| + # [b50] Allow optional trailing comma |
|
haraken
2013/07/16 14:17:51
Haven't you already removed all trailing commas fr
Nils Barth (inactive)
2013/07/17 12:05:09
(See separate reply.)
Nils Barth (inactive)
2013/07/17 12:07:11
I removed them, but people kept putting them back.
haraken
2013/07/21 14:31:50
Thanks, fixing the spec side sounds reasonable to
Nils Barth (inactive)
2013/07/22 06:32:01
I've added a link to the bug too:
https://www.w3.o
|
| + def p_ExtendedAttributes(self, p): |
| + """ExtendedAttributes : ',' ExtendedAttribute ExtendedAttributes |
| + | ',' |
| + |""" |
| + if len(p) > 2: |
| + p[0] = ListFromConcat(p[2], p[3]) |
| + |
| + # [b51] Add ExtendedAttributeIdentAndOrIdent |
| + def p_ExtendedAttribute(self, p): |
| + """ExtendedAttribute : ExtendedAttributeNoArgs |
| + | ExtendedAttributeArgList |
| + | ExtendedAttributeIdent |
| + | ExtendedAttributeIdentAndOrIdent |
| + | ExtendedAttributeNamedArgList""" |
| + # Every alternative is a single symbol, so its value is passed up as-is. |
| + p[0] = p[1] |
| + |
| + # [59] |
| + # FIXME: Upstream UnionType |
| + def p_UnionType(self, p): |
| + """UnionType : '(' UnionMemberType OR UnionMemberType UnionMemberTypes ')'""" |
| + members = ListFromConcat(p[2], p[4], p[5]) |
| + p[0] = self.BuildProduction('UnionType', p, 1, members) |
| + |
| + # [60] |
| + def p_UnionMemberType(self, p): |
| + """UnionMemberType : NonAnyType |
| + | UnionType TypeSuffix |
| + | ANY '[' ']' TypeSuffix""" |
| + if len(p) == 2: |
| + p[0] = p[1] |
| + elif len(p) == 3: |
| + p[0] = ListFromConcat(p[1], p[2]) |
| + else: |
| + p[0] = ListFromConcat(self.BuildProduction('Any', p, 1), p[4]) |
| + |
| + # [61] |
| + def p_UnionMemberTypes(self, p): |
| + """UnionMemberTypes : OR UnionMemberType UnionMemberTypes |
| + |""" |
| + if len(p) > 1: |
| + p[0] = ListFromConcat(p[2], p[3]) |
| + |
| + # [70] Override base parser to remove non-standard sized array |
| + # FIXME: Upstream |
| + def p_TypeSuffix(self, p): |
| + """TypeSuffix : '[' ']' TypeSuffix |
| + | '?' TypeSuffixStartingWithArray |
| + |""" |
| + if len(p) == 4: |
| + p[0] = self.BuildProduction('Array', p, 1, p[3]) |
| + |
| + if len(p) == 3: |
|
haraken
2013/07/16 14:17:51
Nit: elif
Nils Barth (inactive)
2013/07/17 12:05:09
(>.<) Done.
Needs fixing in base parser too.
|
| + p[0] = ListFromConcat(self.BuildTrue('NULLABLE'), p[2]) |
| + |
| + # [b76.1] |
| + def p_ExtendedAttributeIdentAndOrIdent(self, p): |
| + """ExtendedAttributeIdentAndOrIdent : identifier '=' identifier '&' identifier |
| + | identifier '=' identifier '|' identifier""" |
| + value = self.BuildAttribute('VALUE', p[3] + p[4] + p[5]) |
| + p[0] = self.BuildNamed('ExtAttribute', p, 1, value) |
|
haraken
2013/07/16 14:17:51
I'm just curious: Where is the identifier of p[1]
Nils Barth (inactive)
2013/07/17 12:05:09
p[1] is recorded in the name: that’s what BuildNam
|
| + |
| + def __dir__(self): |
| + # Remove REMOVED_RULES from listing so yacc doesn't parse them |
| + # FIXME: Upstream |
| + keys = set(self.__dict__.keys() + dir(self.__class__)) |
| + for rule in REMOVED_RULES: |
| + keys.remove('p_' + rule) |
| + return list(keys) |
| + |
| + def __init__(self, lexer=None, verbose=False, debug=False, mute_error=False): |
| + # Set up the lexer, build the yacc parser object and reset error state. |
| + # lexer: lexer to use; a new BlinkIDLLexer is created when none is given. |
| + # verbose, mute_error: stored on the instance; not used in this method. |
| + # debug: passed to yacc.yacc and also stored as parse_debug. |
| + lexer = lexer or BlinkIDLLexer() |
| + self.lexer = lexer |
| + self.tokens = lexer.KnownTokens() |
| + # Using SLR (instead of LALR) generates the table faster, |
| + # but produces the same output. This is ok b/c Web IDL (and Blink IDL) |
| + # is an LL(1) grammar, so SLR can parse it. |
| + # NOTE(review): being LL(1) does not by itself guarantee a grammar is |
| + # SLR(1) -- confirm yacc reports no conflicts when building the table. |
| + self.yaccobj = yacc.yacc(module=self, debug=debug, method='SLR') |
| + self.parse_debug = debug |
| + self.verbose = verbose |
| + self.mute_error = mute_error |
| + # Error/warning bookkeeping starts out empty. |
| + self._parse_errors = 0 |
| + self._parse_warnings = 0 |
| + self._last_error_msg = None |
| + self._last_error_lineno = 0 |
| + self._last_error_pos = 0 |
| + |
| + |
| +# If run by itself, attempt to build the parser |
| +# (constructing BlinkIDLParser calls yacc.yacc in __init__; nothing is parsed) |
| +if __name__ == '__main__': |
| + parser = BlinkIDLParser() |