Source/bindings/scripts/blink_idl_parser.py - Issue 15801003: IDL parser rewrite in Python

Unified Diff: Source/bindings/scripts/blink_idl_parser.py

Issue 15801003: IDL parser rewrite in Python (Closed) Base URL: svn://svn.chromium.org/blink/trunk

Patch Set: Ready for review! (cleaner) Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« Source/bindings/scripts/blink_idl_lexer.py ('K') | « Source/bindings/scripts/blink_idl_lexer.py ('k') | Source/bindings/scripts/idl_compiler.py » ('j') | Source/bindings/scripts/idl_compiler.py » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: Source/bindings/scripts/blink_idl_parser.py

diff --git a/Source/bindings/scripts/blink_idl_parser.py b/Source/bindings/scripts/blink_idl_parser.py

new file mode 100644

index 0000000000000000000000000000000000000000..6fb95511e22bc695d8d64d072ea3b19e7c0b56fa

--- /dev/null

+++ b/Source/bindings/scripts/blink_idl_parser.py

@@ -0,0 +1,345 @@

+# Redistribution and use in source and binary forms, with or without

+# modification, are permitted provided that the following conditions are

+# met:

+# * Redistributions of source code must retain the above copyright

+# notice, this list of conditions and the following disclaimer.

+# * Redistributions in binary form must reproduce the above

+# copyright notice, this list of conditions and the following disclaimer

+# in the documentation and/or other materials provided with the

+# distribution.

+# * Neither the name of Google Inc. nor the names of its

+# contributors may be used to endorse or promote products derived from

+# this software without specific prior written permission.

+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+"""Parser for Blink IDL.

+The parser uses the PLY (Python Lex-Yacc) library to build a set of parsing

+rules which understand the Blink dialect of Web IDL.

+It derives from a standard Web IDL parser, overriding rules where Blink IDL

+differs syntactically or semantically from the base parser, or where the base

+parser diverges from the Web IDL standard.

+Web IDL:

+ http://www.w3.org/TR/WebIDL/

+Web IDL Grammar:

+ http://www.w3.org/TR/WebIDL/#idl-grammar

+PLY:

+ http://www.dabeaz.com/ply/

+"""

+# Disable checks for "method could be a function" (R0201) and line length

+# (C0301), due to how grammar rules are defined with PLY

+# pylint: disable=R0201

+# pylint: disable=C0301

+# Disable attribute validation, as lint can't import parent class to check

+# pylint: disable=E1101

haraken 2013/07/16 14:17:51 Is this comment helpful?

Nils Barth (inactive) 2013/07/17 12:05:09 (As above.) Yes, quiets pylint error.

+import os.path

+import sys

+# PLY is in Chromium src/third_party/ply

+module_path, module_name = os.path.split(__file__)

+third_party = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir)

+sys.path.append(third_party)

haraken 2013/07/16 14:17:51 Instead of writing the relative path here, let's p

Nils Barth (inactive) 2013/07/17 12:05:09 (See separate response.)

+from ply import yacc

+# Base parser is in Chromium src/tools/idl_parser

+tools_dir = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir, os.pardir, 'tools')

+sys.path.append(tools_dir)

haraken 2013/07/16 14:17:51 Ditto.

Nils Barth (inactive) 2013/07/17 12:05:09 (Ditto.)

+# Don't change case of ListFromConcat, for consistency with base parser

haraken 2013/07/16 14:17:51 Nit: I'd remove this comment.

Nils Barth (inactive) 2013/07/17 12:05:09 Done.

+from idl_parser.idl_parser import IDLParser, ListFromConcat

+# Change function name, due to different Chromium/Blink convention

haraken 2013/07/16 14:17:51 Ditto.

Nils Barth (inactive) 2013/07/17 12:05:09 Done.

+from idl_parser.idl_parser import ParseFile as parse_file

haraken 2013/07/16 14:17:51 Looks like parse_file is unused.

Nils Barth (inactive) 2013/07/17 12:05:09 parse_file is used in idl_reader (it’s a simple ut

+from blink_idl_lexer import BlinkIDLLexer

+# We ignore comments, but base parser preserves them

+# FIXME: Upstream: comments should be removed in base parser

+REMOVED_RULES = [
+    'Comments',  # [0.1]
+    'CommentsRest',  # [0.2]
+]

+class BlinkIDLParser(IDLParser):

+ # Below are grammar rules used by yacc, given by functions named p_<RULE>.

+ # * The docstring is the production rule in BNF (grammar).

+ # * The body is the yacc action (semantics).

+ # Reference:

+ # http://www.dabeaz.com/ply/ply.html#ply_nn23

+ #

+ # Review of yacc:

haraken 2013/07/16 14:17:51 Great summary! This is the easiest yacc tutorial I

Nils Barth (inactive) 2013/07/17 12:05:09 Thanks! (#^_^#)

+ # Yacc parses a token stream, internally producing a Concrete Syntax Tree

+ # (CST), where each node corresponds to a production rule in the grammar.

+ # At each node, it runs an action, which is usually "produce a node in the

+ # Abstract Syntax Tree (AST)" or "ignore this node" (for nodes in the CST

+ # that aren't included in the AST, since only needed for parsing).

+ #

+ # The rules use pseudo-variables; in PLY syntax:

+ # p[0] is the left side: assign return value to p[0] instead of returning,

+ # p[1] ... p[n] are the right side: the values can be accessed, and they

+ # can be modified.

+ # (In yacc these are $$ and $1 ... $n.)

+ #

+ # The rules can look cryptic at first, but there are a few standard

+ # transforms from the CST to AST. With these in mind, the actions should

+ # be reasonably legible.

+ #

+ # * Ignore production

+ # Discard this branch. Primarily used when one alternative is empty.

+ #

+ # Sample code:

+ # if len(p) > 1:

+ # p[0] = ...

+ # # Note no assignment if len(p) == 1

+ #

+ # * Eliminate singleton production

+ # Discard this node in the CST, pass the next level down up the tree.

+ # Used to ignore productions only necessary for parsing, but not needed

+ # in the AST.

+ #

+ # Sample code:

+ # p[0] = p[1]

+ #

+ # * Build node

+ # The key type of rule. In this parser, produces object of class IDLNode.

+ # There are several helper functions; see base idl_parser.py for

+ # definitions and more examples of use.

+ #

+ # Sample code:

+ # # Build node of type NodeType, with value p[1], and children.

+ # p[0] = self.BuildProduction('NodeType', p, 1, children)

+ #

+ # # Build named node of type NodeType, with name and value p[1].

haraken 2013/07/16 14:17:51 I'm just curious: What's the difference between a

Nils Barth (inactive) 2013/07/17 12:05:09 A named node also has the attribute ‘NAME’ set; th

+ # # (children optional)

+ # p[0] = self.BuildNamed('NodeType', p, 1)

+ #

+ # # Make a list

+ # # Used if one node has several children.

+ # children = ListFromConcat(p[2], p[3])

+ # p[0] = self.BuildProduction('NodeType', p, 1, children)

+ #

+ # # Also used to collapse the right-associative tree

+ # # produced by parsing a list back into a single list.

+ # """Foos : Foo Foos

+ # |"""

+ # if len(p) > 1:

+ # p[0] = ListFromConcat(p[1], p[2])

+ #

+ # # Add children.

+ # # Primarily used to add attributes, produced via BuildTrue.

+ # # p_StaticAttribute

+ # """StaticAttribute : STATIC Attribute"""

+ # p[2].AddChildren(self.BuildTrue('STATIC'))

+ # p[0] = p[2]

+ #

haraken 2013/07/16 14:17:51 You might want to add an explanation for self.Buil

Nils Barth (inactive) 2013/07/17 12:05:09 Got it, done. (Also BuildTrue.)

+ # Numbering scheme for the rules is:

+ # [1] for Web IDL spec (or additions in base parser)

+ # These should all be upstreamed to the base parser.

+ # [b1] for Blink IDL changes (overrides Web IDL)

+ # [b1.1] for Blink IDL additions, auxiliary rules for [b1]

haraken 2013/07/16 14:17:51 The [X] numbering is fragile. As far as I see the

Nils Barth (inactive) 2013/07/17 12:05:09 This is for consistency with the base Pepper parse

haraken 2013/07/21 14:31:50 Makes sense.

+ # [0] Override grammar, since we strip comments

+ # (not in Web IDL)

+ # FIXME: Upstream

+    def p_Top(self, p):
+        """Top : Definitions"""
+        # Singleton production: the definitions are passed up unchanged.
+        # (The docstring above is the grammar rule read by yacc.)
+        definitions = p[1]
+        p[0] = definitions

+ # [3] Override action, since we distinguish callbacks

+ # FIXME: Upstream

+    def p_CallbackOrInterface(self, p):
+        """CallbackOrInterface : CALLBACK CallbackRestOrInterface
+                               | Interface"""
+        # Plain interface: pass the node up unchanged.
+        if len(p) <= 2:
+            p[0] = p[1]
+            return
+        # Callback: tag the node with a CALLBACK child before passing it up.
+        callback_node = p[2]
+        callback_node.AddChildren(self.BuildTrue('CALLBACK'))
+        p[0] = callback_node

+ # [b27] Add strings, more 'Literal' productions

+ # 'Literal's needed because integers and strings are both internally strings

+    def p_ConstValue(self, p):
+        """ConstValue : BooleanLiteral
+                      | FloatLiteral
+                      | IntegerLiteral
+                      | StringLiteral
+                      | null"""
+        # Blink dialect: adds a string alternative and uses more 'Literal'
+        # productions. The Web IDL standard rule is:
+        #     ConstValue : BooleanLiteral
+        #                | FloatLiteral
+        #                | integer
+        #                | NULL
+        literal = p[1]
+        p[0] = literal

+ # [b27.1]

+    def p_IntegerLiteral(self, p):
+        """IntegerLiteral : integer"""
+        # Yields two attributes: a TYPE tag and the literal text as NAME.
+        type_attribute = self.BuildAttribute('TYPE', 'integer')
+        name_attribute = self.BuildAttribute('NAME', p[1])
+        p[0] = ListFromConcat(type_attribute, name_attribute)

+ # [b27.2]

+    def p_StringLiteral(self, p):
+        """StringLiteral : string"""
+        # Yields two attributes: a DOMString TYPE tag and the text as NAME.
+        type_attribute = self.BuildAttribute('TYPE', 'DOMString')
+        name_attribute = self.BuildAttribute('NAME', p[1])
+        p[0] = ListFromConcat(type_attribute, name_attribute)

+ # [b30] Add StaticAttribute

+    def p_AttributeOrOperation(self, p):
+        """AttributeOrOperation : STRINGIFIER StringifierAttributeOrOperation
+                                | Attribute
+                                | StaticAttribute
+                                | Operation"""
+        # Blink dialect adds the StaticAttribute alternative. Standard rule:
+        #     AttributeOrOperation : STRINGIFIER StringifierAttributeOrOperation
+        #                          | Attribute
+        #                          | Operation
+        # Only the STRINGIFIER alternative has two symbols, and its payload is
+        # the second one; every other alternative is a singleton.
+        p[0] = p[2] if len(p) > 2 else p[1]

haraken 2013/07/16 14:17:51 Don't you need to add p[2].AddChildren(self.BuildT

Nils Barth (inactive) 2013/07/17 12:05:09 That’s handled in the base parser: https://code.go

haraken 2013/07/21 14:31:50 Ah, got it. But it looks inconsistent. Given that

Nils Barth (inactive) 2013/07/22 06:32:01 Good point, that would be clearer: just AddChildre

+ # [b30.1]

+    def p_StaticAttribute(self, p):
+        """StaticAttribute : STATIC Attribute"""
+        # Tag the attribute node with a STATIC child, then pass it up.
+        attribute = p[2]
+        attribute.AddChildren(self.BuildTrue('STATIC'))
+        p[0] = attribute

+ # [b47]

+    def p_ExceptionMember(self, p):
+        """ExceptionMember : Const
+                           | ExceptionField
+                           | Attribute
+                           | ExceptionFieldToString"""
+        # Blink dialect adds Attribute and ExceptionFieldToString.
+        # The Web IDL standard rule is:
+        #     ExceptionMember : Const
+        #                     | ExceptionField
+        member = p[1]
+        p[0] = member

+ # [b47.1]

+    def p_ExceptionFieldToString(self, p):
+        """ExceptionFieldToString : Type identifier '(' ')' ';'"""
+        # Needed to handle this exception member:
+        #     // Override in a Mozilla compatible format
+        #     [NotEnumerable] DOMString toString();

haraken 2013/07/16 14:17:51 Why do we need to handle toString() specially.

Nils Barth (inactive) 2013/07/17 12:05:09 Operations in Exceptions are not in the Web IDL sp

haraken 2013/07/21 14:31:50 Looks good, thanks!

Nils Barth (inactive) 2013/07/22 06:32:01 Looking into this more, this “toString()” function

+        # The node takes its name from the identifier (p[2]); the Type (p[1])
+        # is passed as its child.
+        return_type = p[1]
+        p[0] = self.BuildNamed('ExceptionFieldToString', p, 2, return_type)

+ # Extended attributes

+ # [b49] Override base parser: remove comment field, since comments stripped

haraken 2013/07/16 14:17:51 I'm just curious: Would you elaborate on why we ne

Nils Barth (inactive) 2013/07/17 12:05:09 The Pepper IDL parser assumes that each interface

+ # FIXME: Upstream

+    def p_ExtendedAttributeList(self, p):
+        """ExtendedAttributeList : '[' ExtendedAttribute ExtendedAttributes ']'
+                                 | '[' ']'
+                                 | """
+        # Only the first alternative produces a node; for '[' ']' and the empty
+        # alternative p[0] is left unassigned.
+        if len(p) > 3:
+            # First attribute plus the (already flattened) rest of the list.
+            items = ListFromConcat(p[2], p[3])
+            # Assign the node directly: p[0] is the result slot and has not
+            # been assigned yet, so there is nothing to concatenate it with.
+            p[0] = self.BuildProduction('ExtAttributes', p, 1, items)

haraken 2013/07/16 14:17:51 I'm just curious: Why isn't this "p[0] = attribs"

Nils Barth (inactive) 2013/07/17 12:05:09 Good catch! Fixed (and actually don’t need auxilia

+ # [b50] Allow optional trailing comma

haraken 2013/07/16 14:17:51 Haven't you already removed all trailing commas fr

Nils Barth (inactive) 2013/07/17 12:05:09 (See separate reply.)

Nils Barth (inactive) 2013/07/17 12:07:11 I removed them, but people kept putting them back.

haraken 2013/07/21 14:31:50 Thanks, fixing the spec side sounds reasonable to

Nils Barth (inactive) 2013/07/22 06:32:01 I've added a link to the bug too: https://www.w3.o

+    def p_ExtendedAttributes(self, p):
+        """ExtendedAttributes : ',' ExtendedAttribute ExtendedAttributes
+                              | ','
+                              |"""
+        # A lone trailing comma and the empty alternative contribute nothing.
+        if len(p) <= 2:
+            return
+        # Collapse the right-recursive tail into a single flat list.
+        head, tail = p[2], p[3]
+        p[0] = ListFromConcat(head, tail)

+ # [b51] Add ExtendedAttributeIdentAndOrIdent

+    def p_ExtendedAttribute(self, p):
+        """ExtendedAttribute : ExtendedAttributeNoArgs
+                             | ExtendedAttributeArgList
+                             | ExtendedAttributeIdent
+                             | ExtendedAttributeIdentAndOrIdent
+                             | ExtendedAttributeNamedArgList"""
+        # Singleton production: whichever alternative matched is passed up.
+        attribute = p[1]
+        p[0] = attribute

+ # [59]

+ # FIXME: Upstream UnionType

+    def p_UnionType(self, p):
+        """UnionType : '(' UnionMemberType OR UnionMemberType UnionMemberTypes ')'"""
+        # The two mandatory members plus any further ones become the children
+        # of a single UnionType node.
+        first, second, remaining = p[2], p[4], p[5]
+        p[0] = self.BuildProduction('UnionType', p, 1,
+                                    ListFromConcat(first, second, remaining))

+ # [60]

+    def p_UnionMemberType(self, p):
+        """UnionMemberType : NonAnyType
+                           | UnionType TypeSuffix
+                           | ANY '[' ']' TypeSuffix"""
+        # The alternatives are told apart by their symbol count.
+        symbol_count = len(p)
+        if symbol_count == 2:
+            # NonAnyType
+            p[0] = p[1]
+            return
+        if symbol_count == 3:
+            # UnionType TypeSuffix
+            p[0] = ListFromConcat(p[1], p[2])
+            return
+        # ANY '[' ']' TypeSuffix
+        any_node = self.BuildProduction('Any', p, 1)
+        p[0] = ListFromConcat(any_node, p[4])

+ # [61]

+    def p_UnionMemberTypes(self, p):
+        """UnionMemberTypes : OR UnionMemberType UnionMemberTypes
+                            |"""
+        # Empty alternative: nothing to contribute.
+        if len(p) <= 1:
+            return
+        # Collapse the right-recursive tail into a single flat list.
+        member, remaining = p[2], p[3]
+        p[0] = ListFromConcat(member, remaining)

+ # [70] Override base parser to remove non-standard sized array

+ # FIXME: Upstream

+    def p_TypeSuffix(self, p):
+        """TypeSuffix : '[' ']' TypeSuffix
+                      | '?' TypeSuffixStartingWithArray
+                      |"""
+        # The alternatives are told apart by symbol count and are mutually
+        # exclusive; the empty alternative leaves p[0] unassigned.
+        if len(p) == 4:
+            # '[' ']' TypeSuffix
+            p[0] = self.BuildProduction('Array', p, 1, p[3])
+        elif len(p) == 3:

haraken 2013/07/16 14:17:51 Nit: elif

Nils Barth (inactive) 2013/07/17 12:05:09 (>.<) Done. Needs fixing in base parser too.

+            # '?' TypeSuffixStartingWithArray
+            p[0] = ListFromConcat(self.BuildTrue('NULLABLE'), p[2])

+ # [b76.1]

+    def p_ExtendedAttributeIdentAndOrIdent(self, p):
+        """ExtendedAttributeIdentAndOrIdent : identifier '=' identifier '&' identifier
+                                            | identifier '=' identifier '|' identifier"""
+        # The stored value keeps the operator between the two identifiers,
+        # i.e. the three symbols joined with no separator.
+        left, operator, right = p[3], p[4], p[5]
+        value = self.BuildAttribute('VALUE', left + operator + right)
+        # The node takes its name from the leading identifier, p[1].
+        p[0] = self.BuildNamed('ExtAttribute', p, 1, value)

haraken 2013/07/16 14:17:51 I'm just curious: Where is the identifier of p[1]

Nils Barth (inactive) 2013/07/17 12:05:09 p[1] is recorded in the name: that’s what BuildNam

+    def __dir__(self):
+        # Remove the REMOVED_RULES productions from the attribute listing so
+        # that yacc doesn't parse them.
+        # FIXME: Upstream
+        # list(dict) rather than dict.keys() + list: the latter raises
+        # TypeError where keys() returns a view instead of a list.
+        keys = set(list(self.__dict__) + dir(self.__class__))
+        for rule in REMOVED_RULES:
+            # discard(), not remove(): a rule that is not defined (any more)
+            # is already absent from the listing, which is the goal here.
+            keys.discard('p_' + rule)
+        return list(keys)

+    def __init__(self, lexer=None, verbose=False, debug=False, mute_error=False):
+        # Fall back to the Blink lexer when the caller supplies none.
+        if not lexer:
+            lexer = BlinkIDLLexer()
+        self.lexer = lexer
+        self.tokens = lexer.KnownTokens()
+        # SLR (instead of LALR) generates the table faster but produces the
+        # same output. This is acceptable because Web IDL (and Blink IDL) is an
+        # LL(1) grammar, so SLR can parse it.
+        self.yaccobj = yacc.yacc(module=self, debug=debug, method='SLR')
+        self.parse_debug = debug
+        self.verbose = verbose
+        self.mute_error = mute_error
+        # Error and warning bookkeeping starts out empty.
+        self._parse_errors = self._parse_warnings = 0
+        self._last_error_msg = None
+        self._last_error_lineno = self._last_error_pos = 0

+# If run by itself, attempt to build the parser: constructing BlinkIDLParser
+# calls yacc.yacc() on the grammar rules defined in the class. The resulting
+# instance is not used any further here.

+if __name__ == '__main__':

+ parser = BlinkIDLParser()