Source/bindings/scripts/blink_idl_parser.py - Issue 15801003: IDL parser rewrite in Python

Unified Diff: Source/bindings/scripts/blink_idl_parser.py

Issue 15801003: IDL parser rewrite in Python (Closed) Base URL: svn://svn.chromium.org/blink/trunk

Patch Set: Revised. Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« Source/bindings/scripts/blink_idl_lexer.py ('K') | « Source/bindings/scripts/blink_idl_lexer.py ('k') | Source/bindings/scripts/idl_compiler.py » ('j') | Source/bindings/scripts/idl_definitions.py » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: Source/bindings/scripts/blink_idl_parser.py

diff --git a/Source/bindings/scripts/blink_idl_parser.py b/Source/bindings/scripts/blink_idl_parser.py

new file mode 100644

index 0000000000000000000000000000000000000000..c554c5158e5fe76e725cf61cb37f98933dbd2454

--- /dev/null

+++ b/Source/bindings/scripts/blink_idl_parser.py

@@ -0,0 +1,354 @@

+# Redistribution and use in source and binary forms, with or without

+# modification, are permitted provided that the following conditions are

+# met:

+# * Redistributions of source code must retain the above copyright

+# notice, this list of conditions and the following disclaimer.

+# * Redistributions in binary form must reproduce the above

+# copyright notice, this list of conditions and the following disclaimer

+# in the documentation and/or other materials provided with the

+# distribution.

+# * Neither the name of Google Inc. nor the names of its

+# contributors may be used to endorse or promote products derived from

+# this software without specific prior written permission.

+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+"""Parser for Blink IDL.

+The parser uses the PLY (Python Lex-Yacc) library to build a set of parsing

+rules which understand the Blink dialect of Web IDL.

+It derives from a standard Web IDL parser, overriding rules where Blink IDL

+differs syntactically or semantically from the base parser, or where the base

+parser diverges from the Web IDL standard.

+Web IDL:

+ http://www.w3.org/TR/WebIDL/

+Web IDL Grammar:

+ http://www.w3.org/TR/WebIDL/#idl-grammar

+PLY:

+ http://www.dabeaz.com/ply/

+"""

+# Disable pylint checks for line length (C0301) and "method could be a

+# function" (R0201), due to how grammar rules are defined with PLY

+# pylint: disable=R0201

+# pylint: disable=C0301

+# Disable attribute validation, as lint can't import parent class to check

+# pylint: disable=E1101

+import os.path

+import sys

+# PLY is in Chromium src/third_party/ply

+module_path, module_name = os.path.split(__file__)

+third_party = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir)

+sys.path.append(third_party)

+from ply import yacc

+# Base parser is in Chromium src/tools/idl_parser

+tools_dir = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir, os.pardir, 'tools')

+sys.path.append(tools_dir)

+from idl_parser.idl_parser import IDLParser, ListFromConcat

+from idl_parser.idl_parser import ParseFile as parse_file

+from blink_idl_lexer import BlinkIDLLexer

+# We ignore comments, but base parser preserves them

+# FIXME: Upstream: comments should be removed in base parser

+REMOVED_RULES = ['Comments', # [0.1]

+ 'CommentsRest', # [0.2]

+ ]

+class BlinkIDLParser(IDLParser):

+ # Below are grammar rules used by yacc, given by functions named p_<RULE>.

+ # * The docstring is the production rule in BNF (grammar).

haraken 2013/07/22 01:50:23 Help me understand: The docstring is actually used

Nils Barth (inactive) 2013/07/22 06:32:01 Exactly (see above, in lexer). I’ve written a more

+ # * The body is the yacc action (semantics).

+ # Reference:

+ # http://www.dabeaz.com/ply/ply.html#ply_nn23

+ #

+ # Review of yacc:

+ # Yacc parses a token stream, internally producing a Concrete Syntax Tree

+ # (CST), where each node corresponds to a production rule in the grammar.

+ # At each node, it runs an action, which is usually "produce a node in the

+ # Abstract Syntax Tree (AST)" or "ignore this node" (for nodes in the CST

+ # that aren't included in the AST, since they are only needed for parsing).

+ #

+ # The rules use pseudo-variables; in PLY syntax:

+ # p[0] is the left side: assign return value to p[0] instead of returning,

+ # p[1] ... p[n] are the right side: the values can be accessed, and they

+ # can be modified.

+ # (In yacc these are $$ and $1 ... $n.)

+ #

+ # The rules can look cryptic at first, but there are a few standard

+ # transforms from the CST to AST. With these in mind, the actions should

+ # be reasonably legible.

+ #

+ # * Ignore production

+ # Discard this branch. Primarily used when one alternative is empty.

+ #

+ # Sample code:

+ # if len(p) > 1:

+ # p[0] = ...

+ # # Note no assignment if len(p) == 1

+ #

+ # * Eliminate singleton production

+ # Discard this node in the CST and pass its single child up the tree.

+ # Used to ignore productions only necessary for parsing, but not needed

+ # in the AST.

+ #

+ # Sample code:

+ # p[0] = p[1]

+ #

+ # * Build node

+ # The key type of rule. In this parser, it produces an object of class IDLNode.

+ # There are several helper functions:

+ # * BuildProduction: actually builds an IDLNode, based on a production.

+ # * BuildAttribute: builds an IDLAttribute, which is a temporary

+ # object to hold a name-value pair, which is then

+ # set as a Property of the IDLNode when the IDLNode

+ # is built.

+ # * BuildNamed: Same as BuildProduction, but also sets the 'NAME'

haraken 2013/07/22 01:50:23 BuildNamedProduction => BuildNamed

Nils Barth (inactive) 2013/07/22 06:32:01 (>.<) Done.

+ # property.

+ # * BuildTrue: BuildAttribute with value True, for flags.

+ # See base idl_parser.py for definitions and more examples of use.

+ #

+ # Sample code:

+ # # Build node of type NodeType, with value p[1], and children.

+ # p[0] = self.BuildProduction('NodeType', p, 1, children)

+ #

+ # # Build named node of type NodeType, with name and value p[1].

+ # # (children optional)

+ # p[0] = self.BuildNamed('NodeType', p, 1)

+ #

+ # # Make a list

+ # # Used if one node has several children.

+ # children = ListFromConcat(p[2], p[3])

+ # p[0] = self.BuildProduction('NodeType', p, 1, children)

+ #

+ # # Also used to collapse the right-associative tree

+ # # produced by parsing a list back into a single list.

+ # """Foos : Foo Foos

+ # |"""

+ # if len(p) > 1:

+ # p[0] = ListFromConcat(p[1], p[2])

+ #

+ # # Add children.

+ # # Primarily used to add attributes, produced via BuildTrue.

+ # # p_StaticAttribute

+ # """StaticAttribute : STATIC Attribute"""

+ # p[2].AddChildren(self.BuildTrue('STATIC'))

+ # p[0] = p[2]

+ #

+ # Numbering scheme for the rules is:

+ # [1] for Web IDL spec (or additions in base parser)

+ # These should all be upstreamed to the base parser.

+ # [b1] for Blink IDL changes (overrides Web IDL)

+ # [b1.1] for Blink IDL additions, auxiliary rules for [b1]

+ # Numbers are as per Candidate Recommendation 19 April 2012:

+ # http://www.w3.org/TR/2012/CR-WebIDL-20120419/

+ # [0] Override grammar, since we strip comments

+ # (not in Web IDL)

+ # FIXME: Upstream

+ def p_Top(self, p):

+ """Top : Definitions"""

+ p[0] = p[1]

+ # [3] Override action, since we distinguish callbacks

+ # FIXME: Upstream

+ def p_CallbackOrInterface(self, p):

+ """CallbackOrInterface : CALLBACK CallbackRestOrInterface

+ | Interface"""

+ if len(p) > 2:

+ p[2].AddChildren(self.BuildTrue('CALLBACK'))

+ p[0] = p[2]

+ else:

+ p[0] = p[1]

+ # [b27] Add strings, more 'Literal' productions

+ # The extra 'Literal' productions are needed because integers and strings

+ # are both internally strings, so each production records a TYPE attribute

+ def p_ConstValue(self, p):

+ """ConstValue : BooleanLiteral

+ | FloatLiteral

+ | IntegerLiteral

+ | StringLiteral

+ | null"""

+ # Standard is (no 'string', fewer 'Literal's):

+ # ConstValue : BooleanLiteral

+ # | FloatLiteral

+ # | integer

+ # | NULL

+ p[0] = p[1]

+ # [b27.1]

+ def p_IntegerLiteral(self, p):

+ """IntegerLiteral : integer"""

+ p[0] = ListFromConcat(self.BuildAttribute('TYPE', 'integer'),

+ self.BuildAttribute('NAME', p[1]))

+ # [b27.2]

+ def p_StringLiteral(self, p):

+ """StringLiteral : string"""

+ p[0] = ListFromConcat(self.BuildAttribute('TYPE', 'DOMString'),

+ self.BuildAttribute('NAME', p[1]))

+ # [b30] Add StaticAttribute

+ def p_AttributeOrOperation(self, p):

+ """AttributeOrOperation : STRINGIFIER StringifierAttributeOrOperation

+ | Attribute

+ | StaticAttribute

+ | Operation"""

+ # Standard is (no StaticAttribute):

+ # AttributeOrOperation : STRINGIFIER StringifierAttributeOrOperation

+ # | Attribute

+ # | Operation

+ if len(p) > 2:

+ p[0] = p[2]

haraken 2013/07/22 01:50:23 As mentioned in the previous comment, it looks inc

Nils Barth (inactive) 2013/07/22 06:32:01 Yup, done!

+ else:

+ p[0] = p[1]

+ # [b30.1]

+ def p_StaticAttribute(self, p):

+ """StaticAttribute : STATIC Attribute"""

+ p[2].AddChildren(self.BuildTrue('STATIC'))

+ p[0] = p[2]

+ # [b47]

+ def p_ExceptionMember(self, p):

+ """ExceptionMember : Const

+ | ExceptionField

+ | Attribute

+ | ExceptionOperation"""

+ # Standard is (no Attribute, no ExceptionOperation):

+ # ExceptionMember : Const

+ # | ExceptionField

+ # FIXME: In DOMException.idl, Attributes should be changed to

+ # ExceptionFields, and Attribute removed from this rule.

+ p[0] = p[1]

+ # [b47.1]

+ def p_ExceptionOperation(self, p):

+ """ExceptionOperation : Type identifier '(' ')' ';'"""

+ # Needed to handle one case in DOMException.idl:

+ # // Override in a Mozilla compatible format

+ # [NotEnumerable] DOMString toString();

+ # Limited form of Operation to prevent others from being added.

+ p[0] = self.BuildNamed('ExceptionOperation', p, 2, p[1])

+ # Extended attributes

+ # [b49] Override base parser: remove comment field, since comments are stripped

+ # FIXME: Upstream

+ def p_ExtendedAttributeList(self, p):

+ """ExtendedAttributeList : '[' ExtendedAttribute ExtendedAttributes ']'

+ | '[' ']'

+ | """

+ if len(p) > 3:

+ items = ListFromConcat(p[2], p[3])

+ p[0] = self.BuildProduction('ExtAttributes', p, 1, items)

+ # [b50] Allow optional trailing comma

haraken 2013/07/22 01:50:23 Shall we add a FIXME that says this is currently a

Nils Barth (inactive) 2013/07/22 06:32:01 Done. Not sure if it will be fixed, since Cameron

+ def p_ExtendedAttributes(self, p):

+ """ExtendedAttributes : ',' ExtendedAttribute ExtendedAttributes

+ | ','

+ |"""

+ if len(p) > 2:

haraken 2013/07/22 01:50:23 Nit: len(p) > 3

Nils Barth (inactive) 2013/07/22 06:32:01 Technically > 2 is ok (since length is 1, 2, or 4)

+ p[0] = ListFromConcat(p[2], p[3])

+ # [b51] Add ExtendedAttributeIdentAndOrIdent

+ def p_ExtendedAttribute(self, p):

+ """ExtendedAttribute : ExtendedAttributeNoArgs

+ | ExtendedAttributeArgList

+ | ExtendedAttributeIdent

+ | ExtendedAttributeIdentAndOrIdent

+ | ExtendedAttributeNamedArgList"""

+ p[0] = p[1]

+ # [59]

+ # FIXME: Upstream UnionType

+ def p_UnionType(self, p):

+ """UnionType : '(' UnionMemberType OR UnionMemberType UnionMemberTypes ')'"""

+ members = ListFromConcat(p[2], p[4], p[5])

+ p[0] = self.BuildProduction('UnionType', p, 1, members)

+ # [60]

+ def p_UnionMemberType(self, p):

+ """UnionMemberType : NonAnyType

+ | UnionType TypeSuffix

+ | ANY '[' ']' TypeSuffix"""

+ if len(p) == 2:

+ p[0] = p[1]

+ elif len(p) == 3:

+ p[0] = ListFromConcat(p[1], p[2])

+ else:

+ p[0] = ListFromConcat(self.BuildProduction('Any', p, 1), p[4])

+ # [61]

+ def p_UnionMemberTypes(self, p):

+ """UnionMemberTypes : OR UnionMemberType UnionMemberTypes

+ |"""

+ if len(p) > 1:

haraken 2013/07/22 01:50:23 Nit: len(p) > 3

Nils Barth (inactive) 2013/07/22 06:32:01 Ditto, done.

+ p[0] = ListFromConcat(p[2], p[3])

+ # [70] Override base parser to remove non-standard sized array

+ # FIXME: Upstream

+ def p_TypeSuffix(self, p):

+ """TypeSuffix : '[' ']' TypeSuffix

+ | '?' TypeSuffixStartingWithArray

+ |"""

+ if len(p) == 4:

+ p[0] = self.BuildProduction('Array', p, 1, p[3])

+ elif len(p) == 3:

+ p[0] = ListFromConcat(self.BuildTrue('NULLABLE'), p[2])

+ # [b76.1]

+ def p_ExtendedAttributeIdentAndOrIdent(self, p):

+ """ExtendedAttributeIdentAndOrIdent : identifier '=' identifier '&' identifier

haraken 2013/07/22 01:50:23 Help me understand: How is [X=A&B&C] parsed? I tho

Nils Barth (inactive) 2013/07/22 06:32:01 Currently we only ever have 2 flags: A|B or A&B, s

+ | identifier '=' identifier '|' identifier"""

+ value = self.BuildAttribute('VALUE', p[3] + p[4] + p[5])

+ p[0] = self.BuildNamed('ExtAttribute', p, 1, value)

+ def __dir__(self):

+ # Remove REMOVED_RULES from listing so yacc doesn't parse them

+ # FIXME: Upstream

+ keys = set(self.__dict__.keys() + dir(self.__class__))

+ for rule in REMOVED_RULES:

+ keys.remove('p_' + rule)

+ return list(keys)

+ def __init__(self, lexer=None, verbose=False, debug=False, mute_error=False):

+ lexer = lexer or BlinkIDLLexer()

+ self.lexer = lexer

+ self.tokens = lexer.KnownTokens()

+ # Using SLR (instead of LALR) generates the table faster,

+ # but produces the same output. This is OK because SLR can parse the Web IDL

+ # (and Blink IDL) grammar, which is LL(1). (LL(1) alone does not imply SLR.)

+ self.yaccobj = yacc.yacc(module=self, debug=debug, method='SLR')

+ self.parse_debug = debug

+ self.verbose = verbose

+ self.mute_error = mute_error

+ self._parse_errors = 0

+ self._parse_warnings = 0

+ self._last_error_msg = None

+ self._last_error_lineno = 0

+ self._last_error_pos = 0

+# If run by itself, attempt to build the parser

+if __name__ == '__main__':

+ parser = BlinkIDLParser()