Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1270)

Unified Diff: mojo/public/bindings/parse/mojo_lexer.py

Issue 130443003: Add support for using expressions as enum values. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: copyright Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | mojo/public/bindings/parse/mojo_parser.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: mojo/public/bindings/parse/mojo_lexer.py
diff --git a/mojo/public/bindings/parse/mojo_lexer.py b/mojo/public/bindings/parse/mojo_lexer.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb19fbb97698d88cdcf5259f80e4953d5fcd37ee
--- /dev/null
+++ b/mojo/public/bindings/parse/mojo_lexer.py
@@ -0,0 +1,304 @@
+# PLY based Lexer class, based on pycparser by Eli Bendersky.
+#
+# Copyright (c) 2012, Eli Bendersky
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of Eli Bendersky nor the names of its contributors may
+#   be used to endorse or promote products derived from this software without
+#   specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import re
+import sys
+import os.path
+
+# Try to import the ply module; if that fails, assume it lives in the
+# third_party directory, add that directory to sys.path, and retry the import.
+try:
+ # Disable lint check which fails to find the ply module.
+ # pylint: disable=F0401
+ from ply.lex import TOKEN
+except ImportError:
+ module_path, module_name = os.path.split(__file__)
+ third_party = os.path.join(
+ module_path, os.pardir, os.pardir, os.pardir, os.pardir, 'third_party')
+ sys.path.append(third_party)
+ # pylint: disable=F0401
+ from ply.lex import TOKEN
+
+
+class Lexer(object):
+ ######################-- PRIVATE --######################
+
+ ##
+ ## Internal auxiliary methods
+ ## _error: print |msg| with the token's line number, then skip one character.
+ def _error(self, msg, token):
+   print('%s at line %d' % (msg, token.lineno))
+   token.lexer.skip(1)  # PLY sets token.lexer; self.lexer is never assigned.
+
+ ##
+ ## Reserved keywords (token type names; t_NAME matches their lowercase form)
+ ##
+ keywords = (
+ 'HANDLE',
+ 'DATA_PIPE_CONSUMER',
+ 'DATA_PIPE_PRODUCER',
+ 'MESSAGE_PIPE',
+
+ 'MODULE',
+ 'STRUCT',
+ 'INTERFACE',
+ 'ENUM',
+ 'VOID',
+ )
+
+ keyword_map = {}  # lowercase keyword text -> token type name
+ for keyword in keywords:
+ keyword_map[keyword.lower()] = keyword
+
+ ##
+ ## All the tokens recognized by the lexer (keywords plus the names below)
+ ##
+ tokens = keywords + (
+ # Identifiers
+ 'NAME',
+
+ # Constants
+ 'ORDINAL',
+ 'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX',
+ 'FLOAT_CONST', 'HEX_FLOAT_CONST',
+ 'CHAR_CONST',
+ 'WCHAR_CONST',
+
+ # String literals
+ 'STRING_LITERAL',
+ 'WSTRING_LITERAL',
+
+ # Operators
+ 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
+ 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
+ 'LOR', 'LAND', 'LNOT',
+ 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
+
+ # Assignment
+ 'EQUALS',
+
+ # Conditional operator (?)
+ 'CONDOP',
+
+ # Delimiters
+ 'LPAREN', 'RPAREN', # ( )
+ 'LBRACKET', 'RBRACKET', # [ ]
+ 'LBRACE', 'RBRACE', # { }
+ 'SEMI', 'COLON', # ; :
+ 'COMMA', # ,
+ )
+
+ ##
+ ## Regexes for use in tokens
+ ## (string fragments concatenated below and handed to the t_* rules)
+ ##
+
+ # valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers)
+ identifier = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
+
+ hex_prefix = '0[xX]'
+ hex_digits = '[0-9a-fA-F]+'
+
+ # integer constants (K&R2: A.2.5.1); each accepts an optional u/l/ll suffix
+ integer_suffix_opt = \
+ r'(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
+ decimal_constant = \
+ '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
+ octal_constant = '0[0-7]*'+integer_suffix_opt
+ hex_constant = hex_prefix+hex_digits+integer_suffix_opt
+
+ bad_octal_constant = '0[0-7]*[89]'  # leading 0, octal digits, then an 8 or 9
+
+ # character constants (K&R2: A.2.5.2)
+ # Note: a-zA-Z and the punctuation in simple_escape are allowed as escape
+ # chars to support #line directives with Windows paths (..\..\dir\file).
+ # For the same reason, decimal_escape allows all digit sequences. We want to
+ # parse all correct code, even if it means sometimes parsing incorrect
+ # code.
+ #
+ simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
+ decimal_escape = r"""(\d+)"""
+ hex_escape = r"""(x[0-9a-fA-F]+)"""
+ bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""
+
+ escape_sequence = \
+ r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
+ cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
+ char_const = "'"+cconst_char+"'"
+ wchar_const = 'L'+char_const
+ unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
+ bad_char_const = \
+ r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+ \
+ bad_escape+r"""[^'\n]*')"""
+
+ # string literals (K&R2: A.2.6); a raw newline is not allowed inside one
+ string_char = r"""([^"\\\n]|"""+escape_sequence+')'
+ string_literal = '"'+string_char+'*"'
+ wstring_literal = 'L'+string_literal  # string literal with an L prefix
+ bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'
+
+ # floating constants (K&R2: A.2.5.3), decimal and hexadecimal forms
+ exponent_part = r"""([eE][-+]?[0-9]+)"""
+ fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
+ floating_constant = \
+ '(((('+fractional_constant+')'+ \
+ exponent_part+'?)|([0-9]+'+exponent_part+'))[FfLl]?)'
+ binary_exponent_part = r'''([pP][+-]?[0-9]+)'''
+ hex_fractional_constant = \
+ '((('+hex_digits+r""")?\."""+hex_digits+')|('+hex_digits+r"""\.))"""
+ hex_floating_constant = \
+ '('+hex_prefix+'('+hex_digits+'|'+hex_fractional_constant+')'+ \
+ binary_exponent_part+'[FfLl]?)'  # the p-exponent is mandatory here
+
+ ##
+ ## Rules for the normal state
+ ##
+ t_ignore = ' \t'  # spaces and tabs between tokens are skipped
+
+ # Newlines: advance the lexer's line counter; no token is returned
+ def t_NEWLINE(self, t):
+ r'\n+'
+ t.lexer.lineno += t.value.count("\n")
+
+ # Operators. Plain-string rules: PLY sorts these by decreasing regex length, so '<<' and '<=' are tried before '<'.
+ t_PLUS = r'\+'
+ t_MINUS = r'-'
+ t_TIMES = r'\*'
+ t_DIVIDE = r'/'
+ t_MOD = r'%'
+ t_OR = r'\|'
+ t_AND = r'&'
+ t_NOT = r'~'
+ t_XOR = r'\^'
+ t_LSHIFT = r'<<'
+ t_RSHIFT = r'>>'
+ t_LOR = r'\|\|'
+ t_LAND = r'&&'
+ t_LNOT = r'!'
+ t_LT = r'<'
+ t_GT = r'>'
+ t_LE = r'<='
+ t_GE = r'>='
+ t_EQ = r'=='
+ t_NE = r'!='
+
+ # Assignment (=)
+ t_EQUALS = r'='
+
+ # Conditional operator (?)
+ t_CONDOP = r'\?'
+
+ # Delimiters
+ t_LPAREN = r'\('
+ t_RPAREN = r'\)'
+ t_LBRACKET = r'\['
+ t_RBRACKET = r'\]'
+ t_LBRACE = r'\{'
+ t_RBRACE = r'\}'
+ t_COMMA = r','
+ t_SEMI = r';'
+ t_COLON = r':'
+
+ t_STRING_LITERAL = string_literal
+ t_ORDINAL = r'@[0-9]+'  # at least one digit: a bare '@' is not an ordinal
+
+ # The following floating and integer constants are defined as
+ # functions so PLY tries them in definition order. As plain strings they
+ # would be sorted by regex length, putting decimal ahead of the others,
+ # so e.g. the '0' of '0x1F' would be lexed as a decimal constant.
+ #
+ @TOKEN(floating_constant)
+ def t_FLOAT_CONST(self, t):
+ return t
+
+ @TOKEN(hex_floating_constant)
+ def t_HEX_FLOAT_CONST(self, t):
+ return t
+
+ @TOKEN(hex_constant)
+ def t_INT_CONST_HEX(self, t):
+ return t
+
+ @TOKEN(bad_octal_constant)
+ def t_BAD_CONST_OCT(self, t):
+ msg = "Invalid octal constant"
+ self._error(msg, t)  # reported via _error; no token is returned
+
+ @TOKEN(octal_constant)
+ def t_INT_CONST_OCT(self, t):
+ return t
+
+ @TOKEN(decimal_constant)
+ def t_INT_CONST_DEC(self, t):
+ return t
+
+ # t_CHAR_CONST must be defined before t_BAD_CHAR_CONST, to prevent the
+ # latter from catching valid char constants as invalid
+ #
+ @TOKEN(char_const)
+ def t_CHAR_CONST(self, t):
+ return t
+
+ @TOKEN(wchar_const)
+ def t_WCHAR_CONST(self, t):
+ return t
+
+ @TOKEN(unmatched_quote)
+ def t_UNMATCHED_QUOTE(self, t):
+ msg = "Unmatched '"
+ self._error(msg, t)  # reported via _error; no token is returned
+
+ @TOKEN(bad_char_const)
+ def t_BAD_CHAR_CONST(self, t):
+ msg = "Invalid char constant %s" % t.value
+ self._error(msg, t)  # reported via _error; no token is returned
+
+ @TOKEN(wstring_literal)
+ def t_WSTRING_LITERAL(self, t):
+ return t
+
+ # unmatched string literals are caught by the preprocessor (NOTE(review): no preprocessor is visible in this file -- confirm)
+
+ @TOKEN(bad_string_literal)
+ def t_BAD_STRING_LITERAL(self, t):
+ msg = "String contains invalid escape code"
+ self._error(msg, t)  # reported via _error; no token is returned
+
+ @TOKEN(identifier)
+ def t_NAME(self, t):
+ t.type = self.keyword_map.get(t.value, "NAME")  # exact lowercase keyword match, else NAME
+ return t
+
+ # Ignore C and C++ style comments, but still count the newlines they span
+ def t_COMMENT(self, t):
+   r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
+   t.lexer.lineno += t.value.count("\n")
+
+ def t_error(self, t):  # PLY's handler for input that no rule matches
+ msg = 'Illegal character %s' % repr(t.value[0])
+ self._error(msg, t)  # _error also skips one character so lexing can continue
« no previous file with comments | « no previous file | mojo/public/bindings/parse/mojo_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698