Index: trunk/src/mojo/public/bindings/parse/mojo_lexer.py |
=================================================================== |
--- trunk/src/mojo/public/bindings/parse/mojo_lexer.py (revision 244236) |
+++ trunk/src/mojo/public/bindings/parse/mojo_lexer.py (working copy) |
@@ -1,304 +0,0 @@ |
-# PLY based Lexer class, based on pycparser by Eli Bendersky. |
-# |
-# Copyright (c) 2012, Eli Bendersky |
-# All rights reserved. |
-# |
-# Redistribution and use in source and binary forms, with or without modification, |
-# are permitted provided that the following conditions are met: |
-# |
-# * Redistributions of source code must retain the above copyright notice, this |
-# list of conditions and the following disclaimer. |
-# * Redistributions in binary form must reproduce the above copyright notice, |
-# this list of conditions and the following disclaimer in the documentation |
-# and/or other materials provided with the distribution. |
-# * Neither the name of Eli Bendersky nor the names of its contributors may |
-# be used to endorse or promote products derived from this software without |
-# specific prior written permission. |
-# |
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
-# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
-# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
- |
-import re |
-import sys |
-import os.path |
- |
# Try to load the ply module, if not, then assume it is in the third_party
# directory.
try:
  # Disable lint check which fails to find the ply module.
  # pylint: disable=F0401
  from ply.lex import TOKEN
except ImportError:
  # ply was not importable directly; add <src root>/third_party to sys.path
  # and retry.  This file lives at src/mojo/public/bindings/parse/, so four
  # os.pardir hops from the module directory reach the src root.
  # NOTE(review): module_name is unused; only module_path is needed here.
  module_path, module_name = os.path.split(__file__)
  third_party = os.path.join(
      module_path, os.pardir, os.pardir, os.pardir, os.pardir, 'third_party')
  sys.path.append(third_party)
  # pylint: disable=F0401
  from ply.lex import TOKEN
- |
- |
class Lexer(object):
  """Token definitions for lexing mojom IDL files with ply.lex.

  Declares the token names, shared regex fragments, and the t_* rule
  attributes and methods that ply.lex discovers by reflection.  Based on
  the pycparser lexer by Eli Bendersky (see the license header above).

  NOTE(review): self.lexer (used by _error) is never assigned in this
  class; presumably ply.lex attaches it externally (lex.lex(object=...)) --
  confirm against the caller.
  """
  ######################-- PRIVATE --######################

  ##
  ## Internal auxiliary methods
  ##
  def _error(self, msg, token):
    """Reports a lexing error, then skips one character to try to recover."""
    print('%s at line %d' % (msg, token.lineno))
    self.lexer.skip(1)

  ##
  ## Reserved keywords
  ##
  keywords = (
    # Handle types.
    'HANDLE',
    'DATA_PIPE_CONSUMER',
    'DATA_PIPE_PRODUCER',
    'MESSAGE_PIPE',

    # Declaration keywords.
    'MODULE',
    'STRUCT',
    'INTERFACE',
    'ENUM',
    'VOID',
  )

  # Maps the lowercase source spelling (e.g. 'struct') to its token name
  # (e.g. 'STRUCT'); used by t_NAME to promote identifiers to keywords.
  # (Note: the loop variable 'keyword' remains bound as a class attribute.)
  keyword_map = {}
  for keyword in keywords:
    keyword_map[keyword.lower()] = keyword

  ##
  ## All the tokens recognized by the lexer
  ##
  tokens = keywords + (
    # Identifiers
    'NAME',

    # Constants
    'ORDINAL',
    'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX',
    'FLOAT_CONST', 'HEX_FLOAT_CONST',
    'CHAR_CONST',
    'WCHAR_CONST',

    # String literals
    'STRING_LITERAL',
    'WSTRING_LITERAL',

    # Operators
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment
    'EQUALS',

    # Conditional operator (?)
    'CONDOP',

    # Delimiters
    'LPAREN', 'RPAREN',         # ( )
    'LBRACKET', 'RBRACKET',     # [ ]
    'LBRACE', 'RBRACE',         # { }
    'SEMI', 'COLON',            # ; :
    'COMMA',                    # ,
  )

  ##
  ## Regex fragments composed into the token rules below
  ##

  # valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers)
  identifier = r'[a-zA-Z_$][0-9a-zA-Z_$]*'

  hex_prefix = '0[xX]'
  hex_digits = '[0-9a-fA-F]+'

  # integer constants (K&R2: A.2.5.1)
  # Optional C-style suffix: any legal combination of u/U and l/L/ll/LL.
  integer_suffix_opt = \
      r'(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
  decimal_constant = \
      '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
  octal_constant = '0[0-7]*'+integer_suffix_opt
  hex_constant = hex_prefix+hex_digits+integer_suffix_opt

  # An octal-looking constant containing 8 or 9 is rejected with an error.
  bad_octal_constant = '0[0-7]*[89]'

  # character constants (K&R2: A.2.5.2)
  # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
  # directives with Windows paths as filenames (..\..\dir\file)
  # For the same reason, decimal_escape allows all digit sequences. We want to
  # parse all correct code, even if it means to sometimes parse incorrect
  # code.
  #
  simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
  decimal_escape = r"""(\d+)"""
  hex_escape = r"""(x[0-9a-fA-F]+)"""
  bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""

  escape_sequence = \
      r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
  cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
  char_const = "'"+cconst_char+"'"
  wchar_const = 'L'+char_const
  # A quote whose closing partner never arrives before newline/end-of-input.
  unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
  bad_char_const = \
      r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+ \
      bad_escape+r"""[^'\n]*')"""

  # string literals (K&R2: A.2.6)
  string_char = r"""([^"\\\n]|"""+escape_sequence+')'
  string_literal = '"'+string_char+'*"'
  wstring_literal = 'L'+string_literal
  bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

  # floating constants (K&R2: A.2.5.3)
  exponent_part = r"""([eE][-+]?[0-9]+)"""
  fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
  floating_constant = \
      '(((('+fractional_constant+')'+ \
      exponent_part+'?)|([0-9]+'+exponent_part+'))[FfLl]?)'
  binary_exponent_part = r'''([pP][+-]?[0-9]+)'''
  hex_fractional_constant = \
      '((('+hex_digits+r""")?\."""+hex_digits+')|('+hex_digits+r"""\.))"""
  hex_floating_constant = \
      '('+hex_prefix+'('+hex_digits+'|'+hex_fractional_constant+')'+ \
      binary_exponent_part+'[FfLl]?)'

  ##
  ## Rules for the normal state
  ##
  # Whitespace (other than newlines) is skipped entirely.
  t_ignore = ' \t'

  # Newlines: consumed (no token), but the line counter must be kept current
  # for error reporting.
  def t_NEWLINE(self, t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")

  # Operators
  t_PLUS = r'\+'
  t_MINUS = r'-'
  t_TIMES = r'\*'
  t_DIVIDE = r'/'
  t_MOD = r'%'
  t_OR = r'\|'
  t_AND = r'&'
  t_NOT = r'~'
  t_XOR = r'\^'
  t_LSHIFT = r'<<'
  t_RSHIFT = r'>>'
  t_LOR = r'\|\|'
  t_LAND = r'&&'
  t_LNOT = r'!'
  t_LT = r'<'
  t_GT = r'>'
  t_LE = r'<='
  t_GE = r'>='
  t_EQ = r'=='
  t_NE = r'!='

  # =
  t_EQUALS = r'='

  # ?
  t_CONDOP = r'\?'

  # Delimiters
  t_LPAREN = r'\('
  t_RPAREN = r'\)'
  t_LBRACKET = r'\['
  t_RBRACKET = r'\]'
  t_LBRACE = r'\{'
  t_RBRACE = r'\}'
  t_COMMA = r','
  t_SEMI = r';'
  t_COLON = r':'

  t_STRING_LITERAL = string_literal
  # Ordinal tag, e.g. @3.
  # NOTE(review): '[0-9]*' (not '+') means a bare '@' with no digits also
  # lexes as ORDINAL -- confirm this is intended by the parser.
  t_ORDINAL = r'@[0-9]*'

  # The following floating and integer constants are defined as
  # functions to impose a strict order (otherwise, decimal
  # is placed before the others because its regex is longer,
  # and this is bad)
  #
  @TOKEN(floating_constant)
  def t_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_floating_constant)
  def t_HEX_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_constant)
  def t_INT_CONST_HEX(self, t):
    return t

  # Must come before the octal rule so 08/09 is reported, not split.
  @TOKEN(bad_octal_constant)
  def t_BAD_CONST_OCT(self, t):
    msg = "Invalid octal constant"
    self._error(msg, t)

  @TOKEN(octal_constant)
  def t_INT_CONST_OCT(self, t):
    return t

  @TOKEN(decimal_constant)
  def t_INT_CONST_DEC(self, t):
    return t

  # Must come before bad_char_const, to prevent it from
  # catching valid char constants as invalid
  #
  @TOKEN(char_const)
  def t_CHAR_CONST(self, t):
    return t

  @TOKEN(wchar_const)
  def t_WCHAR_CONST(self, t):
    return t

  @TOKEN(unmatched_quote)
  def t_UNMATCHED_QUOTE(self, t):
    msg = "Unmatched '"
    self._error(msg, t)

  @TOKEN(bad_char_const)
  def t_BAD_CHAR_CONST(self, t):
    msg = "Invalid char constant %s" % t.value
    self._error(msg, t)

  @TOKEN(wstring_literal)
  def t_WSTRING_LITERAL(self, t):
    return t

  # unmatched string literals are caught by the preprocessor

  @TOKEN(bad_string_literal)
  def t_BAD_STRING_LITERAL(self, t):
    msg = "String contains invalid escape code"
    self._error(msg, t)

  # Identifiers; reserved words are promoted to their keyword token type.
  @TOKEN(identifier)
  def t_NAME(self, t):
    t.type = self.keyword_map.get(t.value, "NAME")
    return t

  # Ignore C and C++ style comments (a run of //-lines is eaten as one unit).
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    pass

  # Catch-all for characters no rule matched.
  def t_error(self, t):
    msg = 'Illegal character %s' % repr(t.value[0])
    self._error(msg, t)