Index: mojo/public/tools/bindings/pylib/mojom/parse/lexer.py
diff --git a/mojo/public/tools/bindings/pylib/mojom/parse/lexer.py b/mojo/public/tools/bindings/pylib/mojom/parse/lexer.py
deleted file mode 100644
index 476a66275782646ffda42fee4e678a77a3435259..0000000000000000000000000000000000000000
--- a/mojo/public/tools/bindings/pylib/mojom/parse/lexer.py
+++ /dev/null
@@ -1,253 +0,0 @@
-# Copyright 2014 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import imp
-import os.path
-import sys
-
-def _GetDirAbove(dirname):
-  """Returns the directory "above" this file containing |dirname| (which must
-  also be "above" this file)."""
-  path = os.path.abspath(__file__)
-  while True:
-    path, tail = os.path.split(path)
-    assert tail
-    if tail == dirname:
-      return path
-
-try:
-  imp.find_module("ply")
-except ImportError:
-  sys.path.append(os.path.join(_GetDirAbove("public"), "public/third_party"))
-from ply.lex import TOKEN
-
-from ..error import Error
-
-
-class LexError(Error):
-  """Class for errors from the lexer."""
-
-  def __init__(self, filename, message, lineno):
-    Error.__init__(self, filename, message, lineno=lineno)
-
-
-# We have methods which look like they could be functions:
-# pylint: disable=R0201
-class Lexer(object):
-
-  def __init__(self, filename):
-    self.filename = filename
-
-  ######################-- PRIVATE --######################
-
-  ##
-  ## Internal auxiliary methods
-  ##
-  def _error(self, msg, token):
-    raise LexError(self.filename, msg, token.lineno)
-
-  ##
-  ## Reserved keywords
-  ##
-  keywords = (
-    'HANDLE',
-
-    'IMPORT',
-    'MODULE',
-    'STRUCT',
-    'UNION',
-    'INTERFACE',
-    'ENUM',
-    'CONST',
-    'TRUE',
-    'FALSE',
-    'DEFAULT',
-    'ARRAY',
-    'MAP'
-  )
-
-  keyword_map = {}
-  for keyword in keywords:
-    keyword_map[keyword.lower()] = keyword
-
-  ##
-  ## All the tokens recognized by the lexer
-  ##
-  tokens = keywords + (
-    # Identifiers
-    'NAME',
-
-    # Constants
-    'ORDINAL',
-    'INT_CONST_DEC', 'INT_CONST_HEX',
-    'FLOAT_CONST',
-
-    # String literals
-    'STRING_LITERAL',
-
-    # Operators
-    'MINUS',
-    'PLUS',
-    'AMP',
-    'QSTN',
-
-    # Assignment
-    'EQUALS',
-
-    # Request / response
-    'RESPONSE',
-
-    # Delimiters
-    'LPAREN', 'RPAREN', # ( )
-    'LBRACKET', 'RBRACKET', # [ ]
-    'LBRACE', 'RBRACE', # { }
-    'LANGLE', 'RANGLE', # < >
-    'SEMI', # ;
-    'COMMA', 'DOT' # , .
-  )
-
-  ##
-  ## Regexes for use in tokens
-  ##
-
-  # valid C identifiers (K&R2: A.2.3)
-  identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'
-
-  hex_prefix = '0[xX]'
-  hex_digits = '[0-9a-fA-F]+'
-
-  # integer constants (K&R2: A.2.5.1)
-  decimal_constant = '0|([1-9][0-9]*)'
-  hex_constant = hex_prefix+hex_digits
-  # Don't allow octal constants (even invalid octal).
-  octal_constant_disallowed = '0[0-9]+'
-
-  # character constants (K&R2: A.2.5.2)
-  # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
-  # directives with Windows paths as filenames (..\..\dir\file)
-  # For the same reason, decimal_escape allows all digit sequences. We want to
-  # parse all correct code, even if it means to sometimes parse incorrect
-  # code.
-  #
-  simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
-  decimal_escape = r"""(\d+)"""
-  hex_escape = r"""(x[0-9a-fA-F]+)"""
-  bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""
-
-  escape_sequence = \
-    r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
-
-  # string literals (K&R2: A.2.6)
-  string_char = r"""([^"\\\n]|"""+escape_sequence+')'
-  string_literal = '"'+string_char+'*"'
-  bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'
-
-  # floating constants (K&R2: A.2.5.3)
-  exponent_part = r"""([eE][-+]?[0-9]+)"""
-  fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
-  floating_constant = \
-    '(((('+fractional_constant+')'+ \
-    exponent_part+'?)|([0-9]+'+exponent_part+')))'
-
-  # Ordinals
-  ordinal = r'@[0-9]+'
-  missing_ordinal_value = r'@'
-  # Don't allow ordinal values in octal (even invalid octal, like 09) or
-  # hexadecimal.
-  octal_or_hex_ordinal_disallowed = r'@((0[0-9]+)|('+hex_prefix+hex_digits+'))'
-
-  ##
-  ## Rules for the normal state
-  ##
-  t_ignore = ' \t\r'
-
-  # Newlines
-  def t_NEWLINE(self, t):
-    r'\n+'
-    t.lexer.lineno += len(t.value)
-
-  # Operators
-  t_MINUS = r'-'
-  t_PLUS = r'\+'
-  t_AMP = r'&'
-  t_QSTN = r'\?'
-
-  # =
-  t_EQUALS = r'='
-
-  # =>
-  t_RESPONSE = r'=>'
-
-  # Delimiters
-  t_LPAREN = r'\('
-  t_RPAREN = r'\)'
-  t_LBRACKET = r'\['
-  t_RBRACKET = r'\]'
-  t_LBRACE = r'\{'
-  t_RBRACE = r'\}'
-  t_LANGLE = r'<'
-  t_RANGLE = r'>'
-  t_COMMA = r','
-  t_DOT = r'\.'
-  t_SEMI = r';'
-
-  t_STRING_LITERAL = string_literal
-
-  # The following floating and integer constants are defined as
-  # functions to impose a strict order (otherwise, decimal
-  # is placed before the others because its regex is longer,
-  # and this is bad)
-  #
-  @TOKEN(floating_constant)
-  def t_FLOAT_CONST(self, t):
-    return t
-
-  @TOKEN(hex_constant)
-  def t_INT_CONST_HEX(self, t):
-    return t
-
-  @TOKEN(octal_constant_disallowed)
-  def t_OCTAL_CONSTANT_DISALLOWED(self, t):
-    msg = "Octal values not allowed"
-    self._error(msg, t)
-
-  @TOKEN(decimal_constant)
-  def t_INT_CONST_DEC(self, t):
-    return t
-
-  # unmatched string literals are caught by the preprocessor
-
-  @TOKEN(bad_string_literal)
-  def t_BAD_STRING_LITERAL(self, t):
-    msg = "String contains invalid escape code"
-    self._error(msg, t)
-
-  # Handle ordinal-related tokens in the right order:
-  @TOKEN(octal_or_hex_ordinal_disallowed)
-  def t_OCTAL_OR_HEX_ORDINAL_DISALLOWED(self, t):
-    msg = "Octal and hexadecimal ordinal values not allowed"
-    self._error(msg, t)
-
-  @TOKEN(ordinal)
-  def t_ORDINAL(self, t):
-    return t
-
-  @TOKEN(missing_ordinal_value)
-  def t_BAD_ORDINAL(self, t):
-    msg = "Missing ordinal value"
-    self._error(msg, t)
-
-  @TOKEN(identifier)
-  def t_NAME(self, t):
-    t.type = self.keyword_map.get(t.value, "NAME")
-    return t
-
-  # Ignore C and C++ style comments
-  def t_COMMENT(self, t):
-    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
-    t.lexer.lineno += t.value.count("\n")
-
-  def t_error(self, t):
-    msg = "Illegal character %s" % repr(t.value[0])
-    self._error(msg, t)
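
For reference, the Lexer class removed above is not a standalone tokenizer: it only supplies the tokens tuple, keyword map, and t_* rules that PLY introspects. A minimal usage sketch, assuming ply is importable and pylib/ is on sys.path so the class resolves as mojom.parse.lexer.Lexer; the tokenize() helper and the sample .mojom text are illustrative only, not part of this change:

  # Illustrative sketch only; not part of the deleted file or this patch.
  import ply.lex as lex

  from mojom.parse.lexer import Lexer

  def tokenize(filename, source_text):
    # PLY builds the lexer from the t_* attributes and the tokens tuple of
    # the object passed via object=.
    mojom_lexer = lex.lex(object=Lexer(filename))
    mojom_lexer.input(source_text)
    result = []
    while True:
      tok = mojom_lexer.token()  # Returns None once the input is exhausted.
      if not tok:
        break
      result.append((tok.type, tok.value))
    return result

  # tokenize("sample.mojom", "struct Foo { int32 bar@0; };")
  # -> [('STRUCT', 'struct'), ('NAME', 'Foo'), ('LBRACE', '{'), ...]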