| OLD | NEW | 
|---|
|  | (Empty) | 
| 1 # Copyright 2014 The Chromium Authors. All rights reserved. |  | 
| 2 # Use of this source code is governed by a BSD-style license that can be |  | 
| 3 # found in the LICENSE file. |  | 
| 4 |  | 
| 5 import imp |  | 
| 6 import os.path |  | 
| 7 import sys |  | 
| 8 |  | 
def _GetDirAbove(dirname):
  """Returns the directory "above" this file containing |dirname| (which must
  also be "above" this file).

  Starting from this file's absolute path, path components are stripped off
  one at a time from the right. The first stripped component equal to
  |dirname| ends the search, and the remaining prefix (i.e. the directory that
  contains |dirname|) is returned.

  Args:
    dirname: Name of a single path component to look for.

  Returns:
    The absolute path of the directory containing |dirname|.

  Raises:
    AssertionError: if no component of this file's path equals |dirname|.
  """
  path = os.path.abspath(__file__)
  while True:
    path, tail = os.path.split(path)
    if not tail:
      # An empty tail means the root of the path has been reached without
      # finding |dirname|. Raise explicitly instead of using an `assert`
      # statement: asserts are removed under `python -O`, which would turn
      # this into an endless loop (splitting the root yields an empty tail
      # again).
      raise AssertionError(
          "No directory named %r above %s" % (dirname,
                                              os.path.abspath(__file__)))
    if tail == dirname:
      return path
| 18 |  | 
| 19 try: |  | 
| 20   imp.find_module("ply") |  | 
| 21 except ImportError: |  | 
| 22   sys.path.append(os.path.join(_GetDirAbove("public"), "public/third_party")) |  | 
| 23 from ply.lex import TOKEN |  | 
| 24 |  | 
| 25 from ..error import Error |  | 
| 26 |  | 
| 27 |  | 
class LexError(Error):
  """Error raised by the lexer when it rejects its input."""

  def __init__(self, filename, message, lineno):
    # Delegate to the base initializer; the line number is handed over by
    # keyword while filename and message stay positional.
    Error.__init__(
        self,
        filename,
        message,
        lineno=lineno)
| 33 |  | 
| 34 |  | 
| 35 # We have methods which look like they could be functions: |  | 
| 36 # pylint: disable=R0201 |  | 
class Lexer(object):
  """Token specification written in the PLY (ply.lex) `t_` rule style.

  The class bundles the keyword list, the full token list, the regex building
  blocks and the `t_*` rules. Rules that reject their input do so by raising
  LexError through _error(), tagged with the filename given at construction.
  """

  def __init__(self, filename):
    """Stores |filename|; it is only used when reporting errors."""
    self.filename = filename

  ######################--   PRIVATE   --######################

  ##
  ## Internal auxiliary methods
  ##
  def _error(self, msg, token):
    """Raises a LexError for |msg| at the line number carried by |token|."""
    raise LexError(self.filename, msg, token.lineno)

  ##
  ## Reserved keywords
  ##
  keywords = (
    'HANDLE',

    'IMPORT',
    'MODULE',
    'STRUCT',
    'UNION',
    'INTERFACE',
    'ENUM',
    'CONST',
    'TRUE',
    'FALSE',
    'DEFAULT',
    'ARRAY',
    'MAP'
  )

  # Maps the lowercase spelling of each keyword to its token type; consulted
  # by t_NAME to tell keywords apart from ordinary identifiers.
  keyword_map = {}
  for keyword in keywords:
    keyword_map[keyword.lower()] = keyword

  ##
  ## All the tokens recognized by the lexer
  ##
  tokens = keywords + (
    # Identifiers
    'NAME',

    # Constants
    'ORDINAL',
    'INT_CONST_DEC', 'INT_CONST_HEX',
    'FLOAT_CONST',

    # String literals
    'STRING_LITERAL',

    # Operators
    'MINUS',
    'PLUS',
    'AMP',
    'QSTN',

    # Assignment
    'EQUALS',

    # Request / response
    'RESPONSE',

    # Delimiters
    'LPAREN', 'RPAREN',         # ( )
    'LBRACKET', 'RBRACKET',     # [ ]
    'LBRACE', 'RBRACE',         # { }
    'LANGLE', 'RANGLE',         # < >
    'SEMI',                     # ;
    'COMMA', 'DOT'              # , .
  )

  ##
  ## Regexes for use in tokens
  ##

  # valid C identifiers (K&R2: A.2.3)
  identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'

  hex_prefix = '0[xX]'
  hex_digits = '[0-9a-fA-F]+'

  # integer constants (K&R2: A.2.5.1)
  decimal_constant = '0|([1-9][0-9]*)'
  hex_constant = hex_prefix+hex_digits
  # Don't allow octal constants (even invalid octal).
  octal_constant_disallowed = '0[0-9]+'

  # character constants (K&R2: A.2.5.2)
  # Note: a-zA-Z and the punctuation listed in simple_escape are allowed as
  # escape chars to support #line directives with Windows paths as filenames
  # (..\..\dir\file)
  # For the same reason, decimal_escape allows all digit sequences. We want to
  # parse all correct code, even if it means to sometimes parse incorrect
  # code.
  #
  simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
  decimal_escape = r"""(\d+)"""
  hex_escape = r"""(x[0-9a-fA-F]+)"""
  # A backslash followed by anything that none of the escapes above accept.
  # The excluded digit range must be 0-9 (not 0-7) to agree with
  # decimal_escape; otherwise a literal such as "\8" would match both the
  # good and the bad string pattern, and the bad-string rule (a function,
  # hence tried before the string rule t_STRING_LITERAL) would reject it.
  bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-9])"""

  escape_sequence = \
      r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'

  # string literals (K&R2: A.2.6)
  string_char = r"""([^"\\\n]|"""+escape_sequence+')'
  string_literal = '"'+string_char+'*"'
  bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

  # floating constants (K&R2: A.2.5.3)
  exponent_part = r"""([eE][-+]?[0-9]+)"""
  fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
  floating_constant = \
      '(((('+fractional_constant+')'+ \
      exponent_part+'?)|([0-9]+'+exponent_part+')))'

  # Ordinals
  ordinal = r'@[0-9]+'
  missing_ordinal_value = r'@'
  # Don't allow ordinal values in octal (even invalid octal, like 09) or
  # hexadecimal.
  octal_or_hex_ordinal_disallowed = r'@((0[0-9]+)|('+hex_prefix+hex_digits+'))'

  ##
  ## Rules for the normal state
  ##
  t_ignore = ' \t\r'

  # Newlines
  # (The docstring below is the rule's regex; it must stay the first
  # statement of the function and must not be reworded.)
  def t_NEWLINE(self, t):
    r'\n+'
    t.lexer.lineno += len(t.value)

  # Operators
  t_MINUS             = r'-'
  t_PLUS              = r'\+'
  t_AMP               = r'&'
  t_QSTN              = r'\?'

  # =
  t_EQUALS            = r'='

  # =>
  t_RESPONSE          = r'=>'

  # Delimiters
  t_LPAREN            = r'\('
  t_RPAREN            = r'\)'
  t_LBRACKET          = r'\['
  t_RBRACKET          = r'\]'
  t_LBRACE            = r'\{'
  t_RBRACE            = r'\}'
  t_LANGLE            = r'<'
  t_RANGLE            = r'>'
  t_COMMA             = r','
  t_DOT               = r'\.'
  t_SEMI              = r';'

  t_STRING_LITERAL    = string_literal

  # The following floating and integer constants are defined as
  # functions to impose a strict order (otherwise, decimal
  # is placed before the others because its regex is longer,
  # and this is bad)
  #
  @TOKEN(floating_constant)
  def t_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_constant)
  def t_INT_CONST_HEX(self, t):
    return t

  # Must precede t_INT_CONST_DEC so that e.g. "012" is rejected as a whole
  # instead of being split into the decimal constants "0" and "12".
  @TOKEN(octal_constant_disallowed)
  def t_OCTAL_CONSTANT_DISALLOWED(self, t):
    msg = "Octal values not allowed"
    self._error(msg, t)

  @TOKEN(decimal_constant)
  def t_INT_CONST_DEC(self, t):
    return t

  # unmatched string literals are caught by the preprocessor

  @TOKEN(bad_string_literal)
  def t_BAD_STRING_LITERAL(self, t):
    msg = "String contains invalid escape code"
    self._error(msg, t)

  # Handle ordinal-related tokens in the right order:
  @TOKEN(octal_or_hex_ordinal_disallowed)
  def t_OCTAL_OR_HEX_ORDINAL_DISALLOWED(self, t):
    msg = "Octal and hexadecimal ordinal values not allowed"
    self._error(msg, t)

  @TOKEN(ordinal)
  def t_ORDINAL(self, t):
    return t

  # A bare "@" that matched neither ordinal rule above.
  @TOKEN(missing_ordinal_value)
  def t_BAD_ORDINAL(self, t):
    msg = "Missing ordinal value"
    self._error(msg, t)

  # Identifiers whose exact (lowercase) spelling is in keyword_map are
  # retyped as that keyword; everything else stays a NAME.
  @TOKEN(identifier)
  def t_NAME(self, t):
    t.type = self.keyword_map.get(t.value, "NAME")
    return t

  # Ignore C and C++ style comments
  # (The docstring below is the rule's regex. Nothing is returned, so the
  # comment produces no token; only the line counter is advanced.)
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    t.lexer.lineno += t.value.count("\n")

  # Reports the first character of the unmatched input as illegal.
  def t_error(self, t):
    msg = "Illegal character %s" % repr(t.value[0])
    self._error(msg, t)
| OLD | NEW | 
|---|