| Index: third_party/gmock/scripts/generator/cpp/tokenize.py
|
| diff --git a/third_party/gmock/scripts/generator/cpp/tokenize.py b/third_party/gmock/scripts/generator/cpp/tokenize.py
|
| deleted file mode 100644
|
| index 28c334529980b540797034f90a1031001a6c0bcd..0000000000000000000000000000000000000000
|
| --- a/third_party/gmock/scripts/generator/cpp/tokenize.py
|
| +++ /dev/null
|
| @@ -1,287 +0,0 @@
|
| -#!/usr/bin/env python
|
| -#
|
| -# Copyright 2007 Neal Norwitz
|
| -# Portions Copyright 2007 Google Inc.
|
| -#
|
| -# Licensed under the Apache License, Version 2.0 (the "License");
|
| -# you may not use this file except in compliance with the License.
|
| -# You may obtain a copy of the License at
|
| -#
|
| -# http://www.apache.org/licenses/LICENSE-2.0
|
| -#
|
| -# Unless required by applicable law or agreed to in writing, software
|
| -# distributed under the License is distributed on an "AS IS" BASIS,
|
| -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| -# See the License for the specific language governing permissions and
|
| -# limitations under the License.
|
| -
|
| -"""Tokenize C++ source code."""
|
| -
|
| -__author__ = 'nnorwitz@google.com (Neal Norwitz)'
|
| -
|
| -
|
| -try:
|
| - # Python 3.x
|
| - import builtins
|
| -except ImportError:
|
| - # Python 2.x
|
| - import __builtin__ as builtins
|
| -
|
| -
|
| -import sys
|
| -
|
| -from cpp import utils
|
| -
|
| -
|
| -if not hasattr(builtins, 'set'):
|
| - # Nominal support for Python 2.3.
|
| - from sets import Set as set
|
| -
|
| -
|
| -# Add $ as a valid identifier char since so much code uses it.
|
| -_letters = 'abcdefghijklmnopqrstuvwxyz'
|
| -VALID_IDENTIFIER_CHARS = set(_letters + _letters.upper() + '_0123456789$')
|
| -HEX_DIGITS = set('0123456789abcdefABCDEF')
|
| -INT_OR_FLOAT_DIGITS = set('01234567890eE-+')
|
| -
|
| -
|
| -# C++0x string preffixes.
|
| -_STR_PREFIXES = set(('R', 'u8', 'u8R', 'u', 'uR', 'U', 'UR', 'L', 'LR'))
|
| -
|
| -
|
| -# Token types.
|
| -UNKNOWN = 'UNKNOWN'
|
| -SYNTAX = 'SYNTAX'
|
| -CONSTANT = 'CONSTANT'
|
| -NAME = 'NAME'
|
| -PREPROCESSOR = 'PREPROCESSOR'
|
| -
|
| -# Where the token originated from. This can be used for backtracking.
|
| -# It is always set to WHENCE_STREAM in this code.
|
| -WHENCE_STREAM, WHENCE_QUEUE = range(2)
|
| -
|
| -
|
| -class Token(object):
|
| - """Data container to represent a C++ token.
|
| -
|
| - Tokens can be identifiers, syntax char(s), constants, or
|
| - pre-processor directives.
|
| -
|
| - start contains the index of the first char of the token in the source
|
| - end contains the index of the last char of the token in the source
|
| - """
|
| -
|
| - def __init__(self, token_type, name, start, end):
|
| - self.token_type = token_type
|
| - self.name = name
|
| - self.start = start
|
| - self.end = end
|
| - self.whence = WHENCE_STREAM
|
| -
|
| - def __str__(self):
|
| - if not utils.DEBUG:
|
| - return 'Token(%r)' % self.name
|
| - return 'Token(%r, %s, %s)' % (self.name, self.start, self.end)
|
| -
|
| - __repr__ = __str__
|
| -
|
| -
|
| -def _GetString(source, start, i):
|
| - i = source.find('"', i+1)
|
| - while source[i-1] == '\\':
|
| - # Count the trailing backslashes.
|
| - backslash_count = 1
|
| - j = i - 2
|
| - while source[j] == '\\':
|
| - backslash_count += 1
|
| - j -= 1
|
| - # When trailing backslashes are even, they escape each other.
|
| - if (backslash_count % 2) == 0:
|
| - break
|
| - i = source.find('"', i+1)
|
| - return i + 1
|
| -
|
| -
|
| -def _GetChar(source, start, i):
|
| - # NOTE(nnorwitz): may not be quite correct, should be good enough.
|
| - i = source.find("'", i+1)
|
| - while source[i-1] == '\\':
|
| - # Need to special case '\\'.
|
| - if (i - 2) > start and source[i-2] == '\\':
|
| - break
|
| - i = source.find("'", i+1)
|
| - # Try to handle unterminated single quotes (in a #if 0 block).
|
| - if i < 0:
|
| - i = start
|
| - return i + 1
|
| -
|
| -
|
| -def GetTokens(source):
|
| - """Returns a sequence of Tokens.
|
| -
|
| - Args:
|
| - source: string of C++ source code.
|
| -
|
| - Yields:
|
| - Token that represents the next token in the source.
|
| - """
|
| - # Cache various valid character sets for speed.
|
| - valid_identifier_chars = VALID_IDENTIFIER_CHARS
|
| - hex_digits = HEX_DIGITS
|
| - int_or_float_digits = INT_OR_FLOAT_DIGITS
|
| - int_or_float_digits2 = int_or_float_digits | set('.')
|
| -
|
| - # Only ignore errors while in a #if 0 block.
|
| - ignore_errors = False
|
| - count_ifs = 0
|
| -
|
| - i = 0
|
| - end = len(source)
|
| - while i < end:
|
| - # Skip whitespace.
|
| - while i < end and source[i].isspace():
|
| - i += 1
|
| - if i >= end:
|
| - return
|
| -
|
| - token_type = UNKNOWN
|
| - start = i
|
| - c = source[i]
|
| - if c.isalpha() or c == '_': # Find a string token.
|
| - token_type = NAME
|
| - while source[i] in valid_identifier_chars:
|
| - i += 1
|
| - # String and character constants can look like a name if
|
| - # they are something like L"".
|
| - if (source[i] == "'" and (i - start) == 1 and
|
| - source[start:i] in 'uUL'):
|
| - # u, U, and L are valid C++0x character preffixes.
|
| - token_type = CONSTANT
|
| - i = _GetChar(source, start, i)
|
| - elif source[i] == "'" and source[start:i] in _STR_PREFIXES:
|
| - token_type = CONSTANT
|
| - i = _GetString(source, start, i)
|
| - elif c == '/' and source[i+1] == '/': # Find // comments.
|
| - i = source.find('\n', i)
|
| - if i == -1: # Handle EOF.
|
| - i = end
|
| - continue
|
| - elif c == '/' and source[i+1] == '*': # Find /* comments. */
|
| - i = source.find('*/', i) + 2
|
| - continue
|
| - elif c in ':+-<>&|*=': # : or :: (plus other chars).
|
| - token_type = SYNTAX
|
| - i += 1
|
| - new_ch = source[i]
|
| - if new_ch == c:
|
| - i += 1
|
| - elif c == '-' and new_ch == '>':
|
| - i += 1
|
| - elif new_ch == '=':
|
| - i += 1
|
| - elif c in '()[]{}~!?^%;/.,': # Handle single char tokens.
|
| - token_type = SYNTAX
|
| - i += 1
|
| - if c == '.' and source[i].isdigit():
|
| - token_type = CONSTANT
|
| - i += 1
|
| - while source[i] in int_or_float_digits:
|
| - i += 1
|
| - # Handle float suffixes.
|
| - for suffix in ('l', 'f'):
|
| - if suffix == source[i:i+1].lower():
|
| - i += 1
|
| - break
|
| - elif c.isdigit(): # Find integer.
|
| - token_type = CONSTANT
|
| - if c == '0' and source[i+1] in 'xX':
|
| - # Handle hex digits.
|
| - i += 2
|
| - while source[i] in hex_digits:
|
| - i += 1
|
| - else:
|
| - while source[i] in int_or_float_digits2:
|
| - i += 1
|
| - # Handle integer (and float) suffixes.
|
| - for suffix in ('ull', 'll', 'ul', 'l', 'f', 'u'):
|
| - size = len(suffix)
|
| - if suffix == source[i:i+size].lower():
|
| - i += size
|
| - break
|
| - elif c == '"': # Find string.
|
| - token_type = CONSTANT
|
| - i = _GetString(source, start, i)
|
| - elif c == "'": # Find char.
|
| - token_type = CONSTANT
|
| - i = _GetChar(source, start, i)
|
| - elif c == '#': # Find pre-processor command.
|
| - token_type = PREPROCESSOR
|
| - got_if = source[i:i+3] == '#if' and source[i+3:i+4].isspace()
|
| - if got_if:
|
| - count_ifs += 1
|
| - elif source[i:i+6] == '#endif':
|
| - count_ifs -= 1
|
| - if count_ifs == 0:
|
| - ignore_errors = False
|
| -
|
| - # TODO(nnorwitz): handle preprocessor statements (\ continuations).
|
| - while 1:
|
| - i1 = source.find('\n', i)
|
| - i2 = source.find('//', i)
|
| - i3 = source.find('/*', i)
|
| - i4 = source.find('"', i)
|
| - # NOTE(nnorwitz): doesn't handle comments in #define macros.
|
| - # Get the first important symbol (newline, comment, EOF/end).
|
| - i = min([x for x in (i1, i2, i3, i4, end) if x != -1])
|
| -
|
| - # Handle #include "dir//foo.h" properly.
|
| - if source[i] == '"':
|
| - i = source.find('"', i+1) + 1
|
| - assert i > 0
|
| - continue
|
| - # Keep going if end of the line and the line ends with \.
|
| - if not (i == i1 and source[i-1] == '\\'):
|
| - if got_if:
|
| - condition = source[start+4:i].lstrip()
|
| - if (condition.startswith('0') or
|
| - condition.startswith('(0)')):
|
| - ignore_errors = True
|
| - break
|
| - i += 1
|
| - elif c == '\\': # Handle \ in code.
|
| - # This is different from the pre-processor \ handling.
|
| - i += 1
|
| - continue
|
| - elif ignore_errors:
|
| - # The tokenizer seems to be in pretty good shape. This
|
| - # raise is conditionally disabled so that bogus code
|
| - # in an #if 0 block can be handled. Since we will ignore
|
| - # it anyways, this is probably fine. So disable the
|
| - # exception and return the bogus char.
|
| - i += 1
|
| - else:
|
| - sys.stderr.write('Got invalid token in %s @ %d token:%s: %r\n' %
|
| - ('?', i, c, source[i-10:i+10]))
|
| - raise RuntimeError('unexpected token')
|
| -
|
| - if i <= 0:
|
| - print('Invalid index, exiting now.')
|
| - return
|
| - yield Token(token_type, source[start:i], start, i)
|
| -
|
| -
|
| -if __name__ == '__main__':
|
| - def main(argv):
|
| - """Driver mostly for testing purposes."""
|
| - for filename in argv[1:]:
|
| - source = utils.ReadFile(filename)
|
| - if source is None:
|
| - continue
|
| -
|
| - for token in GetTokens(source):
|
| - print('%-12s: %s' % (token.token_type, token.name))
|
| - # print('\r%6.2f%%' % (100.0 * index / token.end),)
|
| - sys.stdout.write('\n')
|
| -
|
| -
|
| - main(sys.argv)
|
|
|