third_party/gmock/scripts/generator/cpp/tokenize.py - Issue 624713003: Keep only base/extractor.[cc|h].

Unified Diff: third_party/gmock/scripts/generator/cpp/tokenize.py

Issue 624713003: Keep only base/extractor.[cc|h]. (Closed) Base URL: https://chromium.googlesource.com/external/omaha.git@master

Patch Set: Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/gmock/scripts/generator/cpp/tokenize.py

diff --git a/third_party/gmock/scripts/generator/cpp/tokenize.py b/third_party/gmock/scripts/generator/cpp/tokenize.py

deleted file mode 100644

index 28c334529980b540797034f90a1031001a6c0bcd..0000000000000000000000000000000000000000

--- a/third_party/gmock/scripts/generator/cpp/tokenize.py

+++ /dev/null

@@ -1,287 +0,0 @@

-#!/usr/bin/env python

-# Licensed under the Apache License, Version 2.0 (the "License");

-# you may not use this file except in compliance with the License.

-# You may obtain a copy of the License at

-# http://www.apache.org/licenses/LICENSE-2.0

-# Unless required by applicable law or agreed to in writing, software

-# distributed under the License is distributed on an "AS IS" BASIS,

-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

-# See the License for the specific language governing permissions and

-# limitations under the License.

-"""Tokenize C++ source code."""

-__author__ = 'nnorwitz@google.com (Neal Norwitz)'

-try:

- # Python 3.x

- import builtins

-except ImportError:

- # Python 2.x

- import __builtin__ as builtins

-import sys

-from cpp import utils

-if not hasattr(builtins, 'set'):

- # Nominal support for Python 2.3.

- from sets import Set as set

-# Add $ as a valid identifier char since so much code uses it.

-_letters = 'abcdefghijklmnopqrstuvwxyz'

-VALID_IDENTIFIER_CHARS = set(_letters + _letters.upper() + '_0123456789$')

-HEX_DIGITS = set('0123456789abcdefABCDEF')

-INT_OR_FLOAT_DIGITS = set('01234567890eE-+')

-# C++0x string prefixes.

-_STR_PREFIXES = set(('R', 'u8', 'u8R', 'u', 'uR', 'U', 'UR', 'L', 'LR'))

-# Token types.

-UNKNOWN = 'UNKNOWN'

-SYNTAX = 'SYNTAX'

-CONSTANT = 'CONSTANT'

-NAME = 'NAME'

-PREPROCESSOR = 'PREPROCESSOR'

-# Where the token originated from. This can be used for backtracking.

-# It is always set to WHENCE_STREAM in this code.

-WHENCE_STREAM, WHENCE_QUEUE = range(2)

-class Token(object):

- """Data container to represent a C++ token.

- Tokens can be identifiers, syntax char(s), constants, or

- pre-processor directives.

- start contains the index of the first char of the token in the source

- end contains the index of the last char of the token in the source

- """

- def __init__(self, token_type, name, start, end):

- self.token_type = token_type

- self.name = name

- self.start = start

- self.end = end

- self.whence = WHENCE_STREAM

- def __str__(self):

- if not utils.DEBUG:

- return 'Token(%r)' % self.name

- return 'Token(%r, %s, %s)' % (self.name, self.start, self.end)

- __repr__ = __str__

-def _GetString(source, start, i):

- i = source.find('"', i+1)

- while source[i-1] == '\\':

- # Count the trailing backslashes.

- backslash_count = 1

- j = i - 2

- while source[j] == '\\':

- backslash_count += 1

- j -= 1

- # When trailing backslashes are even, they escape each other.

- if (backslash_count % 2) == 0:

- break

- i = source.find('"', i+1)

- return i + 1

-def _GetChar(source, start, i):

- # NOTE(nnorwitz): may not be quite correct, should be good enough.

- i = source.find("'", i+1)

- while source[i-1] == '\\':

- # Need to special case '\\'.

- if (i - 2) > start and source[i-2] == '\\':

- break

- i = source.find("'", i+1)

- # Try to handle unterminated single quotes (in a #if 0 block).

- if i < 0:

- i = start

- return i + 1

-def GetTokens(source):

- """Returns a sequence of Tokens.

- Args:

- source: string of C++ source code.

- Yields:

- Token that represents the next token in the source.

- """

- # Cache various valid character sets for speed.

- valid_identifier_chars = VALID_IDENTIFIER_CHARS

- hex_digits = HEX_DIGITS

- int_or_float_digits = INT_OR_FLOAT_DIGITS

- int_or_float_digits2 = int_or_float_digits | set('.')

- # Only ignore errors while in a #if 0 block.

- ignore_errors = False

- count_ifs = 0

- i = 0

- end = len(source)

- while i < end:

- # Skip whitespace.

- while i < end and source[i].isspace():

- i += 1

- if i >= end:

- return

- token_type = UNKNOWN

- start = i

- c = source[i]

- if c.isalpha() or c == '_': # Find a string token.

- token_type = NAME

- while source[i] in valid_identifier_chars:

- i += 1

- # String and character constants can look like a name if

- # they are something like L"".

- if (source[i] == "'" and (i - start) == 1 and

- source[start:i] in 'uUL'):

- # u, U, and L are valid C++0x character prefixes.

- token_type = CONSTANT

- i = _GetChar(source, start, i)

- elif source[i] == "'" and source[start:i] in _STR_PREFIXES:

- token_type = CONSTANT

- i = _GetString(source, start, i)

- elif c == '/' and source[i+1] == '/': # Find // comments.

- i = source.find('\n', i)

- if i == -1: # Handle EOF.

- i = end

- continue

- elif c == '/' and source[i+1] == '*': # Find /* comments. */

- i = source.find('*/', i) + 2

- continue

- elif c in ':+-<>&|*=': # : or :: (plus other chars).

- token_type = SYNTAX

- i += 1

- new_ch = source[i]

- if new_ch == c:

- i += 1

- elif c == '-' and new_ch == '>':

- i += 1

- elif new_ch == '=':

- i += 1

- elif c in '()[]{}~!?^%;/.,': # Handle single char tokens.

- token_type = SYNTAX

- i += 1

- if c == '.' and source[i].isdigit():

- token_type = CONSTANT

- i += 1

- while source[i] in int_or_float_digits:

- i += 1

- # Handle float suffixes.

- for suffix in ('l', 'f'):

- if suffix == source[i:i+1].lower():

- i += 1

- break

- elif c.isdigit(): # Find integer.

- token_type = CONSTANT

- if c == '0' and source[i+1] in 'xX':

- # Handle hex digits.

- i += 2

- while source[i] in hex_digits:

- i += 1

- else:

- while source[i] in int_or_float_digits2:

- i += 1

- # Handle integer (and float) suffixes.

- for suffix in ('ull', 'll', 'ul', 'l', 'f', 'u'):

- size = len(suffix)

- if suffix == source[i:i+size].lower():

- i += size

- break

- elif c == '"': # Find string.

- token_type = CONSTANT

- i = _GetString(source, start, i)

- elif c == "'": # Find char.

- token_type = CONSTANT

- i = _GetChar(source, start, i)

- elif c == '#': # Find pre-processor command.

- token_type = PREPROCESSOR

- got_if = source[i:i+3] == '#if' and source[i+3:i+4].isspace()

- if got_if:

- count_ifs += 1

- elif source[i:i+6] == '#endif':

- count_ifs -= 1

- if count_ifs == 0:

- ignore_errors = False

- # TODO(nnorwitz): handle preprocessor statements (\ continuations).

- while 1:

- i1 = source.find('\n', i)

- i2 = source.find('//', i)

- i3 = source.find('/*', i)

- i4 = source.find('"', i)

- # NOTE(nnorwitz): doesn't handle comments in #define macros.

- # Get the first important symbol (newline, comment, EOF/end).

- i = min([x for x in (i1, i2, i3, i4, end) if x != -1])

- # Handle #include "dir//foo.h" properly.

- if source[i] == '"':

- i = source.find('"', i+1) + 1

- assert i > 0

- continue

- # Keep going if end of the line and the line ends with \.

- if not (i == i1 and source[i-1] == '\\'):

- if got_if:

- condition = source[start+4:i].lstrip()

- if (condition.startswith('0') or

- condition.startswith('(0)')):

- ignore_errors = True

- break

- i += 1

- elif c == '\\': # Handle \ in code.

- # This is different from the pre-processor \ handling.

- i += 1

- continue

- elif ignore_errors:

- # The tokenizer seems to be in pretty good shape. This

- # raise is conditionally disabled so that bogus code

- # in an #if 0 block can be handled. Since we will ignore

- # it anyways, this is probably fine. So disable the

- # exception and return the bogus char.

- i += 1

- else:

- sys.stderr.write('Got invalid token in %s @ %d token:%s: %r\n' %

- ('?', i, c, source[i-10:i+10]))

- raise RuntimeError('unexpected token')

- if i <= 0:

- print('Invalid index, exiting now.')

- return

- yield Token(token_type, source[start:i], start, i)

-if __name__ == '__main__':

- def main(argv):

- """Driver mostly for testing purposes."""

- for filename in argv[1:]:

- source = utils.ReadFile(filename)

- if source is None:

- continue

- for token in GetTokens(source):

- print('%-12s: %s' % (token.token_type, token.name))

- # print('\r%6.2f%%' % (100.0 * index / token.end),)

- sys.stdout.write('\n')

- main(sys.argv)

« no previous file with comments | « third_party/gmock/scripts/generator/cpp/keywords.py ('k') | third_party/gmock/scripts/generator/cpp/utils.py » ('j') | no next file with comments »