| Index: android_webview/tools/copyright_scanner.py
|
| diff --git a/android_webview/tools/copyright_scanner.py b/android_webview/tools/copyright_scanner.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..90da30ded7303121fa678c01bd683061f75bc6fc
|
| --- /dev/null
|
| +++ b/android_webview/tools/copyright_scanner.py
|
| @@ -0,0 +1,196 @@
|
| +# Copyright 2014 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +"""Utilities for scanning source files to determine code authorship.
|
| +"""
|
| +
|
| +import itertools
|
| +import os
|
| +import re
|
| +
|
| +
|
def FindFiles(root_dir, start_paths_list, excluded_dirs_list):
  """Similar to UNIX utility find(1), searches for files in the directories.

  Only source code files (recognized by their extension) are returned.

  Args:
    root_dir: The root directory, to which all other paths are relative.
      May be given with or without a trailing path separator.
    start_paths_list: The list of paths to start search from. Each path can
      be a file or a directory. A path that names a file is only checked
      against the extension whitelist, not against |excluded_dirs_list|.
    excluded_dirs_list: The list of directories to skip. An entry matches
      when it occurs as a run of whole path components anywhere in a
      directory path relative to |root_dir|.
  Returns:
    The list of source code files found, relative to |root_dir|.
  """
  # Wrap each entry in '/' so that only whole path components can match.
  dirs_blacklist = [
      '/' + d.strip('/') + '/' for d in excluded_dirs_list if d.strip('/')]

  def IsBlacklistedDir(rel_dir):
    # Give the candidate the same '/' wrapping as the blacklist entries;
    # otherwise an excluded directory sitting at the very start or the very
    # end of the relative path would never be matched.
    probe = '/' + rel_dir.replace(os.sep, '/').strip('/') + '/'
    return any(item in probe for item in dirs_blacklist)

  files_whitelist_re = re.compile(
      r'\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)'
      r'|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?'
      r'|tex|mli?)$')
  files = []

  # Length of |root_dir| including exactly one trailing separator. Joining
  # with '' appends the separator only when it is missing, so the slices
  # below stay correct for 'root', 'root/' and '' alike.
  base_path_len = len(os.path.join(root_dir, ''))
  for path in start_paths_list:
    full_path = os.path.join(root_dir, path)
    if os.path.isfile(full_path):
      if files_whitelist_re.search(path):
        files.append(path)
      continue
    for dirpath, dirnames, filenames in os.walk(full_path):
      # Prune excluded subdirs in place so os.walk never descends into them.
      dirnames[:] = [
          name for name in dirnames
          if not IsBlacklistedDir(
              os.path.join(dirpath, name)[base_path_len:])]
      for filename in filenames:
        filepath = os.path.join(dirpath, filename)[base_path_len:]
        # The directory part is still checked here because the start path
        # itself may lie inside an excluded directory.
        if (files_whitelist_re.search(filepath) and
            not IsBlacklistedDir(os.path.dirname(filepath))):
          files.append(filepath)
  return files
|
| +
|
| +
|
| +python_multiline_string_double_re = re.compile(
|
| + r'"""[^"]*(?:"""|$)', flags=re.MULTILINE)
|
| +python_multiline_string_single_re = re.compile(
|
| + r"'''[^']*(?:'''|$)", flags=re.MULTILINE)
|
| +automatically_generated_re = re.compile(
|
| + r'(All changes made in this file will be lost'
|
| + '|DO NOT (EDIT|delete this file)'
|
| + '|Generated (at|automatically|data)'
|
| + '|Automatically generated'
|
| + '|\Wgenerated\s+(?:\w+\s+)*file\W)', flags=re.IGNORECASE)
|
| +
|
| +def _IsGeneratedFile(header):
|
| + header = header.upper()
|
| + if '"""' in header:
|
| + header = python_multiline_string_double_re.sub('', header)
|
| + if "'''" in header:
|
| + header = python_multiline_string_single_re.sub('', header)
|
| + # First do simple strings lookup to save time.
|
| + if 'ALL CHANGES MADE IN THIS FILE WILL BE LOST' in header:
|
| + return True
|
| + if 'DO NOT EDIT' in header or 'DO NOT DELETE' in header or \
|
| + 'GENERATED' in header:
|
| + return automatically_generated_re.search(header)
|
| + return False
|
| +
|
| +
|
# Marker entries that FindCopyrights() reports in place of real copyright
# strings.
# Reported when the file header looks machine-generated.
GENERATED_FILE = 'GENERATED FILE'
# Reported when no copyright statement was found in the file.
NO_COPYRIGHT = '*No copyright*'
|
| +
|
| +class _CopyrightsScanner(object):
|
| + _c_comment_re = re.compile(r'''"[^"\\]*(?:\\.[^"\\]*)*"''')
|
| + _copyright_indicator = r'(?:copyright|copr\.|\xc2\xa9|\(c\))'
|
| + _full_copyright_indicator_re = \
|
| + re.compile(r'(?:\W|^)' + _copyright_indicator + r'(?::\s*|\s+)(\w.*)$', \
|
| + re.IGNORECASE)
|
| + _copyright_disindicator_re = \
|
| + re.compile(r'\s*\b(?:info(?:rmation)?|notice|and|or)\b', re.IGNORECASE)
|
| +
|
| + def __init__(self):
|
| + self.max_line_numbers_proximity = 3
|
| + self.last_a_item_line_number = -200
|
| + self.last_b_item_line_number = -100
|
| +
|
| + def _CloseLineNumbers(self, a, b):
|
| + return 0 <= a - b <= self.max_line_numbers_proximity
|
| +
|
| + def MatchLine(self, line_number, line):
|
| + if '"' in line:
|
| + line = _CopyrightsScanner._c_comment_re.sub('', line)
|
| + upcase_line = line.upper()
|
| + # Record '(a)' and '(b)' last occurences in C++ comments.
|
| + # This is to filter out '(c)' used as a list item inside C++ comments.
|
| + # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
|
| + cpp_comment_idx = upcase_line.find('//')
|
| + if cpp_comment_idx != -1:
|
| + if upcase_line.find('(A)') > cpp_comment_idx:
|
| + self.last_a_item_line_number = line_number
|
| + if upcase_line.find('(B)') > cpp_comment_idx:
|
| + self.last_b_item_line_number = line_number
|
| + # Fast bailout, uses the same patterns as _copyright_indicator regexp.
|
| + if not 'COPYRIGHT' in upcase_line and not 'COPR.' in upcase_line \
|
| + and not '\xc2\xa9' in upcase_line:
|
| + c_item_index = upcase_line.find('(C)')
|
| + if c_item_index == -1:
|
| + return None
|
| + if c_item_index > cpp_comment_idx and \
|
| + self._CloseLineNumbers(line_number,
|
| + self.last_b_item_line_number) and \
|
| + self._CloseLineNumbers(self.last_b_item_line_number,
|
| + self.last_a_item_line_number):
|
| + return None
|
| + copyr = None
|
| + m = _CopyrightsScanner._full_copyright_indicator_re.search(line)
|
| + if m and \
|
| + not _CopyrightsScanner._copyright_disindicator_re.match(m.group(1)):
|
| + copyr = m.group(0)
|
| + # Prettify the authorship string.
|
| + copyr = re.sub(r'([,.])?\s*$/', '', copyr)
|
| + copyr = re.sub(self._copyright_indicator, '', copyr, flags=re.IGNORECASE)
|
| + copyr = re.sub(r'^\s+', '', copyr)
|
| + copyr = re.sub(r'\s{2,}', ' ', copyr)
|
| + copyr = re.sub(r'\\@', '@', copyr)
|
| + return copyr
|
| +
|
| +
|
def FindCopyrights(root_dir, files_to_scan):
  """Determines code authorship, and finds generated files.

  Args:
    root_dir: The root directory, to which all other paths are relative.
    files_to_scan: The list of file names to scan.
  Returns:
    A list with one entry per scanned file, in the order of |files_to_scan|.
    Each entry is a list of the copyright strings found in that file. If the
    file is generated, its entry is [GENERATED_FILE]; if the file has no
    copyright info, its entry is [NO_COPYRIGHT].
  """
  # Only this many leading lines are checked for "generated file" markers.
  header_lines_count = 25
  copyrights = []
  for file_name in files_to_scan:
    header_lines = []
    file_copyrights = []
    scanner = _CopyrightsScanner()
    # NOTE(review): the file is opened in text mode; under Python 3 this may
    # raise on files that are not valid in the default encoding -- confirm
    # the target interpreter.
    with open(os.path.join(root_dir, file_name), 'r') as f:
      # Iterate the file object itself so that the whole file is never held
      # in memory at once.
      for linenum, l in enumerate(f, 1):
        if linenum <= header_lines_count:
          header_lines.append(l)
        c = scanner.MatchLine(linenum, l)
        if c:
          file_copyrights.append(c)
    if _IsGeneratedFile(''.join(header_lines)):
      copyrights.append([GENERATED_FILE])
    elif file_copyrights:
      copyrights.append(file_copyrights)
    else:
      copyrights.append([NO_COPYRIGHT])
  return copyrights
|
| +
|
| +
|
def FindCopyrightViolations(root_dir, files_to_scan):
  """Looks for files that do not belong exclusively to the Chromium Authors.

  Generated files and files without any copyright statement are never
  reported.

  Args:
    root_dir: The root directory, to which all other paths are relative.
    files_to_scan: The list of file names to scan.
  Returns:
    The list of file names (normalized with os.path.normpath) that contain
    non-Chromium copyrights.
  """
  copyrights = FindCopyrights(root_dir, files_to_scan)
  allowed_copyrights_re = re.compile(
      r'^(?:20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. '
      r'All rights reserved.*)$')
  offending_files = []
  # The builtin zip() is used because itertools.izip() is missing on
  # Python 3; FindCopyrights() returns one entry per scanned file.
  for f, cs in zip(files_to_scan, copyrights):
    if cs[0] == GENERATED_FILE or cs[0] == NO_COPYRIGHT:
      continue
    # A single copyright line not owned by Chromium makes the file offending.
    if not all(allowed_copyrights_re.match(c) for c in cs):
      offending_files.append(os.path.normpath(f))
  return offending_files
|
|
|