| Index: android_webview/tools/copyright_scanner.py
|
| diff --git a/android_webview/tools/copyright_scanner.py b/android_webview/tools/copyright_scanner.py
|
| index 90da30ded7303121fa678c01bd683061f75bc6fc..7e4ef0c31db52fc68692c06c09727ec9f1b29c54 100644
|
| --- a/android_webview/tools/copyright_scanner.py
|
| +++ b/android_webview/tools/copyright_scanner.py
|
| @@ -6,14 +6,13 @@
|
| """
|
|
|
| import itertools
|
| -import os
|
| -import re
|
|
|
|
|
| -def FindFiles(root_dir, start_paths_list, excluded_dirs_list):
|
| +def FindFiles(input_api, root_dir, start_paths_list, excluded_dirs_list):
|
| """Similar to UNIX utility find(1), searches for files in the directories.
|
| - Automatically leaves out only source code files.
|
| + Automatically keeps only source code files.
|
| Args:
|
| + input_api: InputAPI, as in presubmit scripts.
|
| root_dir: The root directory, to which all other paths are relative.
|
| start_paths_list: The list of paths to start search from. Each path can
|
| be a file or a directory.
|
| @@ -28,7 +27,7 @@ def FindFiles(root_dir, start_paths_list, excluded_dirs_list):
|
| return True
|
| return False
|
|
|
| - files_whitelist_re = re.compile(
|
| + files_whitelist_re = input_api.re.compile(
|
| r'\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)'
|
| '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?'
|
| '|tex|mli?)$')
|
| @@ -36,66 +35,75 @@ def FindFiles(root_dir, start_paths_list, excluded_dirs_list):
|
|
|
| base_path_len = len(root_dir)
|
| for path in start_paths_list:
|
| - full_path = os.path.join(root_dir, path)
|
| - if os.path.isfile(full_path):
|
| + full_path = input_api.os_path.join(root_dir, path)
|
| + if input_api.os_path.isfile(full_path):
|
| if files_whitelist_re.search(path):
|
| files.append(path)
|
| else:
|
| - for dirpath, dirnames, filenames in os.walk(full_path):
|
| + for dirpath, dirnames, filenames in input_api.os_walk(full_path):
|
| # Remove excluded subdirs for faster scanning.
|
| for item in dirnames[:]:
|
| - if IsBlacklistedDir(os.path.join(dirpath, item)[base_path_len + 1:]):
|
| + if IsBlacklistedDir(
|
| + input_api.os_path.join(dirpath, item)[base_path_len + 1:]):
|
| dirnames.remove(item)
|
| for filename in filenames:
|
| - filepath = os.path.join(dirpath, filename)[base_path_len + 1:]
|
| + filepath = \
|
| + input_api.os_path.join(dirpath, filename)[base_path_len + 1:]
|
| if files_whitelist_re.search(filepath) and \
|
| not IsBlacklistedDir(filepath):
|
| files.append(filepath)
|
| return files
|
|
|
|
|
| -python_multiline_string_double_re = re.compile(
|
| - r'"""[^"]*(?:"""|$)', flags=re.MULTILINE)
|
| -python_multiline_string_single_re = re.compile(
|
| - r"'''[^']*(?:'''|$)", flags=re.MULTILINE)
|
| -automatically_generated_re = re.compile(
|
| - r'(All changes made in this file will be lost'
|
| - '|DO NOT (EDIT|delete this file)'
|
| - '|Generated (at|automatically|data)'
|
| - '|Automatically generated'
|
| - '|\Wgenerated\s+(?:\w+\s+)*file\W)', flags=re.IGNORECASE)
|
| -
|
| -def _IsGeneratedFile(header):
|
| - header = header.upper()
|
| - if '"""' in header:
|
| - header = python_multiline_string_double_re.sub('', header)
|
| - if "'''" in header:
|
| - header = python_multiline_string_single_re.sub('', header)
|
| - # First do simple strings lookup to save time.
|
| - if 'ALL CHANGES MADE IN THIS FILE WILL BE LOST' in header:
|
| - return True
|
| - if 'DO NOT EDIT' in header or 'DO NOT DELETE' in header or \
|
| - 'GENERATED' in header:
|
| - return automatically_generated_re.search(header)
|
| - return False
|
| -
|
| -
|
| -GENERATED_FILE = 'GENERATED FILE'
|
| -NO_COPYRIGHT = '*No copyright*'
|
| +class _GeneratedFilesDetector(object):
|
| +  """Recognizes auto-generated files by the text of their header."""
|
| +
|
| +  # Marker strings that FindCopyrights() stores in place of a list of
|
| +  # copyright holders.
|
| +  GENERATED_FILE = 'GENERATED FILE'
|
| +  NO_COPYRIGHT = '*No copyright*'
|
| +
|
| +  def __init__(self, input_api):
|
| +    """Compiles the patterns used by IsGeneratedFile().
|
| +
|
| +    Args:
|
| +      input_api: InputAPI, as in presubmit scripts. Only its 're' attribute
|
| +        is used.
|
| +    """
|
| +    self.python_multiline_string_double_re = input_api.re.compile(
|
| +        r'"""[^"]*(?:"""|$)', flags=input_api.re.MULTILINE)
|
| +    self.python_multiline_string_single_re = input_api.re.compile(
|
| +        r"'''[^']*(?:'''|$)", flags=input_api.re.MULTILINE)
|
| +    # Every fragment is a raw string so that \W, \s and \w reach the regex
|
| +    # engine verbatim instead of relying on unrecognized string escapes.
|
| +    self.automatically_generated_re = input_api.re.compile(
|
| +        r'(All changes made in this file will be lost'
|
| +        r'|DO NOT (EDIT|delete this file)'
|
| +        r'|Generated (at|automatically|data)'
|
| +        r'|Automatically generated'
|
| +        r'|\Wgenerated\s+(?:\w+\s+)*file\W)', flags=input_api.re.IGNORECASE)
|
| +
|
| +  def IsGeneratedFile(self, header):
|
| +    """Tells whether |header| carries an auto-generated-file notice.
|
| +
|
| +    Args:
|
| +      header: The text to inspect. FindCopyrights() passes the first 25
|
| +        lines of a file.
|
| +    Returns:
|
| +      True if a notice was found, False otherwise.
|
| +    """
|
| +    header = header.upper()
|
| +    # Drop triple-quoted Python strings so their contents are not matched.
|
| +    if '"""' in header:
|
| +      header = self.python_multiline_string_double_re.sub('', header)
|
| +    if "'''" in header:
|
| +      header = self.python_multiline_string_single_re.sub('', header)
|
| +    # First do simple strings lookup to save time.
|
| +    if 'ALL CHANGES MADE IN THIS FILE WILL BE LOST' in header:
|
| +      return True
|
| +    if 'DO NOT EDIT' in header or 'DO NOT DELETE' in header or \
|
| +        'GENERATED' in header:
|
| +      # bool() keeps the return type uniform instead of leaking a match object.
|
| +      return bool(self.automatically_generated_re.search(header))
|
| +    return False
|
| +
|
|
|
| class _CopyrightsScanner(object):
|
| - _c_comment_re = re.compile(r'''"[^"\\]*(?:\\.[^"\\]*)*"''')
|
| - _copyright_indicator = r'(?:copyright|copr\.|\xc2\xa9|\(c\))'
|
| - _full_copyright_indicator_re = \
|
| - re.compile(r'(?:\W|^)' + _copyright_indicator + r'(?::\s*|\s+)(\w.*)$', \
|
| - re.IGNORECASE)
|
| - _copyright_disindicator_re = \
|
| - re.compile(r'\s*\b(?:info(?:rmation)?|notice|and|or)\b', re.IGNORECASE)
|
| -
|
| - def __init__(self):
|
| + @staticmethod
|
| + def StaticInit(input_api):
|
| + _CopyrightsScanner._c_comment_re = \
|
| + input_api.re.compile(r'''"[^"\\]*(?:\\.[^"\\]*)*"''')
|
| + _CopyrightsScanner._copyright_indicator = \
|
| + r'(?:copyright|copr\.|\xc2\xa9|\(c\))'
|
| + _CopyrightsScanner._full_copyright_indicator_re = input_api.re.compile(
|
| + r'(?:\W|^)' + _CopyrightsScanner._copyright_indicator + \
|
| + r'(?::\s*|\s+)(\w.*)$', input_api.re.IGNORECASE)
|
| + _CopyrightsScanner._copyright_disindicator_re = input_api.re.compile(
|
| + r'\s*\b(?:info(?:rmation)?|notice|and|or)\b', input_api.re.IGNORECASE)
|
| +
|
| + def __init__(self, input_api):
|
| self.max_line_numbers_proximity = 3
|
| self.last_a_item_line_number = -200
|
| self.last_b_item_line_number = -100
|
| + self.re = input_api.re
|
|
|
| def _CloseLineNumbers(self, a, b):
|
| return 0 <= a - b <= self.max_line_numbers_proximity
|
| @@ -131,17 +139,20 @@ class _CopyrightsScanner(object):
|
| not _CopyrightsScanner._copyright_disindicator_re.match(m.group(1)):
|
| copyr = m.group(0)
|
| # Prettify the authorship string.
|
| - copyr = re.sub(r'([,.])?\s*$/', '', copyr)
|
| - copyr = re.sub(self._copyright_indicator, '', copyr, flags=re.IGNORECASE)
|
| - copyr = re.sub(r'^\s+', '', copyr)
|
| - copyr = re.sub(r'\s{2,}', ' ', copyr)
|
| - copyr = re.sub(r'\\@', '@', copyr)
|
| + # Strip trailing whitespace and an optional ',' or '.' in front of it.
|
| + # The pattern must end at '$': with a '/' after it, it can never match.
|
| + copyr = self.re.sub(r'([,.])?\s*$', '', copyr)
|
| + copyr = self.re.sub(
|
| + _CopyrightsScanner._copyright_indicator, '', copyr, \
|
| + flags=self.re.IGNORECASE)
|
| + copyr = self.re.sub(r'^\s+', '', copyr)
|
| + copyr = self.re.sub(r'\s{2,}', ' ', copyr)
|
| + copyr = self.re.sub(r'\\@', '@', copyr)
|
| return copyr
|
|
|
|
|
| -def FindCopyrights(root_dir, files_to_scan):
|
| +def FindCopyrights(input_api, root_dir, files_to_scan):
|
| - """Determines code autorship, and finds generated files.
|
| + """Determines code authorship, and finds generated files.
|
| Args:
|
| + input_api: InputAPI, as in presubmit scripts.
|
| root_dir: The root directory, to which all other paths are relative.
|
| files_to_scan: The list of file names to scan.
|
| Returns:
|
| @@ -150,47 +161,52 @@ def FindCopyrights(root_dir, files_to_scan):
|
| entry -- 'GENERATED_FILE' string. If the file has no copyright info,
|
| the corresponding list contains 'NO_COPYRIGHT' string.
|
| """
|
| + generated_files_detector = _GeneratedFilesDetector(input_api)
|
| + _CopyrightsScanner.StaticInit(input_api)
|
| copyrights = []
|
| for file_name in files_to_scan:
|
| linenum = 0
|
| - header = ''
|
| + header = []
|
| file_copyrights = []
|
| - scanner = _CopyrightsScanner()
|
| - with open(os.path.join(root_dir, file_name), 'r') as f:
|
| - for l in f.readlines():
|
| - linenum += 1
|
| - if linenum <= 25:
|
| - header += l
|
| - c = scanner.MatchLine(linenum, l)
|
| - if c:
|
| - file_copyrights.append(c)
|
| - if _IsGeneratedFile(header):
|
| - copyrights.append([GENERATED_FILE])
|
| - elif file_copyrights:
|
| - copyrights.append(file_copyrights)
|
| - else:
|
| - copyrights.append([NO_COPYRIGHT])
|
| + scanner = _CopyrightsScanner(input_api)
|
| + contents = input_api.ReadFile(
|
| + input_api.os_path.join(root_dir, file_name), 'r')
|
| + for l in contents.split('\n'):
|
| + linenum += 1
|
| + if linenum <= 25:
|
| + header.append(l)
|
| + c = scanner.MatchLine(linenum, l)
|
| + if c:
|
| + file_copyrights.append(c)
|
| + if generated_files_detector.IsGeneratedFile('\n'.join(header)):
|
| + copyrights.append([_GeneratedFilesDetector.GENERATED_FILE])
|
| + elif file_copyrights:
|
| + copyrights.append(file_copyrights)
|
| + else:
|
| + copyrights.append([_GeneratedFilesDetector.NO_COPYRIGHT])
|
| return copyrights
|
|
|
|
|
| -def FindCopyrightViolations(root_dir, files_to_scan):
|
| +def FindCopyrightViolations(input_api, root_dir, files_to_scan):
|
| - """Looks for files that are not belong exlusively to the Chromium Authors.
|
| + """Looks for files that do not belong exclusively to the Chromium Authors.
|
| Args:
|
| + input_api: InputAPI, as in presubmit scripts.
|
| root_dir: The root directory, to which all other paths are relative.
|
| files_to_scan: The list of file names to scan.
|
| Returns:
|
| The list of file names that contain non-Chromium copyrights.
|
| """
|
| - copyrights = FindCopyrights(root_dir, files_to_scan)
|
| + copyrights = FindCopyrights(input_api, root_dir, files_to_scan)
|
| offending_files = []
|
| - allowed_copyrights_re = re.compile(
|
| + allowed_copyrights_re = input_api.re.compile(
|
| r'^(?:20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. '
|
| 'All rights reserved.*)$')
|
| for f, cs in itertools.izip(files_to_scan, copyrights):
|
| - if cs[0] == GENERATED_FILE or cs[0] == NO_COPYRIGHT:
|
| + # Generated files and files with no copyright line are never violations.
|
| + if cs[0] == _GeneratedFilesDetector.GENERATED_FILE or \
|
| + cs[0] == _GeneratedFilesDetector.NO_COPYRIGHT:
|
| continue
|
| for c in cs:
|
| if not allowed_copyrights_re.match(c):
|
| - offending_files.append(os.path.normpath(f))
|
| + # One copyright line outside the allowed pattern flags the whole file.
|
| + offending_files.append(input_api.os_path.normpath(f))
|
| break
|
| return offending_files
|
|
|