android_webview/tools/copyright_scanner.py - Issue 622493004: [Android WebView] Rewrite copyrights scanner in Python

Side by Side Diff: android_webview/tools/copyright_scanner.py

Issue 622493004: [Android WebView] Rewrite copyrights scanner in Python (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Add license header to the manual test script Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # Copyright 2014 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 """Utilities for scanning source files to determine code authorship.

	6 """

	7

	8 import itertools

	9 import os

	10 import re

	11

	12

	13 def FindFiles(root_dir, start_paths_list, excluded_dirs_list):

	14 """Similar to UNIX utility find(1), searches for files in the directories.

	15 Automatically leaves out only source code files.

	16 Args:

	17 root_dir: The root directory, to which all other paths are relative.

	18 start_paths_list: The list of paths to start search from. Each path can

	19 be a file or a directory.

	20 excluded_dirs_list: The list of directories to skip.

	21 Returns:

	22 The list of source code files found, relative to \|root_dir\|.

	23 """

	24 dirs_blacklist = ['/' + d + '/' for d in excluded_dirs_list]

	25 def IsBlacklistedDir(d):

	26 for item in dirs_blacklist:

	27 if item in d:

	28 return True

	29 return False

	30

	31 files_whitelist_re = re.compile(

	32 r'\.(asm\|c(c\|pp\|xx)?\|h(h\|pp\|xx)?\|p(l\|m)\|xs\|sh\|php\|py(\|x)'

	33 '\|rb\|idl\|java\|el\|sc(i\|e)\|cs\|pas\|inc\|js\|pac\|html\|dtd\|xsl\|mod\|mm?'

	34 '\|tex\|mli?)$')

	35 files = []

	36

	37 base_path_len = len(root_dir)

	38 for path in start_paths_list:

	39 full_path = os.path.join(root_dir, path)

	40 if os.path.isfile(full_path):

	41 if files_whitelist_re.search(path):

	42 files.append(path)

	43 else:

	44 for dirpath, dirnames, filenames in os.walk(full_path):

	45 # Remove excluded subdirs for faster scanning.

	46 for item in dirnames[:]:

	47 if IsBlacklistedDir(os.path.join(dirpath, item)[base_path_len + 1:]):

	48 dirnames.remove(item)

	49 for filename in filenames:

	50 filepath = os.path.join(dirpath, filename)[base_path_len + 1:]

	51 if files_whitelist_re.search(filepath) and \

	52 not IsBlacklistedDir(filepath):

	53 files.append(filepath)

	54 return files

	55

	56

	57 python_multiline_string_double_re = re.compile(

	58 r'"""[^"]*(?:"""\|$)', flags=re.MULTILINE)

	59 python_multiline_string_single_re = re.compile(

	60 r"'''[^']*(?:'''\|$)", flags=re.MULTILINE)

	61 automatically_generated_re = re.compile(

	62 r'(All changes made in this file will be lost'

	63 '\|DO NOT (EDIT\|delete this file)'

	64 '\|Generated (at\|automatically\|data)'

	65 '\|Automatically generated'

	66 '\|\Wgenerated\s+(?:\w+\s+)*file\W)', flags=re.IGNORECASE)

	67

	68 def _IsGeneratedFile(header):

	69 header = header.upper()

	70 if '"""' in header:

	71 header = python_multiline_string_double_re.sub('', header)

	72 if "'''" in header:

	73 header = python_multiline_string_single_re.sub('', header)

	74 # First do simple strings lookup to save time.

	75 if 'ALL CHANGES MADE IN THIS FILE WILL BE LOST' in header:

	76 return True

	77 if 'DO NOT EDIT' in header or 'DO NOT DELETE' in header or \

	78 'GENERATED' in header:

	79 return automatically_generated_re.search(header)

	80 return False

	81

	82

	83 GENERATED_FILE = 'GENERATED FILE'

	84 NO_COPYRIGHT = 'No copyright'

	85

	86 class _CopyrightsScanner(object):

	87 _c_comment_re = re.compile(r'''"[^"\\](?:\\.[^"\\])*"''')

	88 _copyright_indicator = r'(?:copyright\|copr\.\|\xc2\xa9\|$c$)'

	89 _full_copyright_indicator_re = \

	90 re.compile(r'(?:\W\|^)' + _copyright_indicator + r'(?::\s\|\s+)(\w.)$', \

	91 re.IGNORECASE)

	92 _copyright_disindicator_re = \

	93 re.compile(r'\s*\b(?:info(?:rmation)?\|notice\|and\|or)\b', re.IGNORECASE)

	94

	95 def __init__(self):

	96 self.max_line_numbers_proximity = 3

	97 self.last_a_item_line_number = -200

	98 self.last_b_item_line_number = -100

	99

	100 def _CloseLineNumbers(self, a, b):

	101 return 0 <= a - b <= self.max_line_numbers_proximity

	102

	103 def MatchLine(self, line_number, line):

	104 if '"' in line:

	105 line = _CopyrightsScanner._c_comment_re.sub('', line)

	106 upcase_line = line.upper()

	107 # Record '(a)' and '(b)' last occurences in C++ comments.

	108 # This is to filter out '(c)' used as a list item inside C++ comments.

	109 # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"

	110 cpp_comment_idx = upcase_line.find('//')

	111 if cpp_comment_idx != -1:

	112 if upcase_line.find('(A)') > cpp_comment_idx:

	113 self.last_a_item_line_number = line_number

	114 if upcase_line.find('(B)') > cpp_comment_idx:

	115 self.last_b_item_line_number = line_number

	116 # Fast bailout, uses the same patterns as _copyright_indicator regexp.

	117 if not 'COPYRIGHT' in upcase_line and not 'COPR.' in upcase_line \

	118 and not '\xc2\xa9' in upcase_line:

	119 c_item_index = upcase_line.find('(C)')

	120 if c_item_index == -1:

	121 return None

	122 if c_item_index > cpp_comment_idx and \

	123 self._CloseLineNumbers(line_number,

	124 self.last_b_item_line_number) and \

	125 self._CloseLineNumbers(self.last_b_item_line_number,

	126 self.last_a_item_line_number):

	127 return None

	128 copyr = None

	129 m = _CopyrightsScanner._full_copyright_indicator_re.search(line)

	130 if m and \

	131 not _CopyrightsScanner._copyright_disindicator_re.match(m.group(1)):

	132 copyr = m.group(0)

	133 # Prettify the authorship string.

	134 copyr = re.sub(r'([,.])?\s*$/', '', copyr)

	135 copyr = re.sub(self._copyright_indicator, '', copyr, flags=re.IGNORECASE)

	136 copyr = re.sub(r'^\s+', '', copyr)

	137 copyr = re.sub(r'\s{2,}', ' ', copyr)

	138 copyr = re.sub(r'\\@', '@', copyr)

	139 return copyr

	140

	141

	142 def FindCopyrights(root_dir, files_to_scan):

	143 """Determines code autorship, and finds generated files.

	144 Args:

	145 root_dir: The root directory, to which all other paths are relative.

	146 files_to_scan: The list of file names to scan.

	147 Returns:

	148 The list of copyrights associated with each of the files given.

	149 If the certain file is generated, the corresponding list consists a single

	150 entry -- 'GENERATED_FILE' string. If the file has no copyright info,

	151 the corresponding list contains 'NO_COPYRIGHT' string.

	152 """

	153 copyrights = []

	154 for file_name in files_to_scan:

	155 linenum = 0

	156 header = ''

	157 file_copyrights = []

	158 scanner = _CopyrightsScanner()

	159 with open(os.path.join(root_dir, file_name), 'r') as f:

	160 for l in f.readlines():

	161 linenum += 1

	162 if linenum <= 25:

	163 header += l

	164 c = scanner.MatchLine(linenum, l)

	165 if c:

	166 file_copyrights.append(c)

	167 if _IsGeneratedFile(header):

	168 copyrights.append([GENERATED_FILE])

	169 elif file_copyrights:

	170 copyrights.append(file_copyrights)

	171 else:

	172 copyrights.append([NO_COPYRIGHT])

	173 return copyrights

	174

	175

	176 def FindCopyrightViolations(root_dir, files_to_scan):

	177 """Looks for files that are not belong exlusively to the Chromium Authors.

	178 Args:

	179 root_dir: The root directory, to which all other paths are relative.

	180 files_to_scan: The list of file names to scan.

	181 Returns:

	182 The list of file names that contain non-Chromium copyrights.

	183 """

	184 copyrights = FindCopyrights(root_dir, files_to_scan)

	185 offending_files = []

	186 allowed_copyrights_re = re.compile(

	187 r'^(?:20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. '

	188 'All rights reserved.*)$')

	189 for f, cs in itertools.izip(files_to_scan, copyrights):

	190 if cs[0] == GENERATED_FILE or cs[0] == NO_COPYRIGHT:

	191 continue

	192 for c in cs:

	193 if not allowed_copyrights_re.match(c):

	194 offending_files.append(os.path.normpath(f))

	195 break

	196 return offending_files

OLD	NEW

« no previous file with comments | « no previous file | android_webview/tools/find_copyrights.pl » ('j') | no next file with comments »