Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(122)

Side by Side Diff: android_webview/tools/find_copyrights.pl

Issue 622493004: [Android WebView] Rewrite copyrights scanner in Python (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/perl -w
2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 # Use: echo filename1.cc ... | find_copyrights.pl
7 # or: find_copyrights.pl list_file
8 # or: find_files.pl ... | find_copyrights.pl
9
10 use strict;
11 use warnings;
12 use File::Basename;
13
# Forward declarations: both subs are defined below the driver loop and carry
# prototypes, so they have to be known before the first call is compiled.
sub check_is_generated_file($);
sub start_copyright_parsing();

my $progname = basename($0);

# Only this many leading lines of every file are collected and handed to
# check_is_generated_file().
my $generated_file_scan_boundary = 25;

# Driver: every input line (from STDIN or from the list files named on the
# command line) is the path of one file to scan. For each path a single
# "<path>\t<result>" line is printed, where <result> is "GENERATED FILE", the
# " / "-joined list of copyright strings found, or "*No copyright*".
while (my $file = <>) {
    chomp $file;
    my $file_header = '';
    my %copyrights;

    # Three-argument open with a lexical handle: the path is taken verbatim
    # (two-argument open trims whitespace and interprets mode characters), and
    # the handle cannot collide with a global bareword.
    open(my $fh, '<', $file)
        or die "$progname: Unable to access $file: $!\n";

    # The parser keeps state between lines, so every file gets a fresh one.
    my $parse_copyright = start_copyright_parsing();
    while (my $line = <$fh>) {
        # $. is the line number within $fh, the handle read last.
        $file_header .= $line unless $. > $generated_file_scan_boundary;
        my $copyright_match = $parse_copyright->($line, $.);
        if ($copyright_match) {
            # Keyed by the lower-cased text so that entries differing only in
            # case are reported once.
            $copyrights{lc($copyright_match)} = $copyright_match;
        }
    }
    close($fh);

    my $copyright = join(" / ", sort values %copyrights);
    my $summary = check_is_generated_file($file_header)
        ? "GENERATED FILE"
        : ($copyright || "*No copyright*");
    print "$file\t$summary\n";
}
44
# Decides whether a file header carries a "generated file" marker.
#
# Takes the header text as its only argument and returns 1 when the text looks
# like it belongs to a machine-generated file, 0 otherwise. Matching is
# case-insensitive (the text is upper-cased first).
sub check_is_generated_file($) {
    my $license = uc($_[0]);

    # Remove Python multiline comments to avoid false positives.
    if (index($license, '"""') != -1) {
        $license =~ s/"""[^"]*(?:"""|$)//mg;
    }
    if (index($license, "'''") != -1) {
        $license =~ s/'''[^']*(?:'''|$)//mg;
    }

    # Quick checks using index: the regexp is only run when one of the cheap
    # substring probes hits.
    if (index($license, 'ALL CHANGES MADE IN THIS FILE WILL BE LOST') != -1) {
        return 1;
    }
    if (index($license, 'DO NOT EDIT') != -1 ||
        index($license, 'DO NOT DELETE') != -1 ||
        index($license, 'GENERATED') != -1) {
        # The alternatives are joined into one pattern outside of the match
        # operator: quote and concatenation characters written inside /.../
        # would be matched literally and silently disable the alternatives.
        my $generated_marker_regex = join('|',
            'All changes made in this file will be lost',
            'DO NOT (?:EDIT|delete this file)',
            'Generated (?:at|automatically|data)',
            'Automatically generated',
            '\Wgenerated\s+(?:\w+\s+)*file\W');
        return ($license =~ /$generated_marker_regex/i) ? 1 : 0;
    }
    return 0;
}
67
# Tells whether the first value follows the second one closely enough: true
# when (first - second) lies in the inclusive range [0, third argument].
sub are_within_increasing_progression($$$) {
    my ($later, $earlier, $max_distance) = @_;
    my $distance = $later - $earlier;
    return $distance >= 0 && $distance <= $max_distance;
}
72
# Creates a stateful copyright parser for one file.
#
# Returns a closure that is called as $parser->($line, $line_number) and
# returns the copyright text found on that line, or '' when there is none.
# The closure remembers the line numbers where '(a)' and '(b)' were last seen
# inside C++ comments, so that a following '(c)' list item is not mistaken for
# a copyright sign.
sub start_copyright_parsing() {
    my $max_line_numbers_proximity = 3;
    # Set up the defaults in a way that proximity checks will not succeed.
    my $last_a_item_line_number = -200;
    my $last_b_item_line_number = -100;

    # The patterns are constant, so they are built once per parser instead of
    # once per line.
    my $copyright_indicator_regex =
        '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))';
    my $full_copyright_indicator_regex =
        sprintf '(?:\W|^)%s(?::\s*|\s+)(\w.*)$', $copyright_indicator_regex;
    my $copyright_disindicator_regex =
        '\b(?:info(?:rmation)?|notice|and|or)\b';

    return sub {
        my ($line, $line_number) = @_;

        # Remove C / C++ strings to avoid false positives.
        if (index($line, '"') != -1) {
            $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;
        }

        my $uc_line = uc($line);

        # Record '(a)' and '(b)' last occurrences in C++ comments.
        my $cpp_comment_idx = index($uc_line, '//');
        if ($cpp_comment_idx != -1) {
            if (index($uc_line, '(A)') > $cpp_comment_idx) {
                $last_a_item_line_number = $line_number;
            }
            if (index($uc_line, '(B)') > $cpp_comment_idx) {
                $last_b_item_line_number = $line_number;
            }
        }

        # Fast bailout, uses the same patterns as the regexp. The copyright
        # sign probes are double-quoted on purpose: in single quotes the
        # escapes would be searched for as literal backslash sequences and
        # lines marked only with the sign would be dropped here.
        if (index($uc_line, 'COPYRIGHT') == -1 &&
            index($uc_line, 'COPR.') == -1 &&
            index($uc_line, "\x{00a9}") == -1 &&
            index($uc_line, "\xc2\xa9") == -1) {

            my $c_item_index = index($uc_line, '(C)');
            return '' if ($c_item_index == -1);
            # Filter out 'c' used as a list item inside C++ comments.
            # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
            if ($c_item_index > $cpp_comment_idx &&
                are_within_increasing_progression(
                    $line_number,
                    $last_b_item_line_number,
                    $max_line_numbers_proximity) &&
                are_within_increasing_progression(
                    $last_b_item_line_number,
                    $last_a_item_line_number,
                    $max_line_numbers_proximity)) {
                return '';
            }
        }

        my $copyright = '';
        if ($line =~ m%$full_copyright_indicator_regex%i) {
            my $match = $1;
            # Text such as "copyright notice" or "copyright information" talks
            # about copyrights instead of stating one.
            if ($match !~ m%^\s*$copyright_disindicator_regex%i) {
                # Drop a trailing comma / period and trailing whitespace.
                $match =~ s/([,.])?\s*$//;
                # Drop repeated indicators, e.g. "Copyright (c) 2014 ...".
                $match =~ s/$copyright_indicator_regex//ig;
                $match =~ s/^\s+//;
                $match =~ s/\s{2,}/ /g;
                $match =~ s/\\@/@/g;
                $copyright = $match;
            }
        }

        return $copyright;
    };
}
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698