Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(611)

Side by Side Diff: android_webview/tools/find_copyrights.pl

Issue 12087102: [Android WebView] Avoid detecting '(c)' as a copyright sign in lists. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Comments addressed Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | android_webview/tools/third_party_files_whitelist.txt » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/perl -w 1 #!/usr/bin/perl -w
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 # Use: find_copyrights.pl <start-from> [exclude-dir ...] 6 # Use: find_copyrights.pl <start-from> [exclude-dir ...]
7 7
8 use strict; 8 use strict;
9 use warnings; 9 use warnings;
10 use File::Basename; 10 use File::Basename;
11 11
12 sub check_is_generated_file($); 12 sub check_is_generated_file($);
13 sub parse_copyright($); 13 sub start_copyright_parsing();
14 14
15 my $progname = basename($0); 15 my $progname = basename($0);
16 16
17 my $root_dir = shift @ARGV; 17 my $root_dir = shift @ARGV;
18 my @find_args = (); 18 my @find_args = ();
19 while (@ARGV) { 19 while (@ARGV) {
20 my $path = shift @ARGV; 20 my $path = shift @ARGV;
21 push @find_args, qw'-not ( -path', "*/$path/*", qw'-prune )' 21 push @find_args, qw'-not ( -path', "*/$path/*", qw'-prune )'
22 } 22 }
23 push @find_args, qw(-follow -type f -print); 23 push @find_args, qw(-follow -type f -print);
24 24
25 open FIND, '-|', 'find', $root_dir, @find_args 25 open FIND, '-|', 'find', $root_dir, @find_args
26 or die "$progname: Couldn't exec find: $!\n"; 26 or die "$progname: Couldn't exec find: $!\n";
27 my $check_regex = '\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' . 27 my $check_regex = '\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' .
28 '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' . 28 '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' .
29 '|tex|mli?)$'; 29 '|tex|mli?)$';
30 my @files = (); 30 my @files = ();
31 while (<FIND>) { 31 while (<FIND>) {
32 chomp; 32 chomp;
33 push @files, $_ unless (-z $_ || !m%$check_regex%); 33 push @files, $_ unless (-z $_ || !m%$check_regex%);
34 } 34 }
35 close FIND; 35 close FIND;
36 36
37 my $generated_file_scan_boundary = 25; 37 my $generated_file_scan_boundary = 25;
38 while (@files) { 38 while (@files) {
39 my $file = shift @files; 39 my $file = shift @files;
40 my $file_header = ''; 40 my $file_header = '';
41 my %copyrights; 41 my %copyrights;
42 open (F, "<$file") or die "$progname: Unable to access $file\n"; 42 open (F, "<$file") or die "$progname: Unable to access $file\n";
43 my $parse_copyright = start_copyright_parsing();
43 while (<F>) { 44 while (<F>) {
44 $file_header .= $_ unless $. > $generated_file_scan_boundary; 45 $file_header .= $_ unless $. > $generated_file_scan_boundary;
45 my $copyright_match = parse_copyright($_); 46 my $copyright_match = $parse_copyright->($_, $.);
46 if ($copyright_match) { 47 if ($copyright_match) {
47 $copyrights{lc("$copyright_match")} = "$copyright_match"; 48 $copyrights{lc("$copyright_match")} = "$copyright_match";
48 } 49 }
49 } 50 }
50 close(F); 51 close(F);
51 my $copyright = join(" / ", values %copyrights); 52 my $copyright = join(" / ", values %copyrights);
52 print "$file\t"; 53 print "$file\t";
53 if (check_is_generated_file($file_header)) { 54 if (check_is_generated_file($file_header)) {
54 print "GENERATED FILE"; 55 print "GENERATED FILE";
55 } else { 56 } else {
(...skipping 18 matching lines...) Expand all
74 if (index($license, 'DO NOT EDIT') != -1 || 75 if (index($license, 'DO NOT EDIT') != -1 ||
75 index($license, 'DO NOT DELETE') != -1 || 76 index($license, 'DO NOT DELETE') != -1 ||
76 index($license, 'GENERATED') != -1) { 77 index($license, 'GENERATED') != -1) {
77 return ($license =~ /(All changes made in this file will be lost' . 78 return ($license =~ /(All changes made in this file will be lost' .
78 'DO NOT (EDIT|delete this file)|Generated (at|automatically|data)' . 79 'DO NOT (EDIT|delete this file)|Generated (at|automatically|data)' .
79 '|Automatically generated|\Wgenerated\s+(?:\w+\s+)*file\W)/i); 80 '|Automatically generated|\Wgenerated\s+(?:\w+\s+)*file\W)/i);
80 } 81 }
81 return 0; 82 return 0;
82 } 83 }
83 84
84 sub parse_copyright($) { 85 sub are_within_increasing_progression($$$) {
85 my $line = $_[0]; 86 my $delta = $_[0] - $_[1];
86 # Remove C / C++ strings to avoid false positives. 87 return $delta >= 0 && $delta <= $_[2];
87 if (index($line, '"') != -1) { 88 }
88 $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g; 89
90 sub start_copyright_parsing() {
91 my $max_line_numbers_proximity = 3;
92 # Set up the defaults so that the proximity checks cannot succeed.
93 my $last_a_item_line_number = -200;
94 my $last_b_item_line_number = -100;
95
96 return sub {
97 my $line = $_[0];
98 my $line_number = $_[1];
99
100 # Remove C / C++ strings to avoid false positives.
101 if (index($line, '"') != -1) {
102 $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;
103 }
104
105 my $uc_line = uc($line);
106
107 # Record '(a)' and '(b)' last occurrences in C++ comments.
108 my $cpp_comment_idx = index($uc_line, '//');
109 if ($cpp_comment_idx != -1) {
110 if (index($uc_line, '(A)') > $cpp_comment_idx) {
111 $last_a_item_line_number = $line_number;
112 }
113 if (index($uc_line, '(B)') > $cpp_comment_idx) {
114 $last_b_item_line_number = $line_number;
115 }
116 }
117
118 # Fast bailout, uses the same patterns as the regexp.
119 if (index($uc_line, 'COPYRIGHT') == -1 &&
120 index($uc_line, 'COPR.') == -1 &&
121 index($uc_line, '\x{00a9}') == -1 &&
122 index($uc_line, '\xc2\xa9') == -1) {
123
124 my $c_item_index = index($uc_line, '(C)');
125 return '' if ($c_item_index == -1);
126 # Filter out 'c' used as a list item inside C++ comments.
127 # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
128 if ($c_item_index > $cpp_comment_idx &&
129 are_within_increasing_progression(
130 $line_number,
131 $last_b_item_line_number,
132 $max_line_numbers_proximity) &&
133 are_within_increasing_progression(
134 $last_b_item_line_number,
135 $last_a_item_line_number,
136 $max_line_numbers_proximity)) {
137 return '';
138 }
139 }
140
141 my $copyright_indicator_regex =
142 '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))';
143 my $copyright_disindicator_regex =
144 '\b(?:info(?:rmation)?|notice|and|or)\b';
145
146 my $copyright = '';
147 if ($line =~ m%\W$copyright_indicator_regex(?::\s*|\s+)(\w.*)$%i) {
148 my $match = $1;
149 if ($match !~ m%^\s*$copyright_disindicator_regex%i) {
150 $match =~ s/([,.])?\s*$//;
151 $match =~ s/$copyright_indicator_regex//ig;
152 $match =~ s/^\s+//;
153 $match =~ s/\s{2,}/ /g;
154 $match =~ s/\\@/@/g;
155 $copyright = $match;
156 }
157 }
158
159 return $copyright;
89 } 160 }
90
91 my $uc_line = uc($line);
92 # Fast bailout, uses the same patterns as the regexp.
93 return '' if (index($uc_line, 'COPYRIGHT') == -1 &&
94 index($uc_line, 'COPR.') == -1 &&
95 index($uc_line, '\x{00a9}') == -1 &&
96 index($uc_line, '\xc2\xa9') == -1 &&
97 index($uc_line, '(C)') == -1);
98
99 my $copyright_indicator_regex =
100 '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))';
101 my $copyright_disindicator_regex =
102 '\b(?:info(?:rmation)?|notice|and|or)\b';
103
104 my $copyright = '';
105 if ($line =~ m%\W$copyright_indicator_regex(?::\s*|\s+)(\w.*)$%i) {
106 my $match = $1;
107 if ($match !~ m%^\s*$copyright_disindicator_regex%i) {
108 $match =~ s/([,.])?\s*$//;
109 $match =~ s/$copyright_indicator_regex//ig;
110 $match =~ s/^\s+//;
111 $match =~ s/\s{2,}/ /g;
112 $match =~ s/\\@/@/g;
113 $copyright = $match;
114 }
115 }
116
117 return $copyright;
118 } 161 }
OLDNEW
« no previous file with comments | « no previous file | android_webview/tools/third_party_files_whitelist.txt » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698