OLD | NEW |
1 #!/usr/bin/perl -w | 1 #!/usr/bin/perl -w |
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 # Use: find_copyrights.pl <start-from> [exclude-dir ...] | 6 # Use: find_copyrights.pl <start-from> [exclude-dir ...] |
7 | 7 |
8 use strict; | 8 use strict; |
9 use warnings; | 9 use warnings; |
10 use File::Basename; | 10 use File::Basename; |
11 | 11 |
12 sub check_is_generated_file($); | 12 sub check_is_generated_file($); |
13 sub parse_copyright($); | 13 sub start_copyright_parsing(); |
14 | 14 |
15 my $progname = basename($0); | 15 my $progname = basename($0); |
16 | 16 |
17 my $root_dir = shift @ARGV; | 17 my $root_dir = shift @ARGV; |
18 my @find_args = (); | 18 my @find_args = (); |
19 while (@ARGV) { | 19 while (@ARGV) { |
20 my $path = shift @ARGV; | 20 my $path = shift @ARGV; |
21 push @find_args, qw'-not ( -path', "*/$path/*", qw'-prune )' | 21 push @find_args, qw'-not ( -path', "*/$path/*", qw'-prune )' |
22 } | 22 } |
23 push @find_args, qw(-follow -type f -print); | 23 push @find_args, qw(-follow -type f -print); |
24 | 24 |
25 open FIND, '-|', 'find', $root_dir, @find_args | 25 open FIND, '-|', 'find', $root_dir, @find_args |
26 or die "$progname: Couldn't exec find: $!\n"; | 26 or die "$progname: Couldn't exec find: $!\n"; |
27 my $check_regex = '\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' . | 27 my $check_regex = '\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' . |
28 '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' . | 28 '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' . |
29 '|tex|mli?)$'; | 29 '|tex|mli?)$'; |
30 my @files = (); | 30 my @files = (); |
31 while (<FIND>) { | 31 while (<FIND>) { |
32 chomp; | 32 chomp; |
33 push @files, $_ unless (-z $_ || !m%$check_regex%); | 33 push @files, $_ unless (-z $_ || !m%$check_regex%); |
34 } | 34 } |
35 close FIND; | 35 close FIND; |
36 | 36 |
37 my $generated_file_scan_boundary = 25; | 37 my $generated_file_scan_boundary = 25; |
38 while (@files) { | 38 while (@files) { |
39 my $file = shift @files; | 39 my $file = shift @files; |
40 my $file_header = ''; | 40 my $file_header = ''; |
41 my %copyrights; | 41 my %copyrights; |
42 open (F, "<$file") or die "$progname: Unable to access $file\n"; | 42 open (F, "<$file") or die "$progname: Unable to access $file\n"; |
| 43 my $parse_copyright = start_copyright_parsing(); |
43 while (<F>) { | 44 while (<F>) { |
44 $file_header .= $_ unless $. > $generated_file_scan_boundary; | 45 $file_header .= $_ unless $. > $generated_file_scan_boundary; |
45 my $copyright_match = parse_copyright($_); | 46 my $copyright_match = $parse_copyright->($_, $.); |
46 if ($copyright_match) { | 47 if ($copyright_match) { |
47 $copyrights{lc("$copyright_match")} = "$copyright_match"; | 48 $copyrights{lc("$copyright_match")} = "$copyright_match"; |
48 } | 49 } |
49 } | 50 } |
50 close(F); | 51 close(F); |
51 my $copyright = join(" / ", values %copyrights); | 52 my $copyright = join(" / ", values %copyrights); |
52 print "$file\t"; | 53 print "$file\t"; |
53 if (check_is_generated_file($file_header)) { | 54 if (check_is_generated_file($file_header)) { |
54 print "GENERATED FILE"; | 55 print "GENERATED FILE"; |
55 } else { | 56 } else { |
(...skipping 18 matching lines...) Expand all Loading... |
74 if (index($license, 'DO NOT EDIT') != -1 || | 75 if (index($license, 'DO NOT EDIT') != -1 || |
75 index($license, 'DO NOT DELETE') != -1 || | 76 index($license, 'DO NOT DELETE') != -1 || |
76 index($license, 'GENERATED') != -1) { | 77 index($license, 'GENERATED') != -1) { |
77 return ($license =~ /(All changes made in this file will be lost' . | 78 return ($license =~ /(All changes made in this file will be lost' . |
78 'DO NOT (EDIT|delete this file)|Generated (at|automatically|data)' . | 79 'DO NOT (EDIT|delete this file)|Generated (at|automatically|data)' . |
79 '|Automatically generated|\Wgenerated\s+(?:\w+\s+)*file\W)/i); | 80 '|Automatically generated|\Wgenerated\s+(?:\w+\s+)*file\W)/i); |
80 } | 81 } |
81 return 0; | 82 return 0; |
82 } | 83 } |
83 | 84 |
84 sub parse_copyright($) { | 85 sub are_within_increasing_progression($$$) { |
85 my $line = $_[0]; | 86 my $delta = $_[0] - $_[1]; |
86 # Remove C / C++ strings to avoid false positives. | 87 return $delta >= 0 && $delta <= $_[2]; |
87 if (index($line, '"') != -1) { | 88 } |
88 $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g; | 89 |
| 90 sub start_copyright_parsing() { |
| 91 my $max_line_numbers_proximity = 3; |
| 92 # Set up the defaults so that the proximity checks cannot succeed. |
| 93 my $last_a_item_line_number = -200; |
| 94 my $last_b_item_line_number = -100; |
| 95 |
| 96 return sub { |
| 97 my $line = $_[0]; |
| 98 my $line_number = $_[1]; |
| 99 |
| 100 # Remove C / C++ strings to avoid false positives. |
| 101 if (index($line, '"') != -1) { |
| 102 $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g; |
| 103 } |
| 104 |
| 105 my $uc_line = uc($line); |
| 106 |
| 107 # Record '(a)' and '(b)' last occurrences in C++ comments. |
| 108 my $cpp_comment_idx = index($uc_line, '//'); |
| 109 if ($cpp_comment_idx != -1) { |
| 110 if (index($uc_line, '(A)') > $cpp_comment_idx) { |
| 111 $last_a_item_line_number = $line_number; |
| 112 } |
| 113 if (index($uc_line, '(B)') > $cpp_comment_idx) { |
| 114 $last_b_item_line_number = $line_number; |
| 115 } |
| 116 } |
| 117 |
| 118 # Fast bailout, uses the same patterns as the regexp. |
| 119 if (index($uc_line, 'COPYRIGHT') == -1 && |
| 120 index($uc_line, 'COPR.') == -1 && |
| 121 index($uc_line, '\x{00a9}') == -1 && |
| 122 index($uc_line, '\xc2\xa9') == -1) { |
| 123 |
| 124 my $c_item_index = index($uc_line, '(C)'); |
| 125 return '' if ($c_item_index == -1); |
| 126 # Filter out 'c' used as a list item inside C++ comments. |
| 127 # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah" |
| 128 if ($c_item_index > $cpp_comment_idx && |
| 129 are_within_increasing_progression( |
| 130 $line_number, |
| 131 $last_b_item_line_number, |
| 132 $max_line_numbers_proximity) && |
| 133 are_within_increasing_progression( |
| 134 $last_b_item_line_number, |
| 135 $last_a_item_line_number, |
| 136 $max_line_numbers_proximity)) { |
| 137 return ''; |
| 138 } |
| 139 } |
| 140 |
| 141 my $copyright_indicator_regex = |
| 142 '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))'; |
| 143 my $copyright_disindicator_regex = |
| 144 '\b(?:info(?:rmation)?|notice|and|or)\b'; |
| 145 |
| 146 my $copyright = ''; |
| 147 if ($line =~ m%\W$copyright_indicator_regex(?::\s*|\s+)(\w.*)$%i) { |
| 148 my $match = $1; |
| 149 if ($match !~ m%^\s*$copyright_disindicator_regex%i) { |
| 150 $match =~ s/([,.])?\s*$//; |
| 151 $match =~ s/$copyright_indicator_regex//ig; |
| 152 $match =~ s/^\s+//; |
| 153 $match =~ s/\s{2,}/ /g; |
| 154 $match =~ s/\\@/@/g; |
| 155 $copyright = $match; |
| 156 } |
| 157 } |
| 158 |
| 159 return $copyright; |
89 } | 160 } |
90 | |
91 my $uc_line = uc($line); | |
92 # Fast bailout, uses the same patterns as the regexp. | |
93 return '' if (index($uc_line, 'COPYRIGHT') == -1 && | |
94 index($uc_line, 'COPR.') == -1 && | |
95 index($uc_line, '\x{00a9}') == -1 && | |
96 index($uc_line, '\xc2\xa9') == -1 && | |
97 index($uc_line, '(C)') == -1); | |
98 | |
99 my $copyright_indicator_regex = | |
100 '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))'; | |
101 my $copyright_disindicator_regex = | |
102 '\b(?:info(?:rmation)?|notice|and|or)\b'; | |
103 | |
104 my $copyright = ''; | |
105 if ($line =~ m%\W$copyright_indicator_regex(?::\s*|\s+)(\w.*)$%i) { | |
106 my $match = $1; | |
107 if ($match !~ m%^\s*$copyright_disindicator_regex%i) { | |
108 $match =~ s/([,.])?\s*$//; | |
109 $match =~ s/$copyright_indicator_regex//ig; | |
110 $match =~ s/^\s+//; | |
111 $match =~ s/\s{2,}/ /g; | |
112 $match =~ s/\\@/@/g; | |
113 $copyright = $match; | |
114 } | |
115 } | |
116 | |
117 return $copyright; | |
118 } | 161 } |
OLD | NEW |