OLD | NEW |
---|---|
1 #!/usr/bin/perl -w | 1 #!/usr/bin/perl -w |
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 # Use: find_copyrights.pl <start-from> [exclude-dir ...] | 6 # Use: find_copyrights.pl <start-from> [exclude-dir ...] |
7 | 7 |
8 use strict; | 8 use strict; |
9 use warnings; | 9 use warnings; |
10 use File::Basename; | 10 use File::Basename; |
11 | 11 |
12 sub check_is_generated_file($); | 12 sub check_is_generated_file($); |
13 sub parse_copyright($); | 13 sub start_copyright_parsing(); |
14 | 14 |
15 my $progname = basename($0); | 15 my $progname = basename($0); |
16 | 16 |
17 my $root_dir = shift @ARGV; | 17 my $root_dir = shift @ARGV; |
18 my @find_args = (); | 18 my @find_args = (); |
19 while (@ARGV) { | 19 while (@ARGV) { |
20 my $path = shift @ARGV; | 20 my $path = shift @ARGV; |
21 push @find_args, qw'-not ( -path', "*/$path/*", qw'-prune )' | 21 push @find_args, qw'-not ( -path', "*/$path/*", qw'-prune )' |
22 } | 22 } |
23 push @find_args, qw(-follow -type f -print); | 23 push @find_args, qw(-follow -type f -print); |
24 | 24 |
25 open FIND, '-|', 'find', $root_dir, @find_args | 25 open FIND, '-|', 'find', $root_dir, @find_args |
26 or die "$progname: Couldn't exec find: $!\n"; | 26 or die "$progname: Couldn't exec find: $!\n"; |
27 my $check_regex = '\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' . | 27 my $check_regex = '\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' . |
28 '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' . | 28 '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' . |
29 '|tex|mli?)$'; | 29 '|tex|mli?)$'; |
30 my @files = (); | 30 my @files = (); |
31 while (<FIND>) { | 31 while (<FIND>) { |
32 chomp; | 32 chomp; |
33 push @files, $_ unless (-z $_ || !m%$check_regex%); | 33 push @files, $_ unless (-z $_ || !m%$check_regex%); |
34 } | 34 } |
35 close FIND; | 35 close FIND; |
36 | 36 |
37 my $generated_file_scan_boundary = 25; | 37 my $generated_file_scan_boundary = 25; |
38 while (@files) { | 38 while (@files) { |
39 my $file = shift @files; | 39 my $file = shift @files; |
40 my $file_header = ''; | 40 my $file_header = ''; |
41 my %copyrights; | 41 my %copyrights; |
42 open (F, "<$file") or die "$progname: Unable to access $file\n"; | 42 open (F, "<$file") or die "$progname: Unable to access $file\n"; |
43 my $parse_copyright = start_copyright_parsing(); | |
43 while (<F>) { | 44 while (<F>) { |
44 $file_header .= $_ unless $. > $generated_file_scan_boundary; | 45 $file_header .= $_ unless $. > $generated_file_scan_boundary; |
45 my $copyright_match = parse_copyright($_); | 46 my $copyright_match = $parse_copyright->($_, $.); |
46 if ($copyright_match) { | 47 if ($copyright_match) { |
47 $copyrights{lc("$copyright_match")} = "$copyright_match"; | 48 $copyrights{lc("$copyright_match")} = "$copyright_match"; |
48 } | 49 } |
49 } | 50 } |
50 close(F); | 51 close(F); |
51 my $copyright = join(" / ", values %copyrights); | 52 my $copyright = join(" / ", values %copyrights); |
52 print "$file\t"; | 53 print "$file\t"; |
53 if (check_is_generated_file($file_header)) { | 54 if (check_is_generated_file($file_header)) { |
54 print "GENERATED FILE"; | 55 print "GENERATED FILE"; |
55 } else { | 56 } else { |
(...skipping 18 matching lines...) Expand all Loading... | |
74 if (index($license, 'DO NOT EDIT') != -1 || | 75 if (index($license, 'DO NOT EDIT') != -1 || |
75 index($license, 'DO NOT DELETE') != -1 || | 76 index($license, 'DO NOT DELETE') != -1 || |
76 index($license, 'GENERATED') != -1) { | 77 index($license, 'GENERATED') != -1) { |
77 return ($license =~ /(All changes made in this file will be lost' . | 78 return ($license =~ /(All changes made in this file will be lost' . |
78 'DO NOT (EDIT|delete this file)|Generated (at|automatically|data)' . | 79 'DO NOT (EDIT|delete this file)|Generated (at|automatically|data)' . |
79 '|Automatically generated|\Wgenerated\s+(?:\w+\s+)*file\W)/i); | 80 '|Automatically generated|\Wgenerated\s+(?:\w+\s+)*file\W)/i); |
80 } | 81 } |
81 return 0; | 82 return 0; |
82 } | 83 } |
83 | 84 |
84 sub parse_copyright($) { | 85 sub are_within_increasing_progression($$$) { |
85 my $line = $_[0]; | 86 my $delta = $_[0] - $_[1]; |
86 # Remove C / C++ strings to avoid false positives. | 87 return $delta >= 0 && $delta <= $_[2]; |
87 if (index($line, '"') != -1) { | 88 } |
88 $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g; | 89 |
90 sub start_copyright_parsing() { | |
91 my $max_line_numbers_proximity = 3; | |
92 # Set up the defaults so that the proximity checks cannot succeed. | |
93 my $last_a_item_line_number = -200; | |
94 my $last_b_item_line_number = -100; | |
95 | |
96 return sub { | |
97 my $line = $_[0]; | |
98 # Remove C / C++ strings to avoid false positives. | |
mkosiba (inactive)
2013/01/31 14:39:49
nit: assign the line number here as well?
mnaganov (inactive)
2013/01/31 14:53:03
Makes sense. Done.
| |
99 if (index($line, '"') != -1) { | |
100 $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g; | |
101 } | |
102 | |
103 my $uc_line = uc($line); | |
104 | |
105 # Record the last occurrences of '(a)' and '(b)' in C++ comments. | |
106 my $line_number = $_[1]; | |
107 my $cpp_comment_idx = index($uc_line, '//'); | |
108 if ($cpp_comment_idx != -1) { | |
109 if (index($uc_line, '(A)') > $cpp_comment_idx) { | |
110 $last_a_item_line_number = $line_number; | |
111 } | |
112 if (index($uc_line, '(B)') > $cpp_comment_idx) { | |
113 $last_b_item_line_number = $line_number; | |
114 } | |
115 } | |
116 | |
117 # Fast bailout, uses the same patterns as the regexp. | |
118 if (index($uc_line, 'COPYRIGHT') == -1 && | |
119 index($uc_line, 'COPR.') == -1 && | |
120 index($uc_line, '\x{00a9}') == -1 && | |
121 index($uc_line, '\xc2\xa9') == -1) { | |
122 | |
123 my $c_item_index = index($uc_line, '(C)'); | |
124 return '' if ($c_item_index == -1); | |
125 # Filter out 'c' used as a list item inside C++ comments. | |
126 # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah" | |
127 if ($c_item_index > $cpp_comment_idx && | |
128 are_within_increasing_progression( | |
129 $line_number, | |
130 $last_b_item_line_number, | |
131 $max_line_numbers_proximity) && | |
132 are_within_increasing_progression( | |
133 $last_b_item_line_number, | |
134 $last_a_item_line_number, | |
135 $max_line_numbers_proximity)) { | |
136 return ''; | |
137 } | |
138 } | |
139 | |
140 my $copyright_indicator_regex = | |
141 '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))'; | |
142 my $copyright_disindicator_regex = | |
143 '\b(?:info(?:rmation)?|notice|and|or)\b'; | |
144 | |
145 my $copyright = ''; | |
146 if ($line =~ m%\W$copyright_indicator_regex(?::\s*|\s+)(\w.*)$%i) { | |
147 my $match = $1; | |
148 if ($match !~ m%^\s*$copyright_disindicator_regex%i) { | |
149 $match =~ s/([,.])?\s*$//; | |
150 $match =~ s/$copyright_indicator_regex//ig; | |
151 $match =~ s/^\s+//; | |
152 $match =~ s/\s{2,}/ /g; | |
153 $match =~ s/\\@/@/g; | |
154 $copyright = $match; | |
155 } | |
156 } | |
157 | |
158 return $copyright; | |
89 } | 159 } |
90 | |
91 my $uc_line = uc($line); | |
92 # Fast bailout, uses the same patterns as the regexp. | |
93 return '' if (index($uc_line, 'COPYRIGHT') == -1 && | |
94 index($uc_line, 'COPR.') == -1 && | |
95 index($uc_line, '\x{00a9}') == -1 && | |
96 index($uc_line, '\xc2\xa9') == -1 && | |
97 index($uc_line, '(C)') == -1); | |
98 | |
99 my $copyright_indicator_regex = | |
100 '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))'; | |
101 my $copyright_disindicator_regex = | |
102 '\b(?:info(?:rmation)?|notice|and|or)\b'; | |
103 | |
104 my $copyright = ''; | |
105 if ($line =~ m%\W$copyright_indicator_regex(?::\s*|\s+)(\w.*)$%i) { | |
106 my $match = $1; | |
107 if ($match !~ m%^\s*$copyright_disindicator_regex%i) { | |
108 $match =~ s/([,.])?\s*$//; | |
109 $match =~ s/$copyright_indicator_regex//ig; | |
110 $match =~ s/^\s+//; | |
111 $match =~ s/\s{2,}/ /g; | |
112 $match =~ s/\\@/@/g; | |
113 $copyright = $match; | |
114 } | |
115 } | |
116 | |
117 return $copyright; | |
118 } | 160 } |
OLD | NEW |