OLD | NEW |
| (Empty) |
1 #!/usr/bin/perl -w | |
2 # Copyright 2013 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 # Use: echo filename1.cc ... | find_copyrights.pl | |
7 # or: find_copyrights.pl list_file | |
8 # or: find_files.pl ... | find_copyrights.pl | |
9 | |
10 use strict; | |
11 use warnings; | |
12 use File::Basename; | |
13 | |
sub check_is_generated_file($);
sub start_copyright_parsing();

my $progname = basename($0);

# Only this many leading lines of each file are kept for the
# "generated file" check.
my $generated_file_scan_boundary = 25;

# Main loop: every input line (read from STDIN or from the list files named
# on the command line) is the path of a file to scan. For each path one line
# is printed: "<path>\t<result>", where <result> is "GENERATED FILE", the
# " / "-joined list of distinct copyright statements, or "*No copyright*".
while (<>) {
  chomp;
  my $file = $_;
  my $file_header = '';
  # Maps the lower-cased statement to its first-seen... last-seen spelling, so
  # that statements differing only in letter case are reported once.
  my %copyrights;
  # Three-argument open with a lexical handle: the path is always treated as
  # a plain file name, even if it starts with '&', '>' or '|' or has leading /
  # trailing whitespace, and the handle cannot clash with a package global.
  open(my $fh, '<', $file)
    or die "$progname: Unable to access $file: $!\n";
  # The parser keeps per-file state, so a fresh one is created for each file.
  my $parse_copyright = start_copyright_parsing();
  while (my $line = <$fh>) {
    # $. is the line number within the handle that was read last, i.e. $fh.
    $file_header .= $line unless $. > $generated_file_scan_boundary;
    my $copyright_match = $parse_copyright->($line, $.);
    if ($copyright_match) {
      $copyrights{lc($copyright_match)} = $copyright_match;
    }
  }
  close($fh);
  my $copyright = join(" / ", sort values %copyrights);
  print "$file\t";
  if (check_is_generated_file($file_header)) {
    print "GENERATED FILE";
  } else {
    print($copyright or "*No copyright*");
  }
  print "\n";
}
44 | |
# Decides whether a file header marks the file as automatically generated.
#
# Argument: the text to inspect (a single string, may span several lines).
# Returns: 1 when a "generated file" marker is found, 0 otherwise.
sub check_is_generated_file($) {
  # Work on an upper-cased copy so the plain index() lookups below are
  # effectively case-insensitive.
  my $license = uc($_[0]);
  # Remove Python multiline comments to avoid false positives
  if (index($license, '"""') != -1) {
    $license =~ s/"""[^"]*(?:"""|$)//mg;
  }
  if (index($license, "'''") != -1) {
    $license =~ s/'''[^']*(?:'''|$)//mg;
  }
  # Quick checks using index.
  if (index($license, 'ALL CHANGES MADE IN THIS FILE WILL BE LOST') != -1) {
    return 1;
  }
  if (index($license, 'DO NOT EDIT') != -1 ||
      index($license, 'DO NOT DELETE') != -1 ||
      index($license, 'GENERATED') != -1) {
    # The cheap lookups above only pre-filter; the precise wording is checked
    # here. The alternatives are joined with '|' explicitly: writing them as
    # quoted pieces inside one regex literal would make the quotes, dots and
    # line breaks part of the pattern and disable most of the alternatives.
    my @generated_markers = (
      'All changes made in this file will be lost',
      'DO NOT (?:EDIT|delete this file)',
      'Generated (?:at|automatically|data)',
      'Automatically generated',
      '\Wgenerated\s+(?:\w+\s+)*file\W',
    );
    my $generated_regex = join('|', @generated_markers);
    return ($license =~ /$generated_regex/i) ? 1 : 0;
  }
  return 0;
}
67 | |
# Tells whether the first number follows the second one closely enough.
#
# Arguments: the later value, the earlier value, the largest allowed distance.
# Returns: true when (later - earlier) lies in [0, distance], false otherwise.
sub are_within_increasing_progression($$$) {
  my ($later, $earlier, $max_distance) = @_;
  my $distance = $later - $earlier;
  return 0 <= $distance && $distance <= $max_distance;
}
72 | |
# Creates a stateful copyright parser for one file.
#
# Returns: a closure taking ($line, $line_number) that returns the copyright
#   statement found on that line (indicator words and signs removed, trailing
#   punctuation trimmed), or '' when the line has no copyright statement.
#
# The closure remembers on which lines '(a)' and '(b)' were last seen inside
# '//' comments, so that a '(c)' closing such a list is not taken for a
# copyright sign. Because of this state, feed it the lines of a single file
# in order and create a new parser for the next file.
#
# Lines are matched as raw bytes: the copyright sign is recognized both as
# the single byte 0xA9 (Latin-1) and as the byte pair 0xC2 0xA9 (UTF-8).
sub start_copyright_parsing() {
  my $max_line_numbers_proximity = 3;
  # Set up the defaults the way that proximity checks will not succeed.
  my $last_a_item_line_number = -200;
  my $last_b_item_line_number = -100;

  # Double-quoted on purpose: in single quotes these would be the literal
  # text backslash-x-..., which never occurs in a source line.
  my $latin1_copyright_sign = "\xa9";
  my $utf8_copyright_sign = "\xc2\xa9";

  # The bare 0xA9 alternative must not be preceded by another high byte:
  # 0xA9 is also the trailing byte of other UTF-8 characters (for example
  # 0xC3 0xA9, an accented 'e'), which are not copyright signs.
  my $copyright_indicator_regex =
    '(?:copyright|copr\.|\xc2\xa9|(?<![\x80-\xff])\xa9|\(c\))';
  my $full_copyright_indicator_regex =
    sprintf '(?:\W|^)%s(?::\s*|\s+)(\w.*)$', $copyright_indicator_regex;
  # Words that, right after an indicator, show that the line merely talks
  # about copyright ("copyright notice", "copyright and ...").
  my $copyright_disindicator_regex =
    '\b(?:info(?:rmation)?|notice|and|or)\b';

  # Compile the patterns once per parser rather than once per line.
  my $indicator_re = qr/$copyright_indicator_regex/i;
  my $full_indicator_re = qr/$full_copyright_indicator_regex/i;
  my $disindicator_re = qr/^\s*$copyright_disindicator_regex/i;

  return sub {
    my ($line, $line_number) = @_;

    # Remove C / C++ strings to avoid false positives.
    if (index($line, '"') != -1) {
      $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;
    }

    my $uc_line = uc($line);

    # Record '(a)' and '(b)' last occurrences in C++ comments.
    my $cpp_comment_idx = index($uc_line, '//');
    if ($cpp_comment_idx != -1) {
      if (index($uc_line, '(A)') > $cpp_comment_idx) {
        $last_a_item_line_number = $line_number;
      }
      if (index($uc_line, '(B)') > $cpp_comment_idx) {
        $last_b_item_line_number = $line_number;
      }
    }

    # Fast bailout, uses the same patterns as the regexp.
    if (index($uc_line, 'COPYRIGHT') == -1 &&
        index($uc_line, 'COPR.') == -1 &&
        index($line, $latin1_copyright_sign) == -1 &&
        index($line, $utf8_copyright_sign) == -1) {

      my $c_item_index = index($uc_line, '(C)');
      return '' if ($c_item_index == -1);
      # Filter out 'c' used as a list item inside C++ comments.
      # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
      if ($c_item_index > $cpp_comment_idx &&
          are_within_increasing_progression(
            $line_number,
            $last_b_item_line_number,
            $max_line_numbers_proximity) &&
          are_within_increasing_progression(
            $last_b_item_line_number,
            $last_a_item_line_number,
            $max_line_numbers_proximity)) {
        return '';
      }
    }

    my $copyright = '';
    if ($line =~ $full_indicator_re) {
      my $match = $1;
      if ($match !~ $disindicator_re) {
        # Drop one trailing comma / period and trailing whitespace.
        $match =~ s/([,.])?\s*$//;
        # Drop further indicators, e.g. the "(c)" in "Copyright (c) 2013".
        $match =~ s/$indicator_re//g;
        $match =~ s/^\s+//;
        $match =~ s/\s{2,}/ /g;
        # Unescape '@' written as '\@'.
        $match =~ s/\\@/@/g;
        $copyright = $match;
      }
    }

    return $copyright;
  };
}
OLD | NEW |