| Index: android_webview/tools/find_copyrights.pl
|
| diff --git a/android_webview/tools/find_copyrights.pl b/android_webview/tools/find_copyrights.pl
|
| index ff00b4e8003fe87a524bdb33f35cb39a2918c603..239b147e123f4af8ffa0988cad5d51caa146082c 100755
|
| --- a/android_webview/tools/find_copyrights.pl
|
| +++ b/android_webview/tools/find_copyrights.pl
|
| @@ -10,7 +10,7 @@ use warnings;
|
| use File::Basename;
|
|
|
| sub check_is_generated_file($);
|
| -sub parse_copyright($);
|
| +sub start_copyright_parsing();
|
|
|
| my $progname = basename($0);
|
|
|
| @@ -40,9 +40,10 @@ while (@files) {
|
| my $file_header = '';
|
| my %copyrights;
|
| open (F, "<$file") or die "$progname: Unable to access $file\n";
|
| + my $parse_copyright = start_copyright_parsing();
|
| while (<F>) {
|
| $file_header .= $_ unless $. > $generated_file_scan_boundary;
|
| - my $copyright_match = parse_copyright($_);
|
| + my $copyright_match = $parse_copyright->($_, $.);
|
| if ($copyright_match) {
|
| $copyrights{lc("$copyright_match")} = "$copyright_match";
|
| }
|
| @@ -81,38 +82,80 @@ sub check_is_generated_file($) {
|
| return 0;
|
| }
|
|
|
| -sub parse_copyright($) {
|
| - my $line = $_[0];
|
| - # Remove C / C++ strings to avoid false positives.
|
| - if (index($line, '"') != -1) {
|
| - $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;
|
| - }
|
| +sub are_within_increasing_progression($$$) {
|
| + my $delta = $_[0] - $_[1];
|
| + return $delta >= 0 && $delta <= $_[2];
|
| +}
|
| +
|
| +sub start_copyright_parsing() {
|
| + my $max_line_numbers_proximity = 3;
|
| + # Set up the defaults the way that proximity checks will not succeed.
|
| + my $last_a_item_line_number = -200;
|
| + my $last_b_item_line_number = -100;
|
| +
|
| + return sub {
|
| + my $line = $_[0];
|
| + my $line_number = $_[1];
|
|
|
| - my $uc_line = uc($line);
|
| - # Fast bailout, uses the same patterns as the regexp.
|
| - return '' if (index($uc_line, 'COPYRIGHT') == -1 &&
|
| - index($uc_line, 'COPR.') == -1 &&
|
| - index($uc_line, '\x{00a9}') == -1 &&
|
| - index($uc_line, '\xc2\xa9') == -1 &&
|
| - index($uc_line, '(C)') == -1);
|
| -
|
| - my $copyright_indicator_regex =
|
| - '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))';
|
| - my $copyright_disindicator_regex =
|
| - '\b(?:info(?:rmation)?|notice|and|or)\b';
|
| -
|
| - my $copyright = '';
|
| - if ($line =~ m%\W$copyright_indicator_regex(?::\s*|\s+)(\w.*)$%i) {
|
| - my $match = $1;
|
| - if ($match !~ m%^\s*$copyright_disindicator_regex%i) {
|
| - $match =~ s/([,.])?\s*$//;
|
| - $match =~ s/$copyright_indicator_regex//ig;
|
| - $match =~ s/^\s+//;
|
| - $match =~ s/\s{2,}/ /g;
|
| - $match =~ s/\\@/@/g;
|
| - $copyright = $match;
|
| + # Remove C / C++ strings to avoid false positives.
|
| + if (index($line, '"') != -1) {
|
| + $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;
|
| + }
|
| +
|
| + my $uc_line = uc($line);
|
| +
|
| + # Record '(a)' and '(b)' last occurences in C++ comments.
|
| + my $cpp_comment_idx = index($uc_line, '//');
|
| + if ($cpp_comment_idx != -1) {
|
| + if (index($uc_line, '(A)') > $cpp_comment_idx) {
|
| + $last_a_item_line_number = $line_number;
|
| + }
|
| + if (index($uc_line, '(B)') > $cpp_comment_idx) {
|
| + $last_b_item_line_number = $line_number;
|
| + }
|
| }
|
| - }
|
|
|
| - return $copyright;
|
| + # Fast bailout, uses the same patterns as the regexp.
|
| + if (index($uc_line, 'COPYRIGHT') == -1 &&
|
| + index($uc_line, 'COPR.') == -1 &&
|
| + index($uc_line, '\x{00a9}') == -1 &&
|
| + index($uc_line, '\xc2\xa9') == -1) {
|
| +
|
| + my $c_item_index = index($uc_line, '(C)');
|
| + return '' if ($c_item_index == -1);
|
| + # Filter out 'c' used as a list item inside C++ comments.
|
| + # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
|
| + if ($c_item_index > $cpp_comment_idx &&
|
| + are_within_increasing_progression(
|
| + $line_number,
|
| + $last_b_item_line_number,
|
| + $max_line_numbers_proximity) &&
|
| + are_within_increasing_progression(
|
| + $last_b_item_line_number,
|
| + $last_a_item_line_number,
|
| + $max_line_numbers_proximity)) {
|
| + return '';
|
| + }
|
| + }
|
| +
|
| + my $copyright_indicator_regex =
|
| + '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))';
|
| + my $copyright_disindicator_regex =
|
| + '\b(?:info(?:rmation)?|notice|and|or)\b';
|
| +
|
| + my $copyright = '';
|
| + if ($line =~ m%\W$copyright_indicator_regex(?::\s*|\s+)(\w.*)$%i) {
|
| + my $match = $1;
|
| + if ($match !~ m%^\s*$copyright_disindicator_regex%i) {
|
| + $match =~ s/([,.])?\s*$//;
|
| + $match =~ s/$copyright_indicator_regex//ig;
|
| + $match =~ s/^\s+//;
|
| + $match =~ s/\s{2,}/ /g;
|
| + $match =~ s/\\@/@/g;
|
| + $copyright = $match;
|
| + }
|
| + }
|
| +
|
| + return $copyright;
|
| + }
|
| }
|
|
|