android_webview/tools/find_copyrights.pl - Issue 12087102: [Android WebView] Avoid detecting '(c)' as a copyright sign in lists.

Side by Side Diff: android_webview/tools/find_copyrights.pl

Issue 12087102: [Android WebView] Avoid detecting '(c)' as a copyright sign in lists. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Comments addressed Created 7 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 #!/usr/bin/perl -w	1 #!/usr/bin/perl -w

2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.	2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 # Use: find_copyrights.pl <start-from> [exclude-dir ...]	6 # Use: find_copyrights.pl <start-from> [exclude-dir ...]

7	7

8 use strict;	8 use strict;

9 use warnings;	9 use warnings;

10 use File::Basename;	10 use File::Basename;

11	11

12 sub check_is_generated_file($);	12 sub check_is_generated_file($);

13 sub parse_copyright($);	13 sub start_copyright_parsing();

14	14

15 my $progname = basename($0);	15 my $progname = basename($0);

16	16

17 my $root_dir = shift @ARGV;	17 my $root_dir = shift @ARGV;

18 my @find_args = ();	18 my @find_args = ();

19 while (@ARGV) {	19 while (@ARGV) {

20 my $path = shift @ARGV;	20 my $path = shift @ARGV;

21 push @find_args, qw'-not ( -path', "/$path/", qw'-prune )'	21 push @find_args, qw'-not ( -path', "/$path/", qw'-prune )'

22 }	22 }

23 push @find_args, qw(-follow -type f -print);	23 push @find_args, qw(-follow -type f -print);

24	24

25 open FIND, '-|', 'find', $root_dir, @find_args	25 open FIND, '-|', 'find', $root_dir, @find_args

26 or die "$progname: Couldn't exec find: $!\n";	26 or die "$progname: Couldn't exec find: $!\n";

27 my $check_regex = '\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' .	27 my $check_regex = '\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' .

28 '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' .	28 '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' .

29 '|tex|mli?)$';	29 '|tex|mli?)$';

30 my @files = ();	30 my @files = ();

31 while (<FIND>) {	31 while (<FIND>) {

32 chomp;	32 chomp;

33 push @files, $_ unless (-z $_ || !m%$check_regex%);	33 push @files, $_ unless (-z $_ || !m%$check_regex%);

34 }	34 }

35 close FIND;	35 close FIND;

36	36

37 my $generated_file_scan_boundary = 25;	37 my $generated_file_scan_boundary = 25;

38 while (@files) {	38 while (@files) {

39 my $file = shift @files;	39 my $file = shift @files;

40 my $file_header = '';	40 my $file_header = '';

41 my %copyrights;	41 my %copyrights;

42 open (F, "<$file") or die "$progname: Unable to access $file\n";	42 open (F, "<$file") or die "$progname: Unable to access $file\n";

	43 my $parse_copyright = start_copyright_parsing();

43 while (<F>) {	44 while (<F>) {

44 $file_header .= $_ unless $. > $generated_file_scan_boundary;	45 $file_header .= $_ unless $. > $generated_file_scan_boundary;

45 my $copyright_match = parse_copyright($_);	46 my $copyright_match = $parse_copyright->($_, $.);

46 if ($copyright_match) {	47 if ($copyright_match) {

47 $copyrights{lc("$copyright_match")} = "$copyright_match";	48 $copyrights{lc("$copyright_match")} = "$copyright_match";

48 }	49 }

49 }	50 }

50 close(F);	51 close(F);

51 my $copyright = join(" / ", values %copyrights);	52 my $copyright = join(" / ", values %copyrights);

52 print "$file\t";	53 print "$file\t";

53 if (check_is_generated_file($file_header)) {	54 if (check_is_generated_file($file_header)) {

54 print "GENERATED FILE";	55 print "GENERATED FILE";

55 } else {	56 } else {

(...skipping 18 matching lines...) Expand all Loading...
74 if (index($license, 'DO NOT EDIT') != -1 ||	75 if (index($license, 'DO NOT EDIT') != -1 ||

75 index($license, 'DO NOT DELETE') != -1 ||	76 index($license, 'DO NOT DELETE') != -1 ||

76 index($license, 'GENERATED') != -1) {	77 index($license, 'GENERATED') != -1) {

77 return ($license =~ /(All changes made in this file will be lost' .	78 return ($license =~ /(All changes made in this file will be lost' .

78 'DO NOT (EDIT|delete this file)|Generated (at|automatically|data)' .	79 'DO NOT (EDIT|delete this file)|Generated (at|automatically|data)' .

79 '|Automatically generated|\Wgenerated\s+(?:\w+\s+)*file\W)/i);	80 '|Automatically generated|\Wgenerated\s+(?:\w+\s+)*file\W)/i);

80 }	81 }

81 return 0;	82 return 0;

82 }	83 }

83	84

84 sub parse_copyright($) {	85 sub are_within_increasing_progression($$$) {

85 my $line = $_[0];	86 my $delta = $_[0] - $_[1];

86 # Remove C / C++ strings to avoid false positives.	87 return $delta >= 0 && $delta <= $_[2];

87 if (index($line, '"') != -1) {	88 }

88 $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;	89

	90 sub start_copyright_parsing() {

	91 my $max_line_numbers_proximity = 3;

	92 # Set up the defaults in such a way that proximity checks will not succeed.

	93 my $last_a_item_line_number = -200;

	94 my $last_b_item_line_number = -100;

	95

	96 return sub {

	97 my $line = $_[0];

	98 my $line_number = $_[1];

	99

	100 # Remove C / C++ strings to avoid false positives.

	101 if (index($line, '"') != -1) {

	102 $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;

	103 }

	104

	105 my $uc_line = uc($line);

	106

	107 # Record the last occurrences of '(a)' and '(b)' in C++ comments.

	108 my $cpp_comment_idx = index($uc_line, '//');

	109 if ($cpp_comment_idx != -1) {

	110 if (index($uc_line, '(A)') > $cpp_comment_idx) {

	111 $last_a_item_line_number = $line_number;

	112 }

	113 if (index($uc_line, '(B)') > $cpp_comment_idx) {

	114 $last_b_item_line_number = $line_number;

	115 }

	116 }

	117

	118 # Fast bailout, uses the same patterns as the regexp.

	119 if (index($uc_line, 'COPYRIGHT') == -1 &&

	120 index($uc_line, 'COPR.') == -1 &&

	121 index($uc_line, '\x{00a9}') == -1 &&

	122 index($uc_line, '\xc2\xa9') == -1) {

	123

	124 my $c_item_index = index($uc_line, '(C)');

	125 return '' if ($c_item_index == -1);

	126 # Filter out 'c' used as a list item inside C++ comments.

	127 # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"

	128 if ($c_item_index > $cpp_comment_idx &&

	129 are_within_increasing_progression(

	130 $line_number,

	131 $last_b_item_line_number,

	132 $max_line_numbers_proximity) &&

	133 are_within_increasing_progression(

	134 $last_b_item_line_number,

	135 $last_a_item_line_number,

	136 $max_line_numbers_proximity)) {

	137 return '';

	138 }

	139 }

	140

	141 my $copyright_indicator_regex =

	142 '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))';

	143 my $copyright_disindicator_regex =

	144 '\b(?:info(?:rmation)?|notice|and|or)\b';

	145

	146 my $copyright = '';

	147 if ($line =~ m%\W$copyright_indicator_regex(?::\s*|\s+)(\w.*)$%i) {

	148 my $match = $1;

	149 if ($match !~ m%^\s*$copyright_disindicator_regex%i) {

	150 $match =~ s/([,.])?\s*$//;

	151 $match =~ s/$copyright_indicator_regex//ig;

	152 $match =~ s/^\s+//;

	153 $match =~ s/\s{2,}/ /g;

	154 $match =~ s/\\@/@/g;

	155 $copyright = $match;

	156 }

	157 }

	158

	159 return $copyright;

89 }	160 }

90

91 my $uc_line = uc($line);

92 # Fast bailout, uses the same patterns as the regexp.

93 return '' if (index($uc_line, 'COPYRIGHT') == -1 &&

94 index($uc_line, 'COPR.') == -1 &&

95 index($uc_line, '\x{00a9}') == -1 &&

96 index($uc_line, '\xc2\xa9') == -1 &&

97 index($uc_line, '(C)') == -1);

98

99 my $copyright_indicator_regex =

100 '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))';

101 my $copyright_disindicator_regex =

102 '\b(?:info(?:rmation)?|notice|and|or)\b';

103

104 my $copyright = '';

105 if ($line =~ m%\W$copyright_indicator_regex(?::\s*|\s+)(\w.*)$%i) {

106 my $match = $1;

107 if ($match !~ m%^\s*$copyright_disindicator_regex%i) {

108 $match =~ s/([,.])?\s*$//;

109 $match =~ s/$copyright_indicator_regex//ig;

110 $match =~ s/^\s+//;

111 $match =~ s/\s{2,}/ /g;

112 $match =~ s/\\@/@/g;

113 $copyright = $match;

114 }

115 }

116

117 return $copyright;

118 }	161 }

OLD	NEW

« no previous file with comments | « no previous file | android_webview/tools/third_party_files_whitelist.txt » ('j') | no next file with comments »