OLD | NEW |
1 #!/usr/bin/perl | 1 #!/usr/bin/perl |
2 # Copyright 2008 The RE2 Authors. All Rights Reserved. | 2 # Copyright 2008 The RE2 Authors. All Rights Reserved. |
3 # Use of this source code is governed by a BSD-style | 3 # Use of this source code is governed by a BSD-style |
4 # license that can be found in the LICENSE file. | 4 # license that can be found in the LICENSE file. |
5 | 5 |
6 # Generate table entries giving character ranges | 6 # Generate table entries giving character ranges |
7 # for POSIX/Perl character classes. Rather than | 7 # for POSIX/Perl character classes. Rather than |
8 # figure out what the definition is, it is easier to ask | 8 # figure out what the definition is, it is easier to ask |
9 # Perl about each letter from 0-128 and write down | 9 # Perl about each letter from 0-128 and write down |
10 # its answer. | 10 # its answer. |
(...skipping 14 matching lines...) Expand all Loading... |
25 "[:word:]", | 25 "[:word:]", |
26 "[:xdigit:]", | 26 "[:xdigit:]", |
27 ); | 27 ); |
28 | 28 |
29 @perlclasses = ( | 29 @perlclasses = ( |
30 "\\d", | 30 "\\d", |
31 "\\s", | 31 "\\s", |
32 "\\w", | 32 "\\w", |
33 ); | 33 ); |
34 | 34 |
| 35 %overrides = ( |
| 36 # Prior to Perl 5.18, \s did not match vertical tab. |
| 37 # RE2 preserves that original behaviour. |
| 38 "\\s:11" => 0, |
| 39 ); |
| 40 |
35 sub ComputeClass($) { | 41 sub ComputeClass($) { |
| 42 my ($cname) = @_; |
36 my @ranges; | 43 my @ranges; |
37 my ($class) = @_; | 44 my $regexp = qr/[$cname]/; |
38 my $regexp = "[$class]"; | |
39 my $start = -1; | 45 my $start = -1; |
40 for (my $i=0; $i<=129; $i++) { | 46 for (my $i=0; $i<=129; $i++) { |
41 if ($i == 129) { $i = 256; } | 47 if ($i == 129) { $i = 256; } |
42 if ($i <= 128 && chr($i) =~ $regexp) { | 48 if ($i <= 128 && ($overrides{"$cname:$i"} // chr($i) =~ $regexp)) { |
43 if ($start < 0) { | 49 if ($start < 0) { |
44 $start = $i; | 50 $start = $i; |
45 } | 51 } |
46 } else { | 52 } else { |
47 if ($start >= 0) { | 53 if ($start >= 0) { |
48 push @ranges, [$start, $i-1]; | 54 push @ranges, [$start, $i-1]; |
49 } | 55 } |
50 $start = -1; | 56 $start = -1; |
51 } | 57 } |
52 } | 58 } |
53 return @ranges; | 59 return @ranges; |
54 } | 60 } |
55 | 61 |
56 sub PrintClass($$@) { | 62 sub PrintClass($$@) { |
57 my ($cname, $name, @ranges) = @_; | 63 my ($cnum, $cname, @ranges) = @_; |
58 print "static URange16 code${cname}[] = { /* $name */\n"; | 64 print "static const URange16 code${cnum}[] = { /* $cname */\n"; |
59 for (my $i=0; $i<@ranges; $i++) { | 65 for (my $i=0; $i<@ranges; $i++) { |
60 my @a = @{$ranges[$i]}; | 66 my @a = @{$ranges[$i]}; |
61 printf "\t{ 0x%x, 0x%x },\n", $a[0], $a[1]; | 67 printf "\t{ 0x%x, 0x%x },\n", $a[0], $a[1]; |
62 } | 68 } |
63 print "};\n"; | 69 print "};\n"; |
64 my $n = @ranges; | 70 my $n = @ranges; |
65 my $escname = $name; | 71 my $escname = $cname; |
66 $escname =~ s/\\/\\\\/g; | 72 $escname =~ s/\\/\\\\/g; |
67 $negname = $escname; | 73 $negname = $escname; |
68 if ($negname =~ /:/) { | 74 if ($negname =~ /:/) { |
69 $negname =~ s/:/:^/; | 75 $negname =~ s/:/:^/; |
70 } else { | 76 } else { |
71 $negname =~ y/a-z/A-Z/; | 77 $negname =~ y/a-z/A-Z/; |
72 } | 78 } |
73 return "{ \"$escname\", +1, code$cname, $n }", "{ \"$negname\", -1, code$cname
, $n }"; | 79 return "{ \"$escname\", +1, code$cnum, $n }", "{ \"$negname\", -1, code$cnum,
$n }"; |
74 } | 80 } |
75 | 81 |
76 my $gen = 0; | 82 my $cnum = 0; |
77 | 83 |
78 sub PrintClasses($@) { | 84 sub PrintClasses($@) { |
79 my ($cname, @classes) = @_; | 85 my ($pname, @classes) = @_; |
80 my @entries; | 86 my @entries; |
81 foreach my $cl (@classes) { | 87 foreach my $cname (@classes) { |
82 my @ranges = ComputeClass($cl); | 88 my @ranges = ComputeClass($cname); |
83 push @entries, PrintClass(++$gen, $cl, @ranges); | 89 push @entries, PrintClass(++$cnum, $cname, @ranges); |
84 } | 90 } |
85 print "UGroup ${cname}_groups[] = {\n"; | 91 print "const UGroup ${pname}_groups[] = {\n"; |
86 foreach my $e (@entries) { | 92 foreach my $e (@entries) { |
87 print "\t$e,\n"; | 93 print "\t$e,\n"; |
88 } | 94 } |
89 print "};\n"; | 95 print "};\n"; |
90 my $count = @entries; | 96 my $count = @entries; |
91 print "int num_${cname}_groups = $count;\n"; | 97 print "const int num_${pname}_groups = $count;\n"; |
92 } | 98 } |
93 | 99 |
94 print <<EOF; | 100 print <<EOF; |
95 // GENERATED BY make_perl_groups.pl; DO NOT EDIT. | 101 // GENERATED BY make_perl_groups.pl; DO NOT EDIT. |
96 // make_perl_groups.pl >perl_groups.cc | 102 // make_perl_groups.pl >perl_groups.cc |
97 | 103 |
98 #include "re2/unicode_groups.h" | 104 #include "re2/unicode_groups.h" |
99 | 105 |
100 namespace re2 { | 106 namespace re2 { |
101 | 107 |
102 EOF | 108 EOF |
103 | 109 |
104 PrintClasses("perl", @perlclasses); | 110 PrintClasses("perl", @perlclasses); |
105 PrintClasses("posix", @posixclasses); | 111 PrintClasses("posix", @posixclasses); |
106 | 112 |
107 print <<EOF; | 113 print <<EOF; |
108 | 114 |
109 } // namespace re2 | 115 } // namespace re2 |
110 EOF | 116 EOF |
OLD | NEW |