OLD | NEW |
| (Empty) |
#!/usr/bin/perl
# Copyright 2008 The RE2 Authors.  All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# Generate table entries giving character ranges
# for POSIX/Perl character classes.  Rather than
# figure out what each definition is, it is easier to ask
# Perl about every code point from 0 to 128 and write down
# its answer.
11 | |
# POSIX bracket class names, written exactly as they appear inside a
# bracket expression.  One table is generated per name, in this order.
@posixclasses = qw(
  [:alnum:]
  [:alpha:]
  [:ascii:]
  [:blank:]
  [:cntrl:]
  [:digit:]
  [:graph:]
  [:lower:]
  [:print:]
  [:punct:]
  [:space:]
  [:upper:]
  [:word:]
  [:xdigit:]
);
28 | |
# Perl shorthand classes.  Each entry is a backslash followed by one
# letter, i.e. the text that goes inside a bracket expression.
@perlclasses = ('\d', '\s', '\w');
34 | |
# Forced answers that take precedence over what the running Perl reports.
# Keys have the form "<class name>:<decimal code point>"; a defined value
# is used as the membership result for that class and code point.
%overrides = (
  # Code point 11 is vertical tab.  Prior to Perl 5.18, \s did not match
  # it, and RE2 preserves that original behaviour.
  '\s:11' => 0,
);
40 | |
# Returns the list of [first, last] code point pairs (both ends inclusive)
# that belong to the character class $cname.
#
# $cname is placed inside a bracket expression, so it may be a POSIX name
# such as "[:alpha:]" or a Perl escape such as "\d".  Code points 0 through
# 128 are tested one at a time.  An entry in %overrides keyed by
# "$cname:$code" replaces Perl's own answer when it is defined.  A run that
# is still open after code point 128 is closed at 255.
sub ComputeClass($) {
  my ($cname) = @_;
  my $matcher = qr/[$cname]/;
  my @ranges;
  my $run_start;    # first code point of the run in progress, undef if none

  foreach my $code (0 .. 128) {
    my $forced = $overrides{"$cname:$code"};
    my $in_class = defined($forced) ? $forced : chr($code) =~ $matcher;
    if (!$in_class) {
      # A non-member ends the current run, if there is one.
      push @ranges, [$run_start, $code - 1] if defined $run_start;
      undef $run_start;
    } elsif (!defined $run_start) {
      $run_start = $code;
    }
  }

  # Nothing above 128 is tested; a run reaching that far extends to 255.
  push @ranges, [$run_start, 255] if defined $run_start;
  return @ranges;
}
61 | |
# Prints the C range table for one character class on standard output and
# returns the two UGroup initializers that refer to it.
#
#   $cnum   - number used to form the table name "code$cnum".
#   $cname  - class name as written in a regexp, e.g. "[:alpha:]" or "\d".
#   @ranges - array references of the form [first, last]; each becomes one
#             "{ 0x.., 0x.. }" row.
#
# Returns a two-element list: the entry for the class itself (sign +1) and
# the entry for its negation (sign -1).  Both name the same table.  The
# negated name is formed by inserting "^" after the first colon of a POSIX
# name, or by upper-casing the letters of any other name.
sub PrintClass($$@) {
  my ($cnum, $cname, @ranges) = @_;

  print "static const URange16 code${cnum}[] = { /* $cname */\n";
  foreach my $range (@ranges) {
    printf "\t{ 0x%x, 0x%x },\n", $range->[0], $range->[1];
  }
  print "};\n";

  my $n = @ranges;

  # Double every backslash so the name is valid inside a C string literal.
  my $escname = $cname;
  $escname =~ s/\\/\\\\/g;

  # Lexical, so the negated name does not leak into a package global.
  my $negname = $escname;
  if ($negname =~ /:/) {
    $negname =~ s/:/:^/;
  } else {
    $negname =~ y/a-z/A-Z/;
  }

  return "{ \"$escname\", +1, code$cnum, $n }",
         "{ \"$negname\", -1, code$cnum, $n }";
}
81 | |
# Number of the most recently emitted codeN[] table.  It is shared by every
# call to PrintClasses, so table names stay unique across groups.
my $cnum = 0;

# Emits the tables for every class in @classes, followed by the array
# "${pname}_groups" that indexes them and the constant
# "num_${pname}_groups" holding the number of entries in that array.
#
#   $pname   - prefix used in the generated C identifiers.
#   @classes - class names, each passed to ComputeClass and PrintClass.
sub PrintClasses($@) {
  my ($pname, @classes) = @_;

  # Each class yields its table on standard output plus two group entries.
  my @entries = map {
    my $class = $_;
    PrintClass(++$cnum, $class, ComputeClass($class));
  } @classes;

  print "const UGroup ${pname}_groups[] = {\n";
  print join('', map { "\t$_,\n" } @entries);
  print "};\n";

  my $count = scalar @entries;
  print "const int num_${pname}_groups = $count;\n";
}
99 | |
# File prologue: generated-file banner, the include, and the opening of
# namespace re2.
print <<'PROLOGUE';
// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

PROLOGUE

# The Perl classes are emitted first, so they receive the lowest table
# numbers; the POSIX classes continue the numbering.
PrintClasses("perl", @perlclasses);
PrintClasses("posix", @posixclasses);

# File epilogue: close the namespace.
print <<'EPILOGUE';

} // namespace re2
EPILOGUE
OLD | NEW |