| OLD | NEW |
| 1 #!/usr/bin/perl | 1 #!/usr/bin/perl |
| 2 # Copyright 2008 The RE2 Authors. All Rights Reserved. | 2 # Copyright 2008 The RE2 Authors. All Rights Reserved. |
| 3 # Use of this source code is governed by a BSD-style | 3 # Use of this source code is governed by a BSD-style |
| 4 # license that can be found in the LICENSE file. | 4 # license that can be found in the LICENSE file. |
| 5 | 5 |
| 6 # Generate table entries giving character ranges | 6 # Generate table entries giving character ranges |
| 7 # for POSIX/Perl character classes. Rather than | 7 # for POSIX/Perl character classes. Rather than |
| 8 # figure out what the definition is, it is easier to ask | 8 # figure out what the definition is, it is easier to ask |
| 9 # Perl about each letter from 0-128 and write down | 9 # Perl about each letter from 0-128 and write down |
| 10 # its answer. | 10 # its answer. |
| (...skipping 14 matching lines...) Expand all Loading... |
| 25 "[:word:]", | 25 "[:word:]", |
| 26 "[:xdigit:]", | 26 "[:xdigit:]", |
| 27 ); | 27 ); |
| 28 | 28 |
# Perl-style escape classes for which tables are generated.
@perlclasses = qw(\d \s \w);

# Per-character membership overrides, keyed by "<class>:<code point>".
# A defined value replaces whatever the running Perl reports.
%overrides = (
    # Code point 11 is vertical tab.  Prior to Perl 5.18, \s did not
    # match it; RE2 preserves that original behaviour.
    '\s:11' => 0,
);
| 40 |
# ComputeClass($cname)
#
# Returns a list of [lo, hi] array refs, one per maximal run of code points
# in 0..128 that belong to the character class named $cname (for example
# "\\d" or "[:alpha:]").  Membership is decided by matching chr($i) against
# /[$cname]/, unless %overrides holds a defined value under the key
# "$cname:$i", in which case that value is used as the answer instead.
sub ComputeClass($) {
    my ($cname) = @_;
    my $regexp = qr/[$cname]/;
    my @ranges;
    my $start = -1;    # first code point of the run in progress, or -1

    for my $i (0 .. 128) {
        # Defined-or: an override of 0 must win over a successful match.
        my $member = $overrides{"$cname:$i"} // (chr($i) =~ $regexp);
        if ($member) {
            $start = $i if $start < 0;
        }
        elsif ($start >= 0) {
            push @ranges, [$start, $i - 1];
            $start = -1;
        }
    }

    # A run that is still open after code point 128 is closed at 0xff.
    push @ranges, [$start, 255] if $start >= 0;

    return @ranges;
}
| 55 | 61 |
# PrintClass($cnum, $cname, @ranges)
#
# Prints a C++ table named code<cnum> of type "static const URange16[]",
# with one "{ lo, hi }" row per [lo, hi] array ref in @ranges and $cname
# in a trailing comment.
#
# Returns two strings, each a brace-enclosed initializer referring to that
# table: one for the class itself (sign +1) and one for its negation
# (sign -1).
sub PrintClass($$@) {
    my ($cnum, $cname, @ranges) = @_;

    print "static const URange16 code${cnum}[] = { /* $cname */\n";
    for my $range (@ranges) {
        printf "\t{ 0x%x, 0x%x },\n", $range->[0], $range->[1];
    }
    print "};\n";

    my $n = @ranges;

    # Double every backslash so the name is a valid C++ string literal.
    my $escname = $cname;
    $escname =~ s/\\/\\\\/g;

    # Negated name: a name containing ':' gets '^' after its first colon
    # ([:alpha:] -> [:^alpha:]); any other name is upper-cased (\d -> \D).
    # Lexical, so nothing leaks into the package namespace between calls.
    my $negname = $escname;
    if ($negname =~ /:/) {
        $negname =~ s/:/:^/;
    }
    else {
        $negname =~ y/a-z/A-Z/;
    }

    return "{ \"$escname\", +1, code$cnum, $n }",
           "{ \"$negname\", -1, code$cnum, $n }";
}
| 75 | 81 |
# Running table number; incremented once per class so that every generated
# code<N> array gets a distinct name across all PrintClasses calls.
my $cnum = 0;

# PrintClasses($pname, @classes)
#
# For each class name in @classes, computes its ranges and prints its range
# table, then prints the "<pname>_groups" array holding the entries returned
# for those tables, followed by "num_<pname>_groups" with the entry count.
sub PrintClasses($@) {
    my ($pname, @classes) = @_;

    my @entries;
    for my $cname (@classes) {
        my @ranges = ComputeClass($cname);
        my @pair = PrintClass(++$cnum, $cname, @ranges);
        push @entries, @pair;
    }

    print "const UGroup ${pname}_groups[] = {\n";
    print "\t$_,\n" for @entries;
    print "};\n";

    my $count = scalar @entries;
    print "const int num_${pname}_groups = $count;\n";
}
| 93 | 99 |
# Emit the generated file: fixed preamble, one group table set per class
# family, then the closing of the namespace.  The heredocs contain nothing
# to interpolate, so they are quoted literally.
print <<'HEADER';
// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

HEADER

# Perl classes come first, so their tables take the lowest code<N> numbers.
PrintClasses("perl", @perlclasses);
PrintClasses("posix", @posixclasses);

print <<'FOOTER';

} // namespace re2
FOOTER
| OLD | NEW |