| Index: third_party/re2/re2/make_perl_groups.pl
|
| diff --git a/third_party/re2/re2/make_perl_groups.pl b/third_party/re2/re2/make_perl_groups.pl
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..d5eaa59851100d90894f35f6103c4e7bc3269afb
|
| --- /dev/null
|
| +++ b/third_party/re2/re2/make_perl_groups.pl
|
| @@ -0,0 +1,110 @@
|
| +#!/usr/bin/perl
|
| +# Copyright 2008 The RE2 Authors. All Rights Reserved.
|
| +# Use of this source code is governed by a BSD-style
|
| +# license that can be found in the LICENSE file.
|
| +
|
| +# Generate table entries giving character ranges
|
| +# for POSIX/Perl character classes. Rather than
|
| +# figure out what the definition is, it is easier to ask
|
| +# Perl about each character code from 0 to 128 and write down
|
| +# its answer.
|
| +
|
| +# POSIX class names exactly as they are written inside a bracket expression;
|
| +# ComputeClass adds the enclosing [] before matching.
|
| +@posixclasses = qw(
|
| +  [:alnum:]
|
| +  [:alpha:]
|
| +  [:ascii:]
|
| +  [:blank:]
|
| +  [:cntrl:]
|
| +  [:digit:]
|
| +  [:graph:]
|
| +  [:lower:]
|
| +  [:print:]
|
| +  [:punct:]
|
| +  [:space:]
|
| +  [:upper:]
|
| +  [:word:]
|
| +  [:xdigit:]
|
| +);
|
| +
|
| +# Perl escape classes. Each element is the two-character escape itself:
|
| +# a single backslash followed by one letter.
|
| +@perlclasses = ('\d', '\s', '\w');
|
| +
|
| +sub ComputeClass($) {
|
| +  # Return the maximal runs of code points in 0..128 that match $class, as a
|
| +  # list of inclusive [first, last] array refs in ascending order.
|
| +  my ($class) = @_;
|
| +  my $pattern = "[$class]";
|
| +  my (@runs, $run_start);
|
| +  # The trailing 256 is a sentinel: it is never matched against the pattern,
|
| +  # so it always ends a run; a run still open after 128 is closed at 255.
|
| +  foreach my $cp (0 .. 128, 256) {
|
| +    if ($cp <= 128 && chr($cp) =~ $pattern) {
|
| +      # Open a new run only when none is in progress.
|
| +      $run_start = $cp unless defined $run_start;
|
| +    } else {
|
| +      # A miss ends the current run at the previous code point.
|
| +      push @runs, [$run_start, $cp - 1] if defined $run_start;
|
| +      undef $run_start;
|
| +    }
|
| +  }
|
| +  return @runs;
|
| +}
|
| +
|
| +sub PrintClass($$@) {
|
| +  # Print the array code<cname> filled with @ranges (each a [lo, hi] array ref)
|
| +  # and return two UGroup initializer strings for it: $name (+1), negated (-1).
|
| +  my ($cname, $name, @ranges) = @_;
|
| +  print "static URange16 code${cname}[] = { /* $name */\n";
|
| +  printf "\t{ 0x%x, 0x%x },\n", $_->[0], $_->[1] foreach @ranges;
|
| +  print "};\n";
|
| +  my $n = @ranges;
|
| +  # Double every backslash before the name is placed between double quotes.
|
| +  (my $escname = $name) =~ s/\\/\\\\/g;
|
| +  # Negated spelling: [:foo:] becomes [:^foo:], \d becomes \D. Declared with
|
| +  # my so the value stays local instead of landing in a package global.
|
| +  my $negname = $escname;
|
| +  if ($negname =~ /:/) {
|
| +    $negname =~ s/:/:^/;
|
| +  } else {
|
| +    $negname =~ y/a-z/A-Z/;
|
| +  }
|
| +  return "{ \"$escname\", +1, code$cname, $n }", "{ \"$negname\", -1, code$cname, $n }";
|
| +}
|
| +
|
| +# Sequence number used to name the generated code<N> arrays. It is file-scoped
|
| +# and keeps counting across PrintClasses calls, so the names never repeat.
|
| +my $gen = 0;
|
| +
|
| +sub PrintClasses($@) {
|
| +  # Print a range table for every class in @classes, followed by the
|
| +  # UGroup ${cname}_groups[] array listing the entries PrintClass returned
|
| +  # and the int num_${cname}_groups variable holding that array's length.
|
| +  my ($cname, @classes) = @_;
|
| +  my @entries;
|
| +  push @entries, PrintClass(++$gen, $_, ComputeClass($_)) foreach @classes;
|
| +  print "UGroup ${cname}_groups[] = {\n";
|
| +  print "\t$_,\n" foreach @entries;
|
| +  print "};\n";
|
| +  my $total = @entries;
|
| +  print "int num_${cname}_groups = $total;\n";
|
| +}
|
| +# Main program: print the generated C++ source -- header, both tables, footer.
|
| +print <<EOF;
|
| +// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
|
| +// make_perl_groups.pl >perl_groups.cc
|
| +
|
| +#include "re2/unicode_groups.h"
|
| +
|
| +namespace re2 {
|
| +
|
| +EOF
|
| +
|
| +PrintClasses("perl", @perlclasses);  # prints perl_groups[] and num_perl_groups
|
| +PrintClasses("posix", @posixclasses);  # prints posix_groups[] and num_posix_groups
|
| +
|
| +print <<EOF;  # footer: closes the namespace opened by the header
|
| +
|
| +} // namespace re2
|
| +EOF
|
|
|