Index: third_party/re2/re2/make_unicode_groups.py |
diff --git a/third_party/re2/re2/make_unicode_groups.py b/third_party/re2/re2/make_unicode_groups.py |
deleted file mode 100755 |
index 8499793fa740cc6b749e42d7247219f4f2c5410a..0000000000000000000000000000000000000000 |
--- a/third_party/re2/re2/make_unicode_groups.py |
+++ /dev/null |
@@ -1,111 +0,0 @@ |
-#!/usr/bin/python |
-# Copyright 2008 The RE2 Authors. All Rights Reserved. |
-# Use of this source code is governed by a BSD-style |
-# license that can be found in the LICENSE file. |
- |
-"""Generate C++ tables for Unicode Script and Category groups.""" |
- |
-import sys |
-import unicode |
- |
-_header = """ |
-// GENERATED BY make_unicode_groups.py; DO NOT EDIT. |
-// make_unicode_groups.py >unicode_groups.cc |
- |
-#include "re2/unicode_groups.h" |
- |
-namespace re2 { |
- |
-""" |
- |
-_trailer = """ |
- |
-} // namespace re2 |
- |
-""" |
- |
-n16 = 0 |
-n32 = 0 |
- |
-def MakeRanges(codes): |
- """Turn a list like [1,2,3,7,8,9] into a range list [[1,3], [7,9]]""" |
- ranges = [] |
- last = -100 |
- for c in codes: |
- if c == last+1: |
- ranges[-1][1] = c |
- else: |
- ranges.append([c, c]) |
- last = c |
- return ranges |
- |
-def PrintRanges(type, name, ranges): |
- """Print the ranges as an array of type named name.""" |
- print "static const %s %s[] = {" % (type, name,) |
- for lo, hi in ranges: |
- print "\t{ %d, %d }," % (lo, hi) |
- print "};" |
- |
-# def PrintCodes(type, name, codes): |
-# """Print the codes as an array of type named name.""" |
-# print "static %s %s[] = {" % (type, name,) |
-# for c in codes: |
-# print "\t%d," % (c,) |
-# print "};" |
- |
-def PrintGroup(name, codes): |
- """Print the data structures for the group of codes. |
- Return a UGroup literal for the group.""" |
- |
- # See unicode_groups.h for a description of the data structure. |
- |
- # Split codes into 16-bit ranges and 32-bit ranges. |
- range16 = MakeRanges([c for c in codes if c < 65536]) |
- range32 = MakeRanges([c for c in codes if c >= 65536]) |
- |
- # Pull singleton ranges out of range16. |
- # code16 = [lo for lo, hi in range16 if lo == hi] |
- # range16 = [[lo, hi] for lo, hi in range16 if lo != hi] |
- |
- global n16 |
- global n32 |
- n16 += len(range16) |
- n32 += len(range32) |
- |
- ugroup = "{ \"%s\", +1" % (name,) |
- # if len(code16) > 0: |
- # PrintCodes("uint16", name+"_code16", code16) |
- # ugroup += ", %s_code16, %d" % (name, len(code16)) |
- # else: |
- # ugroup += ", 0, 0" |
- if len(range16) > 0: |
- PrintRanges("URange16", name+"_range16", range16) |
- ugroup += ", %s_range16, %d" % (name, len(range16)) |
- else: |
- ugroup += ", 0, 0" |
- if len(range32) > 0: |
- PrintRanges("URange32", name+"_range32", range32) |
- ugroup += ", %s_range32, %d" % (name, len(range32)) |
- else: |
- ugroup += ", 0, 0" |
- ugroup += " }" |
- return ugroup |
- |
-def main(): |
- print _header |
- ugroups = [] |
- for name, codes in unicode.Categories().iteritems(): |
- ugroups.append(PrintGroup(name, codes)) |
- for name, codes in unicode.Scripts().iteritems(): |
- ugroups.append(PrintGroup(name, codes)) |
- print "// %d 16-bit ranges, %d 32-bit ranges" % (n16, n32) |
- print "const UGroup unicode_groups[] = {"; |
- ugroups.sort() |
- for ug in ugroups: |
- print "\t%s," % (ug,) |
- print "};" |
- print "const int num_unicode_groups = %d;" % (len(ugroups),) |
- print _trailer |
- |
-if __name__ == '__main__': |
- main() |