OLD | NEW |
| (Empty) |
1 // Copyright 2008 The RE2 Authors. All Rights Reserved. | |
2 // Use of this source code is governed by a BSD-style | |
3 // license that can be found in the LICENSE file. | |
4 | |
5 // Unicode character groups. | |
6 | |
7 // The codes get split into ranges of 16-bit codes | |
8 // and ranges of 32-bit codes. It would be simpler | |
9 // to use only 32-bit ranges, but these tables are large | |
10 // enough to warrant extra care. | |
11 // | |
12 // Using just 32-bit ranges gives 27 kB of data. | |
13 // Adding 16-bit ranges gives 18 kB of data. | |
14 // Adding an extra table of 16-bit singletons would reduce | |
15 // to 16.5 kB of data but make the data harder to use; | |
16 // we don't bother. | |
17 | |
18 #ifndef RE2_UNICODE_GROUPS_H__ | |
19 #define RE2_UNICODE_GROUPS_H__ | |
20 | |
21 #include "util/util.h" | |
22 | |
23 namespace re2 { | |
24 | |
25 struct URange16 | |
26 { | |
27 uint16 lo; | |
28 uint16 hi; | |
29 }; | |
30 | |
31 struct URange32 | |
32 { | |
33 Rune lo; | |
34 Rune hi; | |
35 }; | |
36 | |
37 struct UGroup | |
38 { | |
39 const char *name; | |
40 int sign; // +1 for [abc], -1 for [^abc] | |
41 const URange16 *r16; | |
42 int nr16; | |
43 const URange32 *r32; | |
44 int nr32; | |
45 }; | |
46 | |
47 // Named by property or script name (e.g., "Nd", "N", "Han"). | |
48 // Negated groups are not included. | |
49 extern const UGroup unicode_groups[]; | |
50 extern const int num_unicode_groups; | |
51 | |
52 // Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]"). | |
53 // Negated groups are included. | |
54 extern const UGroup posix_groups[]; | |
55 extern const int num_posix_groups; | |
56 | |
57 // Named by Perl name (e.g., "\\d", "\\D"). | |
58 // Negated groups are included. | |
59 extern const UGroup perl_groups[]; | |
60 extern const int num_perl_groups; | |
61 | |
62 } // namespace re2 | |
63 | |
64 #endif // RE2_UNICODE_GROUPS_H__ | |
OLD | NEW |