Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(125)

Side by Side Diff: third_party/brotli/enc/context.h

Issue 1956893002: Added brotli enc/ and tools/ directories. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Updated to most recent build tools Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/brotli/enc/compressor.h ('k') | third_party/brotli/enc/dictionary.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* Copyright 2013 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6
7 // Functions to map previous bytes into a context id.
8
9 #ifndef BROTLI_ENC_CONTEXT_H_
10 #define BROTLI_ENC_CONTEXT_H_
11
12 #include "./types.h"
13
14 namespace brotli {
15
16 // Second-order context lookup table for UTF8 byte streams.
17 //
18 // If p1 and p2 are the previous two bytes, we calculate the context as
19 //
20 // context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
21 //
22 // If the previous two bytes are ASCII characters (i.e. < 128), this will be
23 // equivalent to
24 //
25 // context = 4 * context1(p1) + context2(p2),
26 //
27 // where context1 is based on the previous byte in the following way:
28 //
29 // 0 : non-ASCII control
30 // 1 : \t, \n, \r
31 // 2 : space
32 // 3 : other punctuation
33 // 4 : " '
34 // 5 : %
35 // 6 : ( < [ {
36 // 7 : ) > ] }
37 // 8 : , ; :
38 // 9 : .
39 // 10 : =
40 // 11 : number
41 // 12 : upper-case vowel
42 // 13 : upper-case consonant
43 // 14 : lower-case vowel
44 // 15 : lower-case consonant
45 //
46 // and context2 is based on the second last byte:
47 //
48 // 0 : control, space
49 // 1 : punctuation
50 // 2 : upper-case letter, number
51 // 3 : lower-case letter
52 //
53 // If the last byte is ASCII, and the second last byte is not (in a valid UTF8
54 // stream it will be a continuation byte, value between 128 and 191), the
55 // context is the same as if the second last byte was an ASCII control or space.
56 //
57 // If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
58 // be a continuation byte and the context id is 2 or 3 depending on the LSB of
59 // the last byte and to a lesser extent on the second last byte if it is ASCII.
60 //
61 // If the last byte is a UTF8 continuation byte, the second last byte can be:
62 // - continuation byte: the next byte is probably ASCII or lead byte (assuming
63 // 4-byte UTF8 characters are rare) and the context id is 0 or 1.
64 // - lead byte (192 - 223): next byte is ASCII or lead byte, context is 0 or 1
65 // - lead byte (224 - 255): next byte is continuation byte, context is 2 or 3
66 //
67 // The possible value combinations of the previous two bytes, the range of
68 // context ids and the type of the next byte is summarized in the table below:
69 //
70 // |--------\-----------------------------------------------------------------|
71 // | \ Last byte |
72 // | Second \---------------------------------------------------------------|
73 // | last byte \ ASCII | cont. byte | lead byte |
74 // | \ (0-127) | (128-191) | (192-) |
75 // |=============|===================|=====================|==================|
76 // | ASCII | next: ASCII/lead | not valid | next: cont. |
77 // | (0-127) | context: 4 - 63 | | context: 2 - 3 |
78 // |-------------|-------------------|---------------------|------------------|
79 // | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
80 // | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
81 // |-------------|-------------------|---------------------|------------------|
82 // | lead byte | not valid | next: ASCII/lead | not valid |
83 // | (192-223) | | context: 0 - 1 | |
84 // |-------------|-------------------|---------------------|------------------|
85 // | lead byte | not valid | next: cont. | not valid |
86 // | (224-) | | context: 2 - 3 | |
87 // |-------------|-------------------|---------------------|------------------|
88 static const uint8_t kUTF8ContextLookup[512] = {
89 // Last byte: entries [0, 256), indexed directly by p1 in Context().
90 //
91 // ASCII range (0-127): every value is a multiple of 4, i.e. 4 * context1.
92 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
93 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
94 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
95 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
96 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
97 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
98 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
99 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
100 // UTF8 continuation byte range (128-191): 0 or 1, following the low bit of the byte.
101 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
102 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
103 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
104 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
105 // UTF8 lead byte range (192-255): 2 or 3, following the low bit of the byte.
106 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
107 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
108 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
109 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
110 // Second last byte: entries [256, 512), indexed by p2 + 256 in Context().
111 //
112 // ASCII range (0-127): values 0-3, i.e. context2.
113 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
114 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
115 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
119 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
121 // UTF8 continuation byte range (128-191): the next four rows, all 0.
122 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
123 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
124 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
126 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // Bytes 192-207: this row is already the first row of the lead byte range.
127 // UTF8 lead byte range, continued: bytes 208-223 map to 0, bytes 224-255 map to 2.
128 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
129 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131 };
132
133 // Context lookup table for small signed integers.
// Maps a byte value to a class in the range 0-7 (three bits). Context() looks
// up both previous bytes here in CONTEXT_SIGNED mode.
// The array size is deduced from the initializer: 16 rows of 16 entries = 256,
// one entry per possible uint8_t index.
134 static const uint8_t kSigned3BitContextLookup[] = {
135 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0 -> 0; 1-15 -> 1
136 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // 16-63 -> 2 (three rows)
137 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
139 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // 64-127 -> 3 (four rows)
140 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
141 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
142 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 128-191 -> 4 (four rows)
144 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
145 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
146 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,  // 192-239 -> 5 (three rows)
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,  // 240-254 -> 6; 255 -> 7
151 };
152
// Selects which mapping Context() applies to the two previous bytes.
// NOTE(review): the explicit values presumably mirror the context mode codes
// of the Brotli stream format - confirm before renumbering.
153 enum ContextType {
154 CONTEXT_LSB6 = 0,  // Low six bits of the last byte.
155 CONTEXT_MSB6 = 1,  // Last byte shifted right by two, i.e. its top six bits.
156 CONTEXT_UTF8 = 2,  // kUTF8ContextLookup on both previous bytes, OR-ed together.
157 CONTEXT_SIGNED = 3  // kSigned3BitContextLookup on both previous bytes, packed into six bits.
158 };
159
// Computes the context id for the next byte from the two bytes preceding it.
//
// p1:   the last (most recent) byte.
// p2:   the second last byte.
// mode: which mapping to apply.
//
// Returns a value in 0-63 for each of the four ContextType modes, and 0 for
// any other value of mode.
160 static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
161 switch (mode) {
162 case CONTEXT_LSB6:
163 return p1 & 0x3f;  // Only p1 is used; p2 is ignored.
164 case CONTEXT_MSB6:
165 return static_cast<uint8_t>(p1 >> 2);  // Only p1 is used; p2 is ignored.
166 case CONTEXT_UTF8:
167 return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];  // p2 + 256 selects the second-last-byte half of the table.
168 case CONTEXT_SIGNED:
169 return static_cast<uint8_t>((kSigned3BitContextLookup[p1] << 3) +  // Class of p1 in the high three bits,
170 kSigned3BitContextLookup[p2]);  // class of p2 in the low three bits.
171 default:
172 return 0;  // Mode outside the four enumerators.
173 }
174 }
175
176 } // namespace brotli
177
178 #endif // BROTLI_ENC_CONTEXT_H_
OLDNEW
« no previous file with comments | « third_party/brotli/enc/compressor.h ('k') | third_party/brotli/enc/dictionary.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698