OLD | NEW |
| (Empty) |
1 /* Copyright 2013 Google Inc. All Rights Reserved. | |
2 | |
3 Licensed under the Apache License, Version 2.0 (the "License"); | |
4 you may not use this file except in compliance with the License. | |
5 You may obtain a copy of the License at | |
6 | |
7 http://www.apache.org/licenses/LICENSE-2.0 | |
8 | |
9 Unless required by applicable law or agreed to in writing, software | |
10 distributed under the License is distributed on an "AS IS" BASIS, | |
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 See the License for the specific language governing permissions and | |
13 limitations under the License. | |
14 | |
15 Lookup table to map the previous two bytes to a context id. | |
16 | |
17 There are four different context modeling modes defined here: | |
18 CONTEXT_LSB6: context id is the least significant 6 bits of the last byte, | |
19 CONTEXT_MSB6: context id is the most significant 6 bits of the last byte, | |
20 CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text, | |
21 CONTEXT_SIGNED: second-order context model tuned for signed integers. | |
22 | |
23 The context id for the UTF8 context model is calculated as follows. If p1 | |
24 and p2 are the previous two bytes, we calcualte the context as | |
25 | |
26 context = kContextLookup[p1] | kContextLookup[p2 + 256]. | |
27 | |
28 If the previous two bytes are ASCII characters (i.e. < 128), this will be | |
29 equivalent to | |
30 | |
31 context = 4 * context1(p1) + context2(p2), | |
32 | |
33 where context1 is based on the previous byte in the following way: | |
34 | |
35 0 : non-ASCII control | |
36 1 : \t, \n, \r | |
37 2 : space | |
38 3 : other punctuation | |
39 4 : " ' | |
40 5 : % | |
41 6 : ( < [ { | |
42 7 : ) > ] } | |
43 8 : , ; : | |
44 9 : . | |
45 10 : = | |
46 11 : number | |
47 12 : upper-case vowel | |
48 13 : upper-case consonant | |
49 14 : lower-case vowel | |
50 15 : lower-case consonant | |
51 | |
52 and context2 is based on the second last byte: | |
53 | |
54 0 : control, space | |
55 1 : punctuation | |
56 2 : upper-case letter, number | |
57 3 : lower-case letter | |
58 | |
59 If the last byte is ASCII, and the second last byte is not (in a valid UTF8 | |
60 stream it will be a continuation byte, value between 128 and 191), the | |
61 context is the same as if the second last byte was an ASCII control or space. | |
62 | |
63 If the last byte is a UTF8 lead byte (value >= 192), then the next byte will | |
64 be a continuation byte and the context id is 2 or 3 depending on the LSB of | |
65 the last byte and to a lesser extent on the second last byte if it is ASCII. | |
66 | |
67 If the last byte is a UTF8 continuation byte, the second last byte can be: | |
68 - continuation byte: the next byte is probably ASCII or lead byte (assuming | |
69 4-byte UTF8 characters are rare) and the context id is 0 or 1. | |
70 - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 | |
71 - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 | |
72 | |
73 The possible value combinations of the previous two bytes, the range of | |
74 context ids and the type of the next byte is summarized in the table below: | |
75 | |
76 |--------\-----------------------------------------------------------------| | |
77 | \ Last byte | | |
78 | Second \---------------------------------------------------------------| | |
79 | last byte \ ASCII | cont. byte | lead byte | | |
80 | \ (0-127) | (128-191) | (192-) | | |
81 |=============|===================|=====================|==================| | |
82 | ASCII | next: ASCII/lead | not valid | next: cont. | | |
83 | (0-127) | context: 4 - 63 | | context: 2 - 3 | | |
84 |-------------|-------------------|---------------------|------------------| | |
85 | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | | |
86 | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | | |
87 |-------------|-------------------|---------------------|------------------| | |
88 | lead byte | not valid | next: ASCII/lead | not valid | | |
89 | (192-207) | | context: 0 - 1 | | | |
90 |-------------|-------------------|---------------------|------------------| | |
91 | lead byte | not valid | next: cont. | not valid | | |
92 | (208-) | | context: 2 - 3 | | | |
93 |-------------|-------------------|---------------------|------------------| | |
94 | |
95 The context id for the signed context mode is calculated as: | |
96 | |
97 context = (kContextLookup[512 + p1] << 3) | kContextLookup[512 + p2]. | |
98 | |
99 For any context modeling modes, the context ids can be calculated by |-ing | |
100 together two lookups from one table using context model dependent offsets: | |
101 | |
102 context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2]. | |
103 | |
104 where offset1 and offset2 are dependent on the context mode. | |
105 */ | |
106 | |
107 #ifndef BROTLI_DEC_CONTEXT_H_ | |
108 #define BROTLI_DEC_CONTEXT_H_ | |
109 | |
110 #include "./types.h" | |
111 | |
112 enum ContextType { | |
113 CONTEXT_LSB6 = 0, | |
114 CONTEXT_MSB6 = 1, | |
115 CONTEXT_UTF8 = 2, | |
116 CONTEXT_SIGNED = 3 | |
117 }; | |
118 | |
119 /* Common context lookup table for all context modes. */ | |
120 static const uint8_t kContextLookup[1792] = { | |
121 /* CONTEXT_UTF8, last byte. */ | |
122 /* ASCII range. */ | |
123 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, | |
124 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
125 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, | |
126 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, | |
127 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, | |
128 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, | |
129 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, | |
130 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, | |
131 /* UTF8 continuation byte range. */ | |
132 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, | |
133 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, | |
134 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, | |
135 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, | |
136 /* UTF8 lead byte range. */ | |
137 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, | |
138 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, | |
139 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, | |
140 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, | |
141 /* CONTEXT_UTF8 second last byte. */ | |
142 /* ASCII range. */ | |
143 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
144 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
145 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
146 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, | |
147 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
148 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, | |
149 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
150 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, | |
151 /* UTF8 continuation byte range. */ | |
152 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
153 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
154 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
155 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
156 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
157 /* UTF8 lead byte range. */ | |
158 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
159 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
160 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
161 /* CONTEXT_SIGNED, second last byte. */ | |
162 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
163 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
164 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
165 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
166 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
167 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
168 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
169 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
170 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
171 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
172 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
173 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
174 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
175 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
176 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
177 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, | |
178 /* CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits. */ | |
179 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, | |
180 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, | |
181 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, | |
182 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, | |
183 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, | |
184 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, | |
185 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, | |
186 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, | |
187 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, | |
188 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, | |
189 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, | |
190 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, | |
191 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, | |
192 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, | |
193 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, | |
194 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56, | |
195 /* CONTEXT_LSB6, last byte. */ | |
196 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
197 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
198 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, | |
199 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, | |
200 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
201 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
202 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, | |
203 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, | |
204 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
205 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
206 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, | |
207 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, | |
208 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
209 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
210 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, | |
211 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, | |
212 /* CONTEXT_MSB6, last byte. */ | |
213 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, | |
214 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, | |
215 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, | |
216 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, | |
217 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, | |
218 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, | |
219 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, | |
220 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, | |
221 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, | |
222 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, | |
223 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, | |
224 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, | |
225 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, | |
226 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, | |
227 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, | |
228 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, | |
229 /* CONTEXT_{M,L}SB6, second last byte, */ | |
230 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
231 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
232 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
233 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
234 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
235 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
236 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
237 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
238 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
239 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
240 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
241 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
242 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
243 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
244 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
245 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
246 }; | |
247 | |
248 static const int kContextLookupOffsets[8] = { | |
249 /* CONTEXT_LSB6 */ | |
250 1024, 1536, | |
251 /* CONTEXT_MSB6 */ | |
252 1280, 1536, | |
253 /* CONTEXT_UTF8 */ | |
254 0, 256, | |
255 /* CONTEXT_SIGNED */ | |
256 768, 512, | |
257 }; | |
258 | |
259 #endif /* BROTLI_DEC_CONTEXT_H_ */ | |
OLD | NEW |