Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(66)

Side by Side Diff: third_party/brotli/enc/context.h

Issue 2537133002: Update brotli to v1.0.0-snapshot. (Closed)
Patch Set: Fixed typo Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/brotli/enc/compressor.h ('k') | third_party/brotli/enc/dictionary.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* Copyright 2013 Google Inc. All Rights Reserved. 1 /* Copyright 2013 Google Inc. All Rights Reserved.
2 2
3 Distributed under MIT license. 3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */ 5 */
6 6
7 // Functions to map previous bytes into a context id. 7 /* Functions to map previous bytes into a context id. */
8 8
9 #ifndef BROTLI_ENC_CONTEXT_H_ 9 #ifndef BROTLI_ENC_CONTEXT_H_
10 #define BROTLI_ENC_CONTEXT_H_ 10 #define BROTLI_ENC_CONTEXT_H_
11 11
12 #include "./types.h" 12 #include <brotli/port.h>
13 #include <brotli/types.h>
13 14
14 namespace brotli { 15 #if defined(__cplusplus) || defined(c_plusplus)
16 extern "C" {
17 #endif
15 18
16 // Second-order context lookup table for UTF8 byte streams. 19 /* Second-order context lookup table for UTF8 byte streams.
17 // 20
18 // If p1 and p2 are the previous two bytes, we calculate the context as 21 If p1 and p2 are the previous two bytes, we calculate the context as
19 // 22
20 // context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]. 23 context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
21 // 24
22 // If the previous two bytes are ASCII characters (i.e. < 128), this will be 25 If the previous two bytes are ASCII characters (i.e. < 128), this will be
23 // equivalent to 26 equivalent to
24 // 27
25 // context = 4 * context1(p1) + context2(p2), 28 context = 4 * context1(p1) + context2(p2),
26 // 29
27 // where context1 is based on the previous byte in the following way: 30 where context1 is based on the previous byte in the following way:
28 // 31
29 // 0 : non-ASCII control 32 0 : non-ASCII control
30 // 1 : \t, \n, \r 33 1 : \t, \n, \r
31 // 2 : space 34 2 : space
32 // 3 : other punctuation 35 3 : other punctuation
33 // 4 : " ' 36 4 : " '
34 // 5 : % 37 5 : %
35 // 6 : ( < [ { 38 6 : ( < [ {
36 // 7 : ) > ] } 39 7 : ) > ] }
37 // 8 : , ; : 40 8 : , ; :
38 // 9 : . 41 9 : .
39 // 10 : = 42 10 : =
40 // 11 : number 43 11 : number
41 // 12 : upper-case vowel 44 12 : upper-case vowel
42 // 13 : upper-case consonant 45 13 : upper-case consonant
43 // 14 : lower-case vowel 46 14 : lower-case vowel
44 // 15 : lower-case consonant 47 15 : lower-case consonant
45 // 48
46 // and context2 is based on the second last byte: 49 and context2 is based on the second last byte:
47 // 50
48 // 0 : control, space 51 0 : control, space
49 // 1 : punctuation 52 1 : punctuation
50 // 2 : upper-case letter, number 53 2 : upper-case letter, number
51 // 3 : lower-case letter 54 3 : lower-case letter
52 // 55
53 // If the last byte is ASCII, and the second last byte is not (in a valid UTF8 56 If the last byte is ASCII, and the second last byte is not (in a valid UTF8
54 // stream it will be a continuation byte, value between 128 and 191), the 57 stream it will be a continuation byte, value between 128 and 191), the
55 // context is the same as if the second last byte was an ASCII control or space. 58 context is the same as if the second last byte was an ASCII control or space.
56 // 59
57 // If the last byte is a UTF8 lead byte (value >= 192), then the next byte will 60 If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
58 // be a continuation byte and the context id is 2 or 3 depending on the LSB of 61 be a continuation byte and the context id is 2 or 3 depending on the LSB of
59 // the last byte and to a lesser extent on the second last byte if it is ASCII. 62 the last byte and to a lesser extent on the second last byte if it is ASCII.
60 // 63
61 // If the last byte is a UTF8 continuation byte, the second last byte can be: 64 If the last byte is a UTF8 continuation byte, the second last byte can be:
62 // - continuation byte: the next byte is probably ASCII or lead byte (assuming 65 - continuation byte: the next byte is probably ASCII or lead byte (assuming
63 // 4-byte UTF8 characters are rare) and the context id is 0 or 1. 66 4-byte UTF8 characters are rare) and the context id is 0 or 1.
64 // - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 67 - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
65 // - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 68 - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
66 // 69
67 // The possible value combinations of the previous two bytes, the range of 70 The possible value combinations of the previous two bytes, the range of
68 // context ids and the type of the next byte are summarized in the table below: 71 context ids and the type of the next byte are summarized in the table below:
69 // 72
70 // |--------\-----------------------------------------------------------------| 73 |--------\-----------------------------------------------------------------|
71 // | \ Last byte | 74 | \ Last byte |
72 // | Second \---------------------------------------------------------------| 75 | Second \---------------------------------------------------------------|
73 // | last byte \ ASCII | cont. byte | lead byte | 76 | last byte \ ASCII | cont. byte | lead byte |
74 // | \ (0-127) | (128-191) | (192-) | 77 | \ (0-127) | (128-191) | (192-) |
75 // |=============|===================|=====================|==================| 78 |=============|===================|=====================|==================|
76 // | ASCII | next: ASCII/lead | not valid | next: cont. | 79 | ASCII | next: ASCII/lead | not valid | next: cont. |
77 // | (0-127) | context: 4 - 63 | | context: 2 - 3 | 80 | (0-127) | context: 4 - 63 | | context: 2 - 3 |
78 // |-------------|-------------------|---------------------|------------------| 81 |-------------|-------------------|---------------------|------------------|
79 // | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | 82 | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
80 // | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | 83 | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
81 // |-------------|-------------------|---------------------|------------------| 84 |-------------|-------------------|---------------------|------------------|
82 // | lead byte | not valid | next: ASCII/lead | not valid | 85 | lead byte | not valid | next: ASCII/lead | not valid |
83 // | (192-207) | | context: 0 - 1 | | 86 | (192-207) | | context: 0 - 1 | |
84 // |-------------|-------------------|---------------------|------------------| 87 |-------------|-------------------|---------------------|------------------|
85 // | lead byte | not valid | next: cont. | not valid | 88 | lead byte | not valid | next: cont. | not valid |
86 // | (208-) | | context: 2 - 3 | | 89 | (208-) | | context: 2 - 3 | |
87 // |-------------|-------------------|---------------------|------------------| 90 |-------------|-------------------|---------------------|------------------|
91 */
88 static const uint8_t kUTF8ContextLookup[512] = { 92 static const uint8_t kUTF8ContextLookup[512] = {
89 // Last byte. 93 /* Last byte. */
90 // 94 /* */
91 // ASCII range. 95 /* ASCII range. */
92 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, 96 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
93 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
94 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, 98 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
95 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, 99 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
96 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, 100 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
97 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, 101 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
98 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, 102 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
99 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, 103 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
100 // UTF8 continuation byte range. 104 /* UTF8 continuation byte range. */
101 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 105 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
102 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 106 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
103 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 107 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
104 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 108 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
105 // UTF8 lead byte range. 109 /* UTF8 lead byte range. */
106 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 110 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
107 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 111 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
108 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 112 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
109 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 113 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
110 // Second last byte. 114 /* Second last byte. */
111 // 115 /* */
112 // ASCII range. 116 /* ASCII range. */
113 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 117 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
114 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
115 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 119 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 120 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 121 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 122 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
119 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 123 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, 124 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
121 // UTF8 continuation byte range. 125 /* UTF8 continuation byte range. */
122 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 126 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
123 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
124 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 129 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
126 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
127 // UTF8 lead byte range. 131 /* UTF8 lead byte range. */
128 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 132 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
129 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 133 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 134 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131 }; 135 };
132 136
133 // Context lookup table for small signed integers. 137 /* Context lookup table for small signed integers. */
134 static const uint8_t kSigned3BitContextLookup[] = { 138 static const uint8_t kSigned3BitContextLookup[] = {
135 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 139 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
137 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 141 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 142 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
139 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 143 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
140 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 144 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
141 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 145 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
142 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 147 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 148 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
145 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 149 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
146 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 150 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 153 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 154 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
151 }; 155 };
152 156
153 enum ContextType { 157 typedef enum ContextType {
154 CONTEXT_LSB6 = 0, 158 CONTEXT_LSB6 = 0,
155 CONTEXT_MSB6 = 1, 159 CONTEXT_MSB6 = 1,
156 CONTEXT_UTF8 = 2, 160 CONTEXT_UTF8 = 2,
157 CONTEXT_SIGNED = 3 161 CONTEXT_SIGNED = 3
158 }; 162 } ContextType;
159 163
160 static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) { 164 static BROTLI_INLINE uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
161 switch (mode) { 165 switch (mode) {
162 case CONTEXT_LSB6: 166 case CONTEXT_LSB6:
163 return p1 & 0x3f; 167 return p1 & 0x3f;
164 case CONTEXT_MSB6: 168 case CONTEXT_MSB6:
165 return static_cast<uint8_t>(p1 >> 2); 169 return (uint8_t)(p1 >> 2);
166 case CONTEXT_UTF8: 170 case CONTEXT_UTF8:
167 return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]; 171 return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
168 case CONTEXT_SIGNED: 172 case CONTEXT_SIGNED:
169 return static_cast<uint8_t>((kSigned3BitContextLookup[p1] << 3) + 173 return (uint8_t)((kSigned3BitContextLookup[p1] << 3) +
170 kSigned3BitContextLookup[p2]); 174 kSigned3BitContextLookup[p2]);
171 default: 175 default:
172 return 0; 176 return 0;
173 } 177 }
174 } 178 }
175 179
176 } // namespace brotli 180 #if defined(__cplusplus) || defined(c_plusplus)
181 } /* extern "C" */
182 #endif
177 183
178 #endif // BROTLI_ENC_CONTEXT_H_ 184 #endif /* BROTLI_ENC_CONTEXT_H_ */
OLDNEW
« no previous file with comments | « third_party/brotli/enc/compressor.h ('k') | third_party/brotli/enc/dictionary.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698