Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(555)

Side by Side Diff: third_party/brotli/enc/utf8_util.c

Issue 2537133002: Update brotli to v1.0.0-snapshot. (Closed)
Patch Set: Fixed typo Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/brotli/enc/utf8_util.h ('k') | third_party/brotli/enc/utf8_util.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* Copyright 2013 Google Inc. All Rights Reserved. 1 /* Copyright 2013 Google Inc. All Rights Reserved.
2 2
3 Distributed under MIT license. 3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */ 5 */
6 6
7 // Heuristics for deciding about the UTF8-ness of strings. 7 /* Heuristics for deciding about the UTF8-ness of strings. */
8 8
9 #include "./utf8_util.h" 9 #include "./utf8_util.h"
10 10
11 #include "./types.h" 11 #include <brotli/types.h>
12 12
13 namespace brotli { 13 #if defined(__cplusplus) || defined(c_plusplus)
14 extern "C" {
15 #endif
14 16
15 namespace { 17 static size_t BrotliParseAsUTF8(
16 18 int* symbol, const uint8_t* input, size_t size) {
17 size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) { 19 /* ASCII */
18 // ASCII
19 if ((input[0] & 0x80) == 0) { 20 if ((input[0] & 0x80) == 0) {
20 *symbol = input[0]; 21 *symbol = input[0];
21 if (*symbol > 0) { 22 if (*symbol > 0) {
22 return 1; 23 return 1;
23 } 24 }
24 } 25 }
25 // 2-byte UTF8 26 /* 2-byte UTF8 */
26 if (size > 1u && 27 if (size > 1u &&
27 (input[0] & 0xe0) == 0xc0 && 28 (input[0] & 0xe0) == 0xc0 &&
28 (input[1] & 0xc0) == 0x80) { 29 (input[1] & 0xc0) == 0x80) {
29 *symbol = (((input[0] & 0x1f) << 6) | 30 *symbol = (((input[0] & 0x1f) << 6) |
30 (input[1] & 0x3f)); 31 (input[1] & 0x3f));
31 if (*symbol > 0x7f) { 32 if (*symbol > 0x7f) {
32 return 2; 33 return 2;
33 } 34 }
34 } 35 }
35 // 3-byte UTF8 36 /* 3-byte UTF8 */
36 if (size > 2u && 37 if (size > 2u &&
37 (input[0] & 0xf0) == 0xe0 && 38 (input[0] & 0xf0) == 0xe0 &&
38 (input[1] & 0xc0) == 0x80 && 39 (input[1] & 0xc0) == 0x80 &&
39 (input[2] & 0xc0) == 0x80) { 40 (input[2] & 0xc0) == 0x80) {
40 *symbol = (((input[0] & 0x0f) << 12) | 41 *symbol = (((input[0] & 0x0f) << 12) |
41 ((input[1] & 0x3f) << 6) | 42 ((input[1] & 0x3f) << 6) |
42 (input[2] & 0x3f)); 43 (input[2] & 0x3f));
43 if (*symbol > 0x7ff) { 44 if (*symbol > 0x7ff) {
44 return 3; 45 return 3;
45 } 46 }
46 } 47 }
47 // 4-byte UTF8 48 /* 4-byte UTF8 */
48 if (size > 3u && 49 if (size > 3u &&
49 (input[0] & 0xf8) == 0xf0 && 50 (input[0] & 0xf8) == 0xf0 &&
50 (input[1] & 0xc0) == 0x80 && 51 (input[1] & 0xc0) == 0x80 &&
51 (input[2] & 0xc0) == 0x80 && 52 (input[2] & 0xc0) == 0x80 &&
52 (input[3] & 0xc0) == 0x80) { 53 (input[3] & 0xc0) == 0x80) {
53 *symbol = (((input[0] & 0x07) << 18) | 54 *symbol = (((input[0] & 0x07) << 18) |
54 ((input[1] & 0x3f) << 12) | 55 ((input[1] & 0x3f) << 12) |
55 ((input[2] & 0x3f) << 6) | 56 ((input[2] & 0x3f) << 6) |
56 (input[3] & 0x3f)); 57 (input[3] & 0x3f));
57 if (*symbol > 0xffff && *symbol <= 0x10ffff) { 58 if (*symbol > 0xffff && *symbol <= 0x10ffff) {
58 return 4; 59 return 4;
59 } 60 }
60 } 61 }
61 // Not UTF8, emit a special symbol above the UTF8-code space 62 /* Not UTF8, emit a special symbol above the UTF8-code space */
62 *symbol = 0x110000 | input[0]; 63 *symbol = 0x110000 | input[0];
63 return 1; 64 return 1;
64 } 65 }
65 66
66 } // namespace 67 /* Returns 1 if at least min_fraction of the data is UTF8-encoded.*/
67 68 BROTLI_BOOL BrotliIsMostlyUTF8(
68 // Returns true if at least min_fraction of the data is UTF8-encoded. 69 const uint8_t* data, const size_t pos, const size_t mask,
69 bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask, 70 const size_t length, const double min_fraction) {
70 const size_t length, const double min_fraction) {
71 size_t size_utf8 = 0; 71 size_t size_utf8 = 0;
72 size_t i = 0; 72 size_t i = 0;
73 while (i < length) { 73 while (i < length) {
74 int symbol; 74 int symbol;
75 size_t bytes_read = ParseAsUTF8( 75 size_t bytes_read =
76 &symbol, &data[(pos + i) & mask], length - i); 76 BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
77 i += bytes_read; 77 i += bytes_read;
78 if (symbol < 0x110000) size_utf8 += bytes_read; 78 if (symbol < 0x110000) size_utf8 += bytes_read;
79 } 79 }
80 return size_utf8 > min_fraction * static_cast<double>(length); 80 return TO_BROTLI_BOOL(size_utf8 > min_fraction * (double)length);
81 } 81 }
82 82
83 } // namespace brotli 83 #if defined(__cplusplus) || defined(c_plusplus)
84 } /* extern "C" */
85 #endif
OLDNEW
« no previous file with comments | « third_party/brotli/enc/utf8_util.h ('k') | third_party/brotli/enc/utf8_util.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698