OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright 2010 The Native Client Authors. All rights reserved. | |
3 * Use of this source code is governed by a BSD-style license that can | |
4 * be found in the LICENSE file. | |
5 */ | |
6 | |
7 #include "native_client/src/trusted/plugin/srpc/string_encoding.h" | |
8 | |
9 #include "native_client/src/include/nacl_platform.h" | |
10 | |
11 | |
12 namespace plugin { | |
13 | |
14 // NPAPI requires us to encode byte strings as UTF-8. Unfortunately | |
15 // this is rather inefficient, in terms of both space and time. | |
16 | |
// Encodes an arbitrary byte string as UTF-8, treating each input byte as a
// code point in the range U+0000..U+00FF.
//
//   input, input_byte_count: the bytes to encode (need not be terminated).
//   result: receives a malloc()ed buffer holding the encoded bytes followed
//       by a null terminator.  The buffer is not freed by this function.
//   result_byte_count: receives the encoded length, excluding the
//       terminator.
//
// Returns true on success.  Returns false, without touching the
// out-parameters, if the worst-case output size does not fit in a size_t
// or if the allocation fails.
bool ByteStringAsUTF8(const char* input, size_t input_byte_count,
                      char** result, size_t* result_byte_count) {
  // UTF-8 encoding may result in a 2x size increase at the most, plus one
  // byte for the null terminator.  Reject inputs for which that worst-case
  // size would wrap around; otherwise malloc() would be handed a tiny size
  // and the loop below would write past the end of the buffer.
  // TODO(mseaborn): We could do a pre-scan to get the real size.
  // If we wanted to be faster, we could do a word-by-word pre-scan
  // to check for top-bit-set characters.
  const size_t kSizeMax = static_cast<size_t>(-1);
  if (input_byte_count > (kSizeMax - 1) / 2) {
    return false;
  }
  const size_t max_output_size = input_byte_count * 2;
  // We include a null terminator for convenience.
  char* output = static_cast<char*>(malloc(max_output_size + 1));
  if (output == NULL) {
    return false;
  }
  char* dest_ptr = output;
  for (size_t i = 0; i < input_byte_count; i++) {
    const unsigned char ch = static_cast<unsigned char>(input[i]);
    if (ch < 128) {
      // ASCII: one byte encoding, copied through unchanged.
      *dest_ptr++ = static_cast<char>(ch);
    } else {
      // 0x80..0xff: two byte encoding, 110000xx 10xxxxxx.
      *dest_ptr++ = static_cast<char>(0xc0 | (ch >> 6));    /* Top 2 bits */
      *dest_ptr++ = static_cast<char>(0x80 | (ch & 0x3f));  /* Bottom 6 bits */
    }
  }
  *dest_ptr = 0;
  *result = output;
  *result_byte_count = dest_ptr - output;
  return true;
}
46 | |
// Decodes UTF-8 text whose code points all lie in U+0000..U+00FF back into
// a byte string, one output byte per code point.
//
//   input, input_byte_count: the UTF-8 bytes to decode (need not be
//       terminated).
//   result: receives a malloc()ed buffer holding the decoded bytes followed
//       by a null terminator.  The buffer is not freed by this function.
//   result_byte_count: receives the decoded length, excluding the
//       terminator.
//
// Returns true on success.  Returns false, without touching the
// out-parameters, if the allocation fails or if the input is not
// acceptable: a truncated sequence, a lead byte that does not start a
// two byte sequence, a bad continuation byte, an overlong encoding of an
// ASCII character, or a code point above 0xff.
bool ByteStringFromUTF8(const char* input, size_t input_byte_count,
                        char** result, size_t* result_byte_count) {
  // The output cannot be larger than the input, but we add one byte for
  // the null terminator; make sure that addition cannot wrap around.
  if (input_byte_count == static_cast<size_t>(-1)) {
    return false;
  }
  char* output = static_cast<char*>(malloc(input_byte_count + 1));
  if (output == NULL) {
    return false;
  }
  char* dest_ptr = output;
  size_t i = 0;
  while (i < input_byte_count) {
    const unsigned char ch = static_cast<unsigned char>(input[i]);
    if ((ch & 0x80) == 0) {
      // One byte encoding.
      *dest_ptr++ = static_cast<char>(ch);
      i++;
      continue;
    }
    // Anything else must be a complete two byte encoding: the first byte
    // must match 110xxxxxb and the second must match 10xxxxxxb.  Longer
    // sequences and stray continuation bytes are rejected here.
    bool ok = (i + 1 < input_byte_count) && ((ch & 0xe0) == 0xc0);
    unsigned int value = 0;
    if (ok) {
      const unsigned char ch2 = static_cast<unsigned char>(input[i + 1]);
      ok = ((ch2 & 0xc0) == 0x80);
      value = ((ch & 0x1fu) << 6) | (ch2 & 0x3fu);
    }
    // value < 128 is an overlong encoding; value >= 0x100 will not fit
    // into a byte.
    if (!ok || value < 128 || value >= 0x100) {
      free(output);
      return false;
    }
    *dest_ptr++ = static_cast<char>(value);
    i += 2;
  }
  *dest_ptr = 0;
  *result = output;
  *result_byte_count = dest_ptr - output;
  return true;
}
100 | |
101 } // namespace plugin | |
OLD | NEW |