OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "../../include/fxcrt/fx_basic.h" | 7 #include "../../include/fxcrt/fx_basic.h" |
8 void CFX_UTF8Decoder::Clear() | 8 void CFX_UTF8Decoder::Clear() { |
9 { | 9 m_Buffer.Clear(); |
10 m_Buffer.Clear(); | 10 m_PendingBytes = 0; |
| 11 } |
| 12 void CFX_UTF8Decoder::AppendChar(FX_DWORD ch) { |
| 13 m_Buffer.AppendChar((FX_WCHAR)ch); |
| 14 } |
| 15 void CFX_UTF8Decoder::Input(uint8_t byte) { |
| 16 if (byte < 0x80) { |
11 m_PendingBytes = 0; | 17 m_PendingBytes = 0; |
| 18 m_Buffer.AppendChar(byte); |
| 19 } else if (byte < 0xc0) { |
| 20 if (m_PendingBytes == 0) { |
| 21 return; |
| 22 } |
| 23 m_PendingBytes--; |
| 24 m_PendingChar |= (byte & 0x3f) << (m_PendingBytes * 6); |
| 25 if (m_PendingBytes == 0) { |
| 26 AppendChar(m_PendingChar); |
| 27 } |
| 28 } else if (byte < 0xe0) { |
| 29 m_PendingBytes = 1; |
| 30 m_PendingChar = (byte & 0x1f) << 6; |
| 31 } else if (byte < 0xf0) { |
| 32 m_PendingBytes = 2; |
| 33 m_PendingChar = (byte & 0x0f) << 12; |
| 34 } else if (byte < 0xf8) { |
| 35 m_PendingBytes = 3; |
| 36 m_PendingChar = (byte & 0x07) << 18; |
| 37 } else if (byte < 0xfc) { |
| 38 m_PendingBytes = 4; |
| 39 m_PendingChar = (byte & 0x03) << 24; |
| 40 } else if (byte < 0xfe) { |
| 41 m_PendingBytes = 5; |
| 42 m_PendingChar = (byte & 0x01) << 30; |
| 43 } |
12 } | 44 } |
13 void CFX_UTF8Decoder::AppendChar(FX_DWORD ch) | 45 void CFX_UTF8Encoder::Input(FX_WCHAR unicode) { |
14 { | 46 if ((FX_DWORD)unicode < 0x80) { |
15 m_Buffer.AppendChar((FX_WCHAR)ch); | 47 m_Buffer.AppendChar(unicode); |
| 48 } else { |
| 49 if ((FX_DWORD)unicode >= 0x80000000) { |
| 50 return; |
| 51 } |
| 52 int nbytes = 0; |
| 53 if ((FX_DWORD)unicode < 0x800) { |
| 54 nbytes = 2; |
| 55 } else if ((FX_DWORD)unicode < 0x10000) { |
| 56 nbytes = 3; |
| 57 } else if ((FX_DWORD)unicode < 0x200000) { |
| 58 nbytes = 4; |
| 59 } else if ((FX_DWORD)unicode < 0x4000000) { |
| 60 nbytes = 5; |
| 61 } else { |
| 62 nbytes = 6; |
| 63 } |
| 64 static uint8_t prefix[] = {0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; |
| 65 int order = 1 << ((nbytes - 1) * 6); |
| 66 int code = unicode; |
| 67 m_Buffer.AppendChar(prefix[nbytes - 2] | (code / order)); |
| 68 for (int i = 0; i < nbytes - 1; i++) { |
| 69 code = code % order; |
| 70 order >>= 6; |
| 71 m_Buffer.AppendChar(0x80 | (code / order)); |
| 72 } |
| 73 } |
16 } | 74 } |
17 void CFX_UTF8Decoder::Input(uint8_t byte) | 75 CFX_ByteString FX_UTF8Encode(const FX_WCHAR* pwsStr, FX_STRSIZE len) { |
18 { | 76 FXSYS_assert(pwsStr != NULL); |
19 if (byte < 0x80) { | 77 if (len < 0) { |
20 m_PendingBytes = 0; | 78 len = FXSYS_wcslen(pwsStr); |
21 m_Buffer.AppendChar(byte); | 79 } |
22 } else if (byte < 0xc0) { | 80 CFX_UTF8Encoder encoder; |
23 if (m_PendingBytes == 0) { | 81 while (len-- > 0) { |
24 return; | 82 encoder.Input(*pwsStr++); |
25 } | 83 } |
26 m_PendingBytes --; | 84 return encoder.GetResult(); |
27 m_PendingChar |= (byte & 0x3f) << (m_PendingBytes * 6); | |
28 if (m_PendingBytes == 0) { | |
29 AppendChar(m_PendingChar); | |
30 } | |
31 } else if (byte < 0xe0) { | |
32 m_PendingBytes = 1; | |
33 m_PendingChar = (byte & 0x1f) << 6; | |
34 } else if (byte < 0xf0) { | |
35 m_PendingBytes = 2; | |
36 m_PendingChar = (byte & 0x0f) << 12; | |
37 } else if (byte < 0xf8) { | |
38 m_PendingBytes = 3; | |
39 m_PendingChar = (byte & 0x07) << 18; | |
40 } else if (byte < 0xfc) { | |
41 m_PendingBytes = 4; | |
42 m_PendingChar = (byte & 0x03) << 24; | |
43 } else if (byte < 0xfe) { | |
44 m_PendingBytes = 5; | |
45 m_PendingChar = (byte & 0x01) << 30; | |
46 } | |
47 } | 85 } |
48 void CFX_UTF8Encoder::Input(FX_WCHAR unicode) | |
49 { | |
50 if ((FX_DWORD)unicode < 0x80) { | |
51 m_Buffer.AppendChar(unicode); | |
52 } else { | |
53 if ((FX_DWORD)unicode >= 0x80000000) { | |
54 return; | |
55 } | |
56 int nbytes = 0; | |
57 if ((FX_DWORD)unicode < 0x800) { | |
58 nbytes = 2; | |
59 } else if ((FX_DWORD)unicode < 0x10000) { | |
60 nbytes = 3; | |
61 } else if ((FX_DWORD)unicode < 0x200000) { | |
62 nbytes = 4; | |
63 } else if ((FX_DWORD)unicode < 0x4000000) { | |
64 nbytes = 5; | |
65 } else { | |
66 nbytes = 6; | |
67 } | |
68 static uint8_t prefix[] = {0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; | |
69 int order = 1 << ((nbytes - 1) * 6); | |
70 int code = unicode; | |
71 m_Buffer.AppendChar(prefix[nbytes - 2] | (code / order)); | |
72 for (int i = 0; i < nbytes - 1; i ++) { | |
73 code = code % order; | |
74 order >>= 6; | |
75 m_Buffer.AppendChar(0x80 | (code / order)); | |
76 } | |
77 } | |
78 } | |
79 CFX_ByteString FX_UTF8Encode(const FX_WCHAR* pwsStr, FX_STRSIZE len) | |
80 { | |
81 FXSYS_assert(pwsStr != NULL); | |
82 if (len < 0) { | |
83 len = FXSYS_wcslen(pwsStr); | |
84 } | |
85 CFX_UTF8Encoder encoder; | |
86 while (len -- > 0) { | |
87 encoder.Input(*pwsStr ++); | |
88 } | |
89 return encoder.GetResult(); | |
90 } | |
OLD | NEW |