| OLD | NEW |
| (Empty) |
| 1 // UTFConvert.cpp | |
| 2 | |
| 3 #include "StdAfx.h" | |
| 4 | |
| 5 #include "UTFConvert.h" | |
| 6 #include "Types.h" | |
| 7 | |
| 8 static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; | |
| 9 | |
| 10 static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_
t srcLen) | |
| 11 { | |
| 12 size_t destPos = 0, srcPos = 0; | |
| 13 for (;;) | |
| 14 { | |
| 15 Byte c; | |
| 16 int numAdds; | |
| 17 if (srcPos == srcLen) | |
| 18 { | |
| 19 *destLen = destPos; | |
| 20 return True; | |
| 21 } | |
| 22 c = (Byte)src[srcPos++]; | |
| 23 | |
| 24 if (c < 0x80) | |
| 25 { | |
| 26 if (dest) | |
| 27 dest[destPos] = (wchar_t)c; | |
| 28 destPos++; | |
| 29 continue; | |
| 30 } | |
| 31 if (c < 0xC0) | |
| 32 break; | |
| 33 for (numAdds = 1; numAdds < 5; numAdds++) | |
| 34 if (c < kUtf8Limits[numAdds]) | |
| 35 break; | |
| 36 UInt32 value = (c - kUtf8Limits[numAdds - 1]); | |
| 37 | |
| 38 do | |
| 39 { | |
| 40 Byte c2; | |
| 41 if (srcPos == srcLen) | |
| 42 break; | |
| 43 c2 = (Byte)src[srcPos++]; | |
| 44 if (c2 < 0x80 || c2 >= 0xC0) | |
| 45 break; | |
| 46 value <<= 6; | |
| 47 value |= (c2 - 0x80); | |
| 48 } | |
| 49 while (--numAdds != 0); | |
| 50 | |
| 51 if (value < 0x10000) | |
| 52 { | |
| 53 if (dest) | |
| 54 dest[destPos] = (wchar_t)value; | |
| 55 destPos++; | |
| 56 } | |
| 57 else | |
| 58 { | |
| 59 value -= 0x10000; | |
| 60 if (value >= 0x100000) | |
| 61 break; | |
| 62 if (dest) | |
| 63 { | |
| 64 dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10)); | |
| 65 dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF)); | |
| 66 } | |
| 67 destPos += 2; | |
| 68 } | |
| 69 } | |
| 70 *destLen = destPos; | |
| 71 return False; | |
| 72 } | |
| 73 | |
| 74 static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_
t srcLen) | |
| 75 { | |
| 76 size_t destPos = 0, srcPos = 0; | |
| 77 for (;;) | |
| 78 { | |
| 79 unsigned numAdds; | |
| 80 UInt32 value; | |
| 81 if (srcPos == srcLen) | |
| 82 { | |
| 83 *destLen = destPos; | |
| 84 return True; | |
| 85 } | |
| 86 value = src[srcPos++]; | |
| 87 if (value < 0x80) | |
| 88 { | |
| 89 if (dest) | |
| 90 dest[destPos] = (char)value; | |
| 91 destPos++; | |
| 92 continue; | |
| 93 } | |
| 94 if (value >= 0xD800 && value < 0xE000) | |
| 95 { | |
| 96 UInt32 c2; | |
| 97 if (value >= 0xDC00 || srcPos == srcLen) | |
| 98 break; | |
| 99 c2 = src[srcPos++]; | |
| 100 if (c2 < 0xDC00 || c2 >= 0xE000) | |
| 101 break; | |
| 102 value = ((value - 0xD800) << 10) | (c2 - 0xDC00); | |
| 103 } | |
| 104 for (numAdds = 1; numAdds < 5; numAdds++) | |
| 105 if (value < (((UInt32)1) << (numAdds * 5 + 6))) | |
| 106 break; | |
| 107 if (dest) | |
| 108 dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds))
); | |
| 109 destPos++; | |
| 110 do | |
| 111 { | |
| 112 numAdds--; | |
| 113 if (dest) | |
| 114 dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F)); | |
| 115 destPos++; | |
| 116 } | |
| 117 while (numAdds != 0); | |
| 118 } | |
| 119 *destLen = destPos; | |
| 120 return False; | |
| 121 } | |
| 122 | |
| 123 bool ConvertUTF8ToUnicode(const AString &src, UString &dest) | |
| 124 { | |
| 125 dest.Empty(); | |
| 126 size_t destLen = 0; | |
| 127 Utf8_To_Utf16(NULL, &destLen, src, src.Length()); | |
| 128 wchar_t *p = dest.GetBuffer((int)destLen); | |
| 129 Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length()); | |
| 130 p[destLen] = 0; | |
| 131 dest.ReleaseBuffer(); | |
| 132 return res ? true : false; | |
| 133 } | |
| 134 | |
| 135 bool ConvertUnicodeToUTF8(const UString &src, AString &dest) | |
| 136 { | |
| 137 dest.Empty(); | |
| 138 size_t destLen = 0; | |
| 139 Utf16_To_Utf8(NULL, &destLen, src, src.Length()); | |
| 140 char *p = dest.GetBuffer((int)destLen); | |
| 141 Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length()); | |
| 142 p[destLen] = 0; | |
| 143 dest.ReleaseBuffer(); | |
| 144 return res ? true : false; | |
| 145 } | |
| OLD | NEW |