| Index: xfa/fgas/crt/fgas_codepage.cpp
|
| diff --git a/xfa/fgas/crt/fgas_codepage.cpp b/xfa/fgas/crt/fgas_codepage.cpp
|
| index 7362a89d30391ea846c6793057a1c54518f5a7b8..2087cac1384169c74bf04f8982f76237614fea31 100644
|
| --- a/xfa/fgas/crt/fgas_codepage.cpp
|
| +++ b/xfa/fgas/crt/fgas_codepage.cpp
|
| @@ -8,7 +8,24 @@
|
| #include "xfa/fgas/crt/fgas_codepage.h"
|
| #include "xfa/fgas/crt/fgas_language.h"
|
|
|
| -static const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = {
|
| +namespace {
|
| +
|
| +struct FX_STR2CPHASH {
|
| + uint32_t uHash;
|
| + uint16_t uCodePage;
|
| +};
|
| +
|
| +struct FX_CHARSET_MAP {
|
| + uint16_t charset;
|
| + uint16_t codepage;
|
| +};
|
| +
|
| +struct FX_LANG2CPMAP {
|
| + uint16_t wLanguage;
|
| + uint16_t wCodepage;
|
| +};
|
| +
|
| +const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = {
|
| {0, 1252}, {1, 0}, {2, 42}, {77, 10000}, {78, 10001},
|
| {79, 10003}, {80, 10008}, {81, 10002}, {83, 10005}, {84, 10004},
|
| {85, 10006}, {86, 10081}, {87, 10021}, {88, 10029}, {89, 10007},
|
| @@ -17,24 +34,8 @@ static const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = {
|
| {186, 1257}, {204, 1251}, {222, 874}, {238, 1250}, {254, 437},
|
| {255, 850},
|
| };
|
| -uint16_t FX_GetCodePageFromCharset(uint8_t charset) {
|
| - int32_t iEnd = sizeof(g_FXCharset2CodePageTable) / sizeof(FX_CHARSET_MAP) - 1;
|
| - ASSERT(iEnd >= 0);
|
| - int32_t iStart = 0, iMid;
|
| - do {
|
| - iMid = (iStart + iEnd) / 2;
|
| - const FX_CHARSET_MAP& cp = g_FXCharset2CodePageTable[iMid];
|
| - if (charset == cp.charset) {
|
| - return cp.codepage;
|
| - } else if (charset < cp.charset) {
|
| - iEnd = iMid - 1;
|
| - } else {
|
| - iStart = iMid + 1;
|
| - }
|
| - } while (iStart <= iEnd);
|
| - return 0xFFFF;
|
| -}
|
| -static const FX_CHARSET_MAP g_FXCodepage2CharsetTable[] = {
|
| +
|
| +const FX_CHARSET_MAP g_FXCodepage2CharsetTable[] = {
|
| {1, 0}, {2, 42}, {254, 437}, {255, 850}, {222, 874},
|
| {128, 932}, {134, 936}, {129, 949}, {136, 950}, {238, 1250},
|
| {204, 1251}, {0, 1252}, {161, 1253}, {162, 1254}, {177, 1255},
|
| @@ -43,23 +44,7 @@ static const FX_CHARSET_MAP g_FXCodepage2CharsetTable[] = {
|
| {84, 10004}, {85, 10006}, {86, 10081}, {87, 10021}, {88, 10029},
|
| {89, 10007},
|
| };
|
| -uint16_t FX_GetCharsetFromCodePage(uint16_t codepage) {
|
| - int32_t iEnd = sizeof(g_FXCodepage2CharsetTable) / sizeof(FX_CHARSET_MAP) - 1;
|
| - ASSERT(iEnd >= 0);
|
| - int32_t iStart = 0, iMid;
|
| - do {
|
| - iMid = (iStart + iEnd) / 2;
|
| - const FX_CHARSET_MAP& cp = g_FXCodepage2CharsetTable[iMid];
|
| - if (codepage == cp.codepage) {
|
| - return cp.charset;
|
| - } else if (codepage < cp.codepage) {
|
| - iEnd = iMid - 1;
|
| - } else {
|
| - iStart = iMid + 1;
|
| - }
|
| - } while (iStart <= iEnd);
|
| - return 0xFFFF;
|
| -}
|
| +
|
| const FX_LANG2CPMAP g_FXLang2CodepageTable[] = {
|
| {FX_LANG_Arabic_SaudiArabia, FX_CODEPAGE_MSWin_Arabic},
|
| {FX_LANG_Bulgarian_Bulgaria, FX_CODEPAGE_MSWin_Cyrillic},
|
| @@ -197,24 +182,8 @@ const FX_LANG2CPMAP g_FXLang2CodepageTable[] = {
|
| {FX_LANG_Spanish_Nicaragua, FX_CODEPAGE_MSWin_WesternEuropean},
|
| {FX_LANG_Spanish_PuertoRico, FX_CODEPAGE_MSWin_WesternEuropean},
|
| };
|
| -uint16_t FX_GetDefCodePageByLanguage(uint16_t wLanguage) {
|
| - int32_t iEnd = sizeof(g_FXLang2CodepageTable) / sizeof(FX_LANG2CPMAP) - 1;
|
| - ASSERT(iEnd >= 0);
|
| - int32_t iStart = 0, iMid;
|
| - do {
|
| - iMid = (iStart + iEnd) / 2;
|
| - const FX_LANG2CPMAP& cp = g_FXLang2CodepageTable[iMid];
|
| - if (wLanguage == cp.wLanguage) {
|
| - return cp.wCodepage;
|
| - } else if (wLanguage < cp.wLanguage) {
|
| - iEnd = iMid - 1;
|
| - } else {
|
| - iStart = iMid + 1;
|
| - }
|
| - } while (iStart <= iEnd);
|
| - return 0xFFFF;
|
| -}
|
| -static const FX_STR2CPHASH g_FXCPHashTable[] = {
|
| +
|
| +const FX_STR2CPHASH g_FXCPHashTable[] = {
|
| {0xd45, 0x6faf}, {0xd46, 0x6fb0}, {0xd47, 0x6fb1},
|
| {0xd48, 0x6fb2}, {0xd49, 0x4e6}, {0xd4d, 0x6fbd},
|
| {0xe9e, 0x4e4}, {0xc998, 0x1b5}, {0x18ef0, 0x3a8},
|
| @@ -301,7 +270,8 @@ static const FX_STR2CPHASH g_FXCPHashTable[] = {
|
| {0xf3d463c2, 0x3a4}, {0xf52a70a3, 0xc42e}, {0xf5693147, 0x6fb3},
|
| {0xf637e157, 0x478}, {0xfc213f3a, 0x2717}, {0xff654d14, 0x3b5},
|
| };
|
| -uint16_t FX_GetCodePageFromStringA(const FX_CHAR* pStr, int32_t iLength) {
|
| +
|
| +uint16_t GetCodePageFromStringA(const FX_CHAR* pStr, int32_t iLength) {
|
| ASSERT(pStr != NULL);
|
| if (iLength < 0) {
|
| iLength = FXSYS_strlen(pStr);
|
| @@ -326,7 +296,64 @@ uint16_t FX_GetCodePageFromStringA(const FX_CHAR* pStr, int32_t iLength) {
|
| } while (iStart <= iEnd);
|
| return 0xFFFF;
|
| }
|
| -uint16_t FX_GetCodePageFormStringW(const FX_WCHAR* pStr, int32_t iLength) {
|
| +
|
| +} // namespace
|
| +
|
| +uint16_t FX_GetCodePageFromCharset(uint8_t charset) {
|
| + int32_t iEnd = sizeof(g_FXCharset2CodePageTable) / sizeof(FX_CHARSET_MAP) - 1;
|
| + ASSERT(iEnd >= 0);
|
| + int32_t iStart = 0, iMid;
|
| + do {
|
| + iMid = (iStart + iEnd) / 2;
|
| + const FX_CHARSET_MAP& cp = g_FXCharset2CodePageTable[iMid];
|
| + if (charset == cp.charset) {
|
| + return cp.codepage;
|
| + } else if (charset < cp.charset) {
|
| + iEnd = iMid - 1;
|
| + } else {
|
| + iStart = iMid + 1;
|
| + }
|
| + } while (iStart <= iEnd);
|
| + return 0xFFFF;
|
| +}
|
| +
|
| +uint16_t FX_GetCharsetFromCodePage(uint16_t codepage) {
|
| + int32_t iEnd = sizeof(g_FXCodepage2CharsetTable) / sizeof(FX_CHARSET_MAP) - 1;
|
| + ASSERT(iEnd >= 0);
|
| + int32_t iStart = 0, iMid;
|
| + do {
|
| + iMid = (iStart + iEnd) / 2;
|
| + const FX_CHARSET_MAP& cp = g_FXCodepage2CharsetTable[iMid];
|
| + if (codepage == cp.codepage) {
|
| + return cp.charset;
|
| + } else if (codepage < cp.codepage) {
|
| + iEnd = iMid - 1;
|
| + } else {
|
| + iStart = iMid + 1;
|
| + }
|
| + } while (iStart <= iEnd);
|
| + return 0xFFFF;
|
| +}
|
| +
|
| +uint16_t FX_GetDefCodePageByLanguage(uint16_t wLanguage) {
|
| + int32_t iEnd = sizeof(g_FXLang2CodepageTable) / sizeof(FX_LANG2CPMAP) - 1;
|
| + ASSERT(iEnd >= 0);
|
| + int32_t iStart = 0, iMid;
|
| + do {
|
| + iMid = (iStart + iEnd) / 2;
|
| + const FX_LANG2CPMAP& cp = g_FXLang2CodepageTable[iMid];
|
| + if (wLanguage == cp.wLanguage) {
|
| + return cp.wCodepage;
|
| + } else if (wLanguage < cp.wLanguage) {
|
| + iEnd = iMid - 1;
|
| + } else {
|
| + iStart = iMid + 1;
|
| + }
|
| + } while (iStart <= iEnd);
|
| + return 0xFFFF;
|
| +}
|
| +
|
| +uint16_t FX_GetCodePageFromStringW(const FX_WCHAR* pStr, int32_t iLength) {
|
| if (iLength < 0) {
|
| iLength = FXSYS_wcslen(pStr);
|
| }
|
| @@ -339,5 +366,141 @@ uint16_t FX_GetCodePageFormStringW(const FX_WCHAR* pStr, int32_t iLength) {
|
| *pBuf++ = (FX_CHAR)*pStr++;
|
| }
|
| csStr.ReleaseBuffer(iLength);
|
| - return FX_GetCodePageFromStringA(csStr.c_str(), iLength);
|
| + return GetCodePageFromStringA(csStr.c_str(), iLength);
|
| +}
|
| +
|
| +void FX_SwapByteOrder(FX_WCHAR* pStr, int32_t iLength) {
|
| + ASSERT(pStr != NULL);
|
| + if (iLength < 0) {
|
| + iLength = FXSYS_wcslen(pStr);
|
| + }
|
| + uint16_t wch;
|
| + if (sizeof(FX_WCHAR) > 2) {
|
| + while (iLength-- > 0) {
|
| + wch = (uint16_t)*pStr;
|
| + wch = (wch >> 8) | (wch << 8);
|
| + wch &= 0x00FF;
|
| + *pStr++ = wch;
|
| + }
|
| + } else {
|
| + while (iLength-- > 0) {
|
| + wch = (uint16_t)*pStr;
|
| + wch = (wch >> 8) | (wch << 8);
|
| + *pStr++ = wch;
|
| + }
|
| + }
|
| +}
|
| +
|
| +void FX_UTF16ToWChar(void* pBuffer, int32_t iLength) {
|
| + ASSERT(pBuffer != NULL && iLength > 0);
|
| + if (sizeof(FX_WCHAR) == 2) {
|
| + return;
|
| + }
|
| + uint16_t* pSrc = (uint16_t*)pBuffer;
|
| + FX_WCHAR* pDst = (FX_WCHAR*)pBuffer;
|
| + while (--iLength >= 0) {
|
| + pDst[iLength] = (FX_WCHAR)pSrc[iLength];
|
| + }
|
| +}
|
| +
|
| +void FX_WCharToUTF16(void* pBuffer, int32_t iLength) {
|
| + ASSERT(pBuffer != NULL && iLength > 0);
|
| + if (sizeof(FX_WCHAR) == 2) {
|
| + return;
|
| + }
|
| + const FX_WCHAR* pSrc = (const FX_WCHAR*)pBuffer;
|
| + uint16_t* pDst = (uint16_t*)pBuffer;
|
| + while (--iLength >= 0) {
|
| + *pDst++ = (uint16_t)*pSrc++;
|
| + }
|
| +}
|
| +
|
| +int32_t FX_DecodeString(uint16_t wCodePage,
|
| + const FX_CHAR* pSrc,
|
| + int32_t* pSrcLen,
|
| + FX_WCHAR* pDst,
|
| + int32_t* pDstLen,
|
| + FX_BOOL bErrBreak) {
|
| + if (wCodePage == FX_CODEPAGE_UTF8) {
|
| + return FX_UTF8Decode(pSrc, pSrcLen, pDst, pDstLen);
|
| + }
|
| + return -1;
|
| +}
|
| +int32_t FX_UTF8Decode(const FX_CHAR* pSrc,
|
| + int32_t* pSrcLen,
|
| + FX_WCHAR* pDst,
|
| + int32_t* pDstLen) {
|
| + if (pSrcLen == NULL || pDstLen == NULL) {
|
| + return -1;
|
| + }
|
| + int32_t iSrcLen = *pSrcLen;
|
| + if (iSrcLen < 1) {
|
| + *pSrcLen = *pDstLen = 0;
|
| + return 1;
|
| + }
|
| + int32_t iDstLen = *pDstLen;
|
| + FX_BOOL bValidDst = (pDst != NULL && iDstLen > 0);
|
| + uint32_t dwCode = 0;
|
| + int32_t iPending = 0;
|
| + int32_t iSrcNum = 0, iDstNum = 0;
|
| + int32_t k = 0;
|
| + int32_t iIndex = 0;
|
| + k = 1;
|
| + while (iIndex < iSrcLen) {
|
| + uint8_t byte = (uint8_t) * (pSrc + iIndex);
|
| + if (byte < 0x80) {
|
| + iPending = 0;
|
| + k = 1;
|
| + iDstNum++;
|
| + iSrcNum += k;
|
| + if (bValidDst) {
|
| + *pDst++ = byte;
|
| + if (iDstNum >= iDstLen) {
|
| + break;
|
| + }
|
| + }
|
| + } else if (byte < 0xc0) {
|
| + if (iPending < 1) {
|
| + break;
|
| + }
|
| + iPending--;
|
| + dwCode |= (byte & 0x3f) << (iPending * 6);
|
| + if (iPending == 0) {
|
| + iDstNum++;
|
| + iSrcNum += k;
|
| + if (bValidDst) {
|
| + *pDst++ = dwCode;
|
| + if (iDstNum >= iDstLen) {
|
| + break;
|
| + }
|
| + }
|
| + }
|
| + } else if (byte < 0xe0) {
|
| + iPending = 1;
|
| + k = 2;
|
| + dwCode = (byte & 0x1f) << 6;
|
| + } else if (byte < 0xf0) {
|
| + iPending = 2;
|
| + k = 3;
|
| + dwCode = (byte & 0x0f) << 12;
|
| + } else if (byte < 0xf8) {
|
| + iPending = 3;
|
| + k = 4;
|
| + dwCode = (byte & 0x07) << 18;
|
| + } else if (byte < 0xfc) {
|
| + iPending = 4;
|
| + k = 5;
|
| + dwCode = (byte & 0x03) << 24;
|
| + } else if (byte < 0xfe) {
|
| + iPending = 5;
|
| + k = 6;
|
| + dwCode = (byte & 0x01) << 30;
|
| + } else {
|
| + break;
|
| + }
|
| + iIndex++;
|
| + }
|
| + *pSrcLen = iSrcNum;
|
| + *pDstLen = iDstNum;
|
| + return 1;
|
| }
|
|
|