Chromium Code Reviews
| Index: core/fpdftext/fpdf_text_int.cpp |
| diff --git a/core/fpdftext/fpdf_text_int.cpp b/core/fpdftext/fpdf_text_int.cpp |
| index 8e8686c4a1ac6bcba681f41e5446c829543dfc78..93d35bce6270f4da587309263a9b51361323c109 100644 |
| --- a/core/fpdftext/fpdf_text_int.cpp |
| +++ b/core/fpdftext/fpdf_text_int.cpp |
| @@ -4,8 +4,6 @@ |
| // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| -#include "core/fpdftext/fpdf_text_int.h" |
| - |
| #include <algorithm> |
| #include <cctype> |
| #include <cwctype> |
| @@ -14,15 +12,17 @@ |
| #include <vector> |
| #include "core/fpdfapi/fpdf_font/include/cpdf_font.h" |
| +#include "core/fpdfapi/fpdf_page/include/cpdf_form.h" |
| #include "core/fpdfapi/fpdf_page/include/cpdf_formobject.h" |
| +#include "core/fpdfapi/fpdf_page/include/cpdf_page.h" |
| #include "core/fpdfapi/fpdf_page/include/cpdf_pageobject.h" |
| #include "core/fpdfapi/fpdf_page/include/cpdf_textobject.h" |
| #include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h" |
| #include "core/fpdfapi/fpdf_parser/include/cpdf_string.h" |
| -#include "core/fpdftext/include/ipdf_linkextract.h" |
| -#include "core/fpdftext/include/ipdf_textpage.h" |
| -#include "core/fpdftext/include/ipdf_textpagefind.h" |
| -#include "core/fpdftext/unicodenormalization.h" |
| +#include "core/fpdftext/include/cpdf_linkextract.h" |
| +#include "core/fpdftext/include/cpdf_textpage.h" |
| +#include "core/fpdftext/include/cpdf_textpagefind.h" |
| +#include "core/fpdftext/unicodenormalizationdata.h" |
| #include "core/fxcrt/fx_bidi.h" |
| #include "core/fxcrt/include/fx_ext.h" |
| #include "core/fxcrt/include/fx_ucd.h" |
| @@ -36,9 +36,19 @@ |
| #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 |
| #define FPDFTEXT_CONSECUTIVE 0x00000004 |
| +#define FPDFTEXT_CHAR_ERROR -1 |
| +#define FPDFTEXT_CHAR_NORMAL 0 |
| +#define FPDFTEXT_CHAR_GENERATED 1 |
| +#define FPDFTEXT_CHAR_UNUNICODE 2 |
| +#define FPDFTEXT_CHAR_HYPHEN 3 |
| +#define FPDFTEXT_CHAR_PIECE 4 |
| +#define FPDFTEXT_MC_PASS 0 |
| +#define FPDFTEXT_MC_DONE 1 |
| +#define FPDFTEXT_MC_DELAY 2 |
| + |
| namespace { |
| -FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) { |
| +FX_BOOL IsIgnoreSpaceCharacter(FX_WCHAR curChar) { |
| if (curChar < 255) { |
| return FALSE; |
| } |
| @@ -55,7 +65,7 @@ FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) { |
| return TRUE; |
| } |
| -FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) { |
| +FX_FLOAT NormalizeThreshold(FX_FLOAT threshold) { |
| if (threshold < 300) { |
| return threshold / 2.0f; |
| } |
| @@ -68,8 +78,8 @@ FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) { |
| return threshold / 6.0f; |
| } |
| -FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, |
| - const CFX_Matrix& matrix) { |
| +FX_FLOAT CalculateBaseSpace(const CPDF_TextObject* pTextObj, |
| + const CFX_Matrix& matrix) { |
| FX_FLOAT baseSpace = 0.0; |
| const int nItems = pTextObj->CountItems(); |
| if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) { |
| @@ -94,23 +104,45 @@ FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, |
| return baseSpace; |
| } |
| -const FX_FLOAT kDefaultFontSize = 1.0f; |
| - |
| -} // namespace |
| +const uint16_t* const g_UnicodeData_Normalization_Maps[5] = { |
|
Tom Sepez (2016/04/18 22:50:13):
nit: data should probably go ahead of the function
dsinclair (2016/04/19 13:08:07):
Done.
|
| + nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2, |
| + g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4}; |
| -IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, |
| - int flags) { |
| - return new CPDF_TextPage(pPage, flags); |
| +FX_STRSIZE Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst) { |
| + wch = wch & 0xFFFF; |
| + FX_WCHAR wFind = g_UnicodeData_Normalization[wch]; |
| + if (!wFind) { |
| + if (pDst) { |
| + *pDst = wch; |
| + } |
| + return 1; |
| + } |
| + if (wFind >= 0x8000) { |
| + wch = wFind - 0x8000; |
| + wFind = 1; |
| + } else { |
| + wch = wFind & 0x0FFF; |
| + wFind >>= 12; |
| + } |
| + const uint16_t* pMap = g_UnicodeData_Normalization_Maps[wFind]; |
| + if (pMap == g_UnicodeData_Normalization_Map4) { |
| + pMap = g_UnicodeData_Normalization_Map4 + wch; |
| + wFind = (FX_WCHAR)(*pMap++); |
| + } else { |
| + pMap += wch; |
| + } |
| + if (pDst) { |
| + FX_WCHAR n = wFind; |
| + while (n--) { |
| + *pDst++ = *pMap++; |
| + } |
| + } |
| + return (FX_STRSIZE)wFind; |
| } |
| -IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( |
| - const IPDF_TextPage* pTextPage) { |
| - return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; |
| -} |
| +const FX_FLOAT kDefaultFontSize = 1.0f; |
| -IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { |
| - return new CPDF_LinkExtract(); |
| -} |
| +} // namespace |
| #define TEXT_BLANK_CHAR L' ' |
| #define TEXT_LINEFEED_CHAR L'\n' |
| @@ -932,10 +964,10 @@ void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, |
| info.m_Index = m_TextBuf.GetLength(); |
| if (wChar >= 0xFB00 && wChar <= 0xFB06) { |
| FX_WCHAR* pDst = NULL; |
| - FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
| + FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); |
| if (nCount >= 1) { |
| pDst = FX_Alloc(FX_WCHAR, nCount); |
| - FX_Unicode_GetNormalization(wChar, pDst); |
| + Unicode_GetNormalization(wChar, pDst); |
| for (int nIndex = 0; nIndex < nCount; nIndex++) { |
| PAGECHAR_INFO info2 = info; |
| info2.m_Unicode = pDst[nIndex]; |
| @@ -960,10 +992,10 @@ void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, |
| info.m_Index = m_TextBuf.GetLength(); |
| wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); |
| FX_WCHAR* pDst = NULL; |
| - FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
| + FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); |
| if (nCount >= 1) { |
| pDst = FX_Alloc(FX_WCHAR, nCount); |
| - FX_Unicode_GetNormalization(wChar, pDst); |
| + Unicode_GetNormalization(wChar, pDst); |
| for (int nIndex = 0; nIndex < nCount; nIndex++) { |
| PAGECHAR_INFO info2 = info; |
| info2.m_Unicode = pDst[nIndex]; |
| @@ -1377,7 +1409,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { |
| m_pPreTextObj = pTextObj; |
| m_perMatrix.Copy(formMatrix); |
| int nItems = pTextObj->CountItems(); |
| - FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); |
| + FX_FLOAT baseSpace = CalculateBaseSpace(pTextObj, matrix); |
| const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); |
| const FX_BOOL bIsBidiAndMirrorInverse = |
| @@ -1430,7 +1462,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { |
| int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); |
| threshold = this_width > last_width ? (FX_FLOAT)this_width |
| : (FX_FLOAT)last_width; |
| - threshold = _NormalizeThreshold(threshold); |
| + threshold = NormalizeThreshold(threshold); |
| threshold = fontsize_h * threshold / 1000; |
| } |
| if (threshold && (spacing && spacing >= threshold)) { |
| @@ -1898,7 +1930,7 @@ FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { |
| return TRUE; |
| } |
| -CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) |
| +CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage) |
| : m_pTextPage(pTextPage), |
| m_flags(0), |
| m_findNextStart(-1), |
| @@ -2054,8 +2086,8 @@ FX_BOOL CPDF_TextPageFind::FindNext() { |
| CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; |
| int lastChar = lastWord.GetAt(lastWord.GetLength() - 1); |
| if (nStartPos == nResultPos && |
| - !(_IsIgnoreSpaceCharacter(lastChar) || |
| - _IsIgnoreSpaceCharacter(curChar))) { |
| + !(IsIgnoreSpaceCharacter(lastChar) || |
| + IsIgnoreSpaceCharacter(curChar))) { |
| bMatch = FALSE; |
| } |
| for (int d = PreResEndPos; d < nResultPos; d++) { |
| @@ -2174,7 +2206,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { |
| while (pos < csWord.GetLength()) { |
| CFX_WideString curStr = csWord.Mid(pos, 1); |
| FX_WCHAR curChar = csWord.GetAt(pos); |
| - if (_IsIgnoreSpaceCharacter(curChar)) { |
| + if (IsIgnoreSpaceCharacter(curChar)) { |
| if (pos > 0 && curChar == 0x2019) { |
| pos++; |
| continue; |
| @@ -2306,7 +2338,7 @@ CPDF_LinkExtract::~CPDF_LinkExtract() { |
| DeleteLinkList(); |
| } |
| -FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { |
| +FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) { |
| if (!pTextPage || !pTextPage->IsParsed()) |
| return FALSE; |