Chromium Code Reviews| Index: core/src/fpdftext/fpdf_text_int.cpp |
| diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp |
| index 22591651aba63ebaaf8e4e3355d591ee2f1258bb..2734e68a956eb46e0f8262ad870db6ce10ed8763 100644 |
| --- a/core/src/fpdftext/fpdf_text_int.cpp |
| +++ b/core/src/fpdftext/fpdf_text_int.cpp |
| @@ -134,6 +134,7 @@ bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { |
| return false; |
| } |
| } |
| + |
| FX_BOOL CPDF_TextPage::ParseTextPage() { |
| m_bIsParsed = false; |
| if (!m_pPage) |
| @@ -186,9 +187,11 @@ FX_BOOL CPDF_TextPage::ParseTextPage() { |
| } |
| return TRUE; |
| } |
| + |
| int CPDF_TextPage::CountChars() const { |
| return pdfium::CollectionSize<int>(m_CharList); |
| } |
| + |
| int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { |
| int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
| int count = 0; |
| @@ -199,6 +202,7 @@ int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { |
| } |
| return -1; |
| } |
| + |
| int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const { |
| int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
| int count = 0; |
| @@ -213,6 +217,7 @@ int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const { |
| } |
| return -1; |
| } |
| + |
| void CPDF_TextPage::GetRectArray(int start, |
| int nCount, |
| CFX_RectArray& rectArray) const { |
| @@ -298,6 +303,7 @@ void CPDF_TextPage::GetRectArray(int start, |
| } |
| rectArray.Add(rect); |
| } |
| + |
| int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, |
| FX_FLOAT xTolerance, |
| FX_FLOAT yTolerance) const { |
| @@ -378,6 +384,7 @@ CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { |
| } |
| return strText; |
| } |
| + |
| void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, |
| CFX_RectArray& resRectArray) const { |
| if (!m_bIsParsed) |
| @@ -415,6 +422,7 @@ void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, |
| } |
| resRectArray.Add(curRect); |
| } |
| + |
| int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, |
| FX_FLOAT y, |
| FX_FLOAT xTolerance, |
| @@ -483,6 +491,7 @@ void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, |
| nCount = endIndex - start + 1; |
| } |
| } |
| + |
| CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { |
| if (!m_bIsParsed || nCount == 0) |
| return L""; |
| @@ -532,6 +541,7 @@ CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { |
| } |
| return m_TextBuf.GetWideString().Mid(startindex, nCount); |
| } |
| + |
| int CPDF_TextPage::CountRects(int start, int nCount) { |
| if (!m_bIsParsed || start < 0) |
| return -1; |
| @@ -544,6 +554,7 @@ int CPDF_TextPage::CountRects(int start, int nCount) { |
| GetRectArray(start, nCount, m_SelRects); |
| return m_SelRects.GetSize(); |
| } |
| + |
| void CPDF_TextPage::GetRect(int rectIndex, |
| FX_FLOAT& left, |
| FX_FLOAT& top, |
| @@ -625,6 +636,7 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { |
| CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); |
| return GetBaselineRotate(rect, Rotate); |
| } |
| + |
| int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, |
| FX_FLOAT top, |
| FX_FLOAT right, |
| @@ -702,6 +714,7 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, |
| } |
| return m_Segments.GetSize(); |
| } |
| + |
| void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { |
| if (index < 0 || index >= m_Segments.GetSize()) { |
| return; |
| @@ -709,6 +722,7 @@ void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { |
| start = m_Segments.GetAt(index).m_Start; |
| count = m_Segments.GetAt(index).m_nCount; |
| } |
| + |
| int CPDF_TextPage::GetWordBreak(int index, int direction) const { |
| if (!m_bIsParsed) |
| return -1; |
| @@ -740,6 +754,7 @@ int CPDF_TextPage::GetWordBreak(int index, int direction) const { |
| } |
| return breakPos; |
| } |
| + |
| int32_t CPDF_TextPage::FindTextlineFlowDirection() { |
| if (!m_pPage) { |
| return -1; |
| @@ -837,6 +852,7 @@ int32_t CPDF_TextPage::FindTextlineFlowDirection() { |
| } |
| return -1; |
| } |
| + |
| void CPDF_TextPage::ProcessObject() { |
| CPDF_PageObject* pPageObj = NULL; |
| if (!m_pPage) { |
| @@ -870,6 +886,7 @@ void CPDF_TextPage::ProcessObject() { |
| m_LineObj.RemoveAll(); |
| CloseTempLine(); |
| } |
| + |
| void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, |
| const CFX_Matrix& formMatrix) { |
| CPDF_PageObject* pPageObj = NULL; |
| @@ -896,6 +913,7 @@ void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, |
| pPageObj = NULL; |
| } |
| } |
| + |
| int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const { |
| if (charCode == -1) { |
| return 0; |
| @@ -913,6 +931,7 @@ int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const { |
| } |
| return w; |
| } |
| + |
| void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { |
| CFX_BidiChar::Segment seg = pBidi->GetSegmentInfo(); |
| if (seg.direction == CFX_BidiChar::RIGHT) { |
| @@ -927,6 +946,7 @@ void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { |
| } |
| } |
| } |
| + |
| void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, |
| PAGECHAR_INFO info) { |
| if (!IsControlChar(info)) { |
| @@ -954,6 +974,7 @@ void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, |
| } |
| m_CharList.push_back(info); |
| } |
| + |
| void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, |
| PAGECHAR_INFO info) { |
| if (!IsControlChar(info)) { |
| @@ -1106,20 +1127,20 @@ int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { |
| CPDF_TextObject* pTextObj = Obj.m_pTextObj; |
| CPDF_ContentMarkData* pMarkData = |
| (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); |
| - if (!pMarkData) { |
| + if (!pMarkData) |
| return FPDFTEXT_MC_PASS; |
| - } |
| + |
| int nContentMark = pMarkData->CountItems(); |
| - if (nContentMark < 1) { |
| + if (nContentMark < 1) |
| return FPDFTEXT_MC_PASS; |
| - } |
| CFX_WideString actText; |
| FX_BOOL bExist = FALSE; |
| CPDF_Dictionary* pDict = NULL; |
| int n = 0; |
| for (n = 0; n < nContentMark; n++) { |
| CPDF_ContentMarkItem& item = pMarkData->GetItem(n); |
| - CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); |
| + if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None) |
| + continue; |
| pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam())); |
|
Lei Zhang
2016/02/12 23:53:27
BTW, can we change GetParam() to return a CPDF_Object*?
Wei Li
2016/02/13 01:29:18
Good idea. Now the code looks much cleaner. :)
|
| CPDF_String* temp = |
| ToString(pDict ? pDict->GetElement("ActualText") : nullptr); |
| @@ -1128,38 +1149,32 @@ int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { |
| actText = temp->GetUnicodeText(); |
| } |
| } |
| - if (!bExist) { |
| + if (!bExist) |
| return FPDFTEXT_MC_PASS; |
| - } |
| + |
| if (m_pPreTextObj) { |
| - if (CPDF_ContentMarkData* pPreMarkData = |
| - (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject()) { |
| - if (pPreMarkData->CountItems() == n) { |
| - CPDF_ContentMarkItem& item = pPreMarkData->GetItem(n - 1); |
| - if (pDict == item.GetParam()) { |
| - return FPDFTEXT_MC_DONE; |
| - } |
| - } |
| + CPDF_ContentMarkData* pPreMarkData = |
| + (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject(); |
| + if (pPreMarkData && pPreMarkData->CountItems() == n && |
| + pDict == pPreMarkData->GetItem(n - 1).GetParam()) { |
| + return FPDFTEXT_MC_DONE; |
| } |
| } |
| - CPDF_Font* pFont = pTextObj->GetFont(); |
| FX_STRSIZE nItems = actText.GetLength(); |
| - if (nItems < 1) { |
| + if (nItems < 1) |
| return FPDFTEXT_MC_PASS; |
| - } |
| + |
| + CPDF_Font* pFont = pTextObj->GetFont(); |
| bExist = FALSE; |
| for (FX_STRSIZE i = 0; i < nItems; i++) { |
| - FX_WCHAR wChar = actText.GetAt(i); |
| - if (-1 == pFont->CharCodeFromUnicode(wChar)) { |
| - continue; |
| - } else { |
| + if (-1 != pFont->CharCodeFromUnicode(actText.GetAt(i))) { |
|
Lei Zhang
2016/02/12 23:53:27
Flip this to foo != -1 while we are here?
Wei Li
2016/02/13 01:29:18
Done.
|
| bExist = TRUE; |
| break; |
| } |
| } |
| - if (!bExist) { |
| + if (!bExist) |
| return FPDFTEXT_MC_PASS; |
| - } |
| + |
| bExist = FALSE; |
| for (FX_STRSIZE i = 0; i < nItems; i++) { |
| FX_WCHAR wChar = actText.GetAt(i); |
| @@ -1168,39 +1183,36 @@ int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { |
| break; |
| } |
| } |
| - if (!bExist) { |
| + if (!bExist) |
| return FPDFTEXT_MC_DONE; |
| - } |
| + |
| return FPDFTEXT_MC_DELAY; |
| } |
| + |
| void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) { |
| CPDF_TextObject* pTextObj = Obj.m_pTextObj; |
| CPDF_ContentMarkData* pMarkData = |
| (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); |
| - if (!pMarkData) { |
| + if (!pMarkData) |
| return; |
| - } |
| + |
| int nContentMark = pMarkData->CountItems(); |
| - if (nContentMark < 1) { |
| + if (nContentMark < 1) |
| return; |
| - } |
| CFX_WideString actText; |
| CPDF_Dictionary* pDict = NULL; |
| - int n = 0; |
| - for (n = 0; n < nContentMark; n++) { |
| + for (int n = 0; n < nContentMark; n++) { |
| CPDF_ContentMarkItem& item = pMarkData->GetItem(n); |
| - CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); |
| + if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None) |
| + continue; |
| pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam())); |
| - CPDF_String* temp = |
| - ToString(pDict ? pDict->GetElement("ActualText") : nullptr); |
| - if (temp) { |
| - actText = temp->GetUnicodeText(); |
| - } |
| + if (pDict) |
| + actText = pDict->GetUnicodeTextBy("ActualText"); |
| } |
| FX_STRSIZE nItems = actText.GetLength(); |
| - if (nItems < 1) { |
| + if (nItems < 1) |
| return; |
| - } |
| + |
| CPDF_Font* pFont = pTextObj->GetFont(); |
| CFX_Matrix formMatrix = Obj.m_formMatrix; |
| CFX_Matrix matrix; |
| @@ -1239,6 +1251,7 @@ void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) { |
| m_TempCharList.push_back(charinfo); |
| } |
| } |
| + |
| void CPDF_TextPage::FindPreviousTextObject() { |
| if (m_TempCharList.empty() && m_CharList.empty()) |
| return; |
| @@ -1549,6 +1562,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { |
| SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); |
| } |
| } |
| + |
| int32_t CPDF_TextPage::GetTextObjectWritingMode( |
| const CPDF_TextObject* pTextObj) { |
| int32_t nChars = pTextObj->CountChars(); |
| @@ -1614,6 +1628,7 @@ FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) { |
| } |
| return FALSE; |
| } |
| + |
| int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, |
| const CFX_Matrix& formMatrix) { |
| FindPreviousTextObject(); |
| @@ -1770,6 +1785,7 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, |
| } |
| return 0; |
| } |
| + |
| FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, |
| CPDF_TextObject* pTextObj2) { |
| if (!pTextObj1 || !pTextObj2) { |
| @@ -1827,6 +1843,7 @@ FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, |
| } |
| return TRUE; |
| } |
| + |
| FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, |
| FX_POSITION ObjPos) { |
| if (!pTextObj) { |
| @@ -1890,6 +1907,7 @@ FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, |
| rect.Intersect(rect2); |
| return !rect.IsEmpty(); |
| } |
| + |
| FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { |
| if (unicode < L'A') { |
| return FALSE; |
| @@ -1902,6 +1920,7 @@ FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { |
| } |
| return TRUE; |
| } |
| + |
| CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) |
| : m_pTextPage(pTextPage), |
| m_flags(0), |
| @@ -1946,9 +1965,11 @@ CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) |
| m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); |
| } |
| } |
| + |
| int CPDF_TextPageFind::GetCharIndex(int index) const { |
| return m_pTextPage->CharIndexFromTextIndex(index); |
| } |
| + |
| FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat, |
| int flags, |
| int startPos) { |
| @@ -1999,6 +2020,7 @@ FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat, |
| m_resEnd = -1; |
| return TRUE; |
| } |
| + |
| FX_BOOL CPDF_TextPageFind::FindNext() { |
| if (!m_pTextPage) { |
| return FALSE; |
| @@ -2106,6 +2128,7 @@ FX_BOOL CPDF_TextPageFind::FindNext() { |
| } |
| return m_IsFind; |
| } |
| + |
| FX_BOOL CPDF_TextPageFind::FindPrev() { |
| if (!m_pTextPage) { |
| return FALSE; |
| @@ -2151,6 +2174,7 @@ FX_BOOL CPDF_TextPageFind::FindPrev() { |
| } |
| return m_IsFind; |
| } |
| + |
| void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { |
| if (findwhat.IsEmpty()) { |
| return; |
| @@ -2199,6 +2223,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { |
| index++; |
| } |
| } |
| + |
| FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText, |
| int startPos, |
| int endPos) { |
| @@ -2245,6 +2270,7 @@ FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText, |
| } |
| return TRUE; |
| } |
| + |
| FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString, |
| const FX_WCHAR* lpszFullString, |
| int iSubString, |
| @@ -2272,6 +2298,7 @@ FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString, |
| rString.ReleaseBuffer(); |
| return TRUE; |
| } |
| + |
| CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) { |
| CFX_WideString str2; |
| str2.Empty(); |
| @@ -2281,12 +2308,15 @@ CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) { |
| } |
| return str2; |
| } |
| + |
| void CPDF_TextPageFind::GetRectArray(CFX_RectArray& rects) const { |
| rects.Copy(m_resArray); |
| } |
| + |
| int CPDF_TextPageFind::GetCurOrder() const { |
| return GetCharIndex(m_resStart); |
| } |
| + |
| int CPDF_TextPageFind::GetMatchedCount() const { |
| int resStart = GetCharIndex(m_resStart); |
| int resEnd = GetCharIndex(m_resEnd); |
| @@ -2325,12 +2355,14 @@ void CPDF_LinkExtract::DeleteLinkList() { |
| } |
| m_LinkList.RemoveAll(); |
| } |
| + |
| int CPDF_LinkExtract::CountLinks() const { |
| if (!m_bIsParsed) { |
| return -1; |
| } |
| return m_LinkList.GetSize(); |
| } |
| + |
| void CPDF_LinkExtract::ParseLink() { |
| int start = 0, pos = 0; |
| int TotalChar = m_pTextPage->CountChars(); |
| @@ -2366,6 +2398,7 @@ void CPDF_LinkExtract::ParseLink() { |
| } |
| } |
| } |
| + |
| FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { |
| CFX_WideString str = strBeCheck; |
| str.MakeLower(); |
| @@ -2392,6 +2425,7 @@ FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { |
| } |
| return FALSE; |
| } |
| + |
| bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { |
| int aPos = str.Find(L'@'); |
| // Invalid when no '@'. |
| @@ -2496,6 +2530,7 @@ void CPDF_LinkExtract::GetBoundedSegment(int index, |
| start = link->m_Start; |
| count = link->m_Count; |
| } |
| + |
| void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { |
| if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
| return; |