| Index: core/fpdftext/fpdf_text_int.cpp
|
| diff --git a/core/fpdftext/fpdf_text_int.cpp b/core/fpdftext/fpdf_text_int.cpp
|
| index 741331fb7711984b851d25053cefc5def0d50828..4db4d5c09fab41e9ee891684e614c8de181e7f6e 100644
|
| --- a/core/fpdftext/fpdf_text_int.cpp
|
| +++ b/core/fpdftext/fpdf_text_int.cpp
|
| @@ -262,7 +262,7 @@ int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const {
|
|
|
| void CPDF_TextPage::GetRectArray(int start,
|
| int nCount,
|
| - CFX_RectArray& rectArray) const {
|
| + CFX_RectArray* rectArray) const {
|
| if (start < 0 || nCount == 0) {
|
| return;
|
| }
|
| @@ -290,7 +290,7 @@ void CPDF_TextPage::GetRectArray(int start,
|
| pCurObj = info_curchar.m_pTextObj;
|
| }
|
| if (pCurObj != info_curchar.m_pTextObj) {
|
| - rectArray.Add(rect);
|
| + rectArray->Add(rect);
|
| pCurObj = info_curchar.m_pTextObj;
|
| flagNewRect = TRUE;
|
| }
|
| @@ -343,7 +343,7 @@ void CPDF_TextPage::GetRectArray(int start,
|
| }
|
| }
|
| }
|
| - rectArray.Add(rect);
|
| + rectArray->Add(rect);
|
| }
|
|
|
| int CPDF_TextPage::GetIndexAtPos(CFX_FloatPoint point,
|
| @@ -592,7 +592,7 @@ int CPDF_TextPage::CountRects(int start, int nCount) {
|
| nCount = pdfium::CollectionSize<int>(m_CharList) - start;
|
| }
|
| m_SelRects.RemoveAll();
|
| - GetRectArray(start, nCount, m_SelRects);
|
| + GetRectArray(start, nCount, &m_SelRects);
|
| return m_SelRects.GetSize();
|
| }
|
|
|
| @@ -649,35 +649,6 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) {
|
| return TRUE;
|
| }
|
|
|
| -FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect,
|
| - int& Rotate) {
|
| - int start, end, count,
|
| - n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom,
|
| - TRUE);
|
| - if (n < 1) {
|
| - return FALSE;
|
| - }
|
| - if (n > 1) {
|
| - GetBoundedSegment(n - 1, start, count);
|
| - end = start + count - 1;
|
| - GetBoundedSegment(0, start, count);
|
| - } else {
|
| - GetBoundedSegment(0, start, count);
|
| - end = start + count - 1;
|
| - }
|
| - return GetBaselineRotate(start, end, Rotate);
|
| -}
|
| -FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) {
|
| - if (!m_bIsParsed)
|
| - return FALSE;
|
| -
|
| - if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize())
|
| - return FALSE;
|
| -
|
| - CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);
|
| - return GetBaselineRotate(rect, Rotate);
|
| -}
|
| -
|
| int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
|
| FX_FLOAT top,
|
| FX_FLOAT right,
|
| @@ -756,14 +727,6 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
|
| return m_Segments.GetSize();
|
| }
|
|
|
| -void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {
|
| - if (index < 0 || index >= m_Segments.GetSize()) {
|
| - return;
|
| - }
|
| - start = m_Segments.GetAt(index).m_Start;
|
| - count = m_Segments.GetAt(index).m_nCount;
|
| -}
|
| -
|
| int CPDF_TextPage::GetWordBreak(int index, int direction) const {
|
| if (!m_bIsParsed)
|
| return -1;
|
| @@ -2126,7 +2089,7 @@ FX_BOOL CPDF_TextPageFind::FindNext() {
|
| m_IsFind = TRUE;
|
| int resStart = GetCharIndex(m_resStart);
|
| int resEnd = GetCharIndex(m_resEnd);
|
| - m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray);
|
| + m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, &m_resArray);
|
| if (m_flags & FPDFTEXT_CONSECUTIVE) {
|
| m_findNextStart = m_resStart + 1;
|
| m_findPreStart = m_resEnd - 1;
|
| @@ -2172,7 +2135,7 @@ FX_BOOL CPDF_TextPageFind::FindPrev() {
|
| m_resStart = m_pTextPage->TextIndexFromCharIndex(order);
|
| m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1);
|
| m_IsFind = TRUE;
|
| - m_pTextPage->GetRectArray(order, MatchedCount, m_resArray);
|
| + m_pTextPage->GetRectArray(order, MatchedCount, &m_resArray);
|
| if (m_flags & FPDFTEXT_CONSECUTIVE) {
|
| m_findNextStart = m_resStart + 1;
|
| m_findPreStart = m_resEnd - 1;
|
| @@ -2330,43 +2293,22 @@ int CPDF_TextPageFind::GetMatchedCount() const {
|
| return resEnd - resStart + 1;
|
| }
|
|
|
| -CPDF_LinkExtract::CPDF_LinkExtract()
|
| - : m_pTextPage(nullptr), m_bIsParsed(false) {}
|
| +CPDF_LinkExtract::CPDF_LinkExtract(const CPDF_TextPage* pTextPage)
|
| + : m_pTextPage(pTextPage) {}
|
|
|
| CPDF_LinkExtract::~CPDF_LinkExtract() {
|
| - DeleteLinkList();
|
| }
|
|
|
| -FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) {
|
| - if (!pTextPage || !pTextPage->IsParsed())
|
| - return FALSE;
|
| +void CPDF_LinkExtract::ExtractLinks() {
|
| + m_LinkArray.clear();
|
| + if (!m_pTextPage->IsParsed())
|
| + return;
|
|
|
| - m_pTextPage = (const CPDF_TextPage*)pTextPage;
|
| m_strPageText = m_pTextPage->GetPageText(0, -1);
|
| - DeleteLinkList();
|
| - if (m_strPageText.IsEmpty()) {
|
| - return FALSE;
|
| - }
|
| - ParseLink();
|
| - m_bIsParsed = true;
|
| - return TRUE;
|
| -}
|
| -
|
| -void CPDF_LinkExtract::DeleteLinkList() {
|
| - while (m_LinkList.GetSize()) {
|
| - CPDF_LinkExt* linkinfo = NULL;
|
| - linkinfo = m_LinkList.GetAt(0);
|
| - m_LinkList.RemoveAt(0);
|
| - delete linkinfo;
|
| - }
|
| - m_LinkList.RemoveAll();
|
| -}
|
| + if (m_strPageText.IsEmpty())
|
| + return;
|
|
|
| -int CPDF_LinkExtract::CountLinks() const {
|
| - if (!m_bIsParsed) {
|
| - return -1;
|
| - }
|
| - return m_LinkList.GetSize();
|
| + ParseLink();
|
| }
|
|
|
| void CPDF_LinkExtract::ParseLink() {
|
| @@ -2395,7 +2337,7 @@ void CPDF_LinkExtract::ParseLink() {
|
| }
|
| if (nCount > 5 &&
|
| (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) {
|
| - AppendToLinkList(start, nCount, strBeCheck);
|
| + m_LinkArray.push_back({start, nCount, strBeCheck});
|
| }
|
| }
|
| start = ++pos;
|
| @@ -2405,47 +2347,46 @@ void CPDF_LinkExtract::ParseLink() {
|
| }
|
| }
|
|
|
| -FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
|
| +bool CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
|
| CFX_WideString str = strBeCheck;
|
| str.MakeLower();
|
| if (str.Find(L"http://www.") != -1) {
|
| strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www."));
|
| - return TRUE;
|
| + return true;
|
| }
|
| if (str.Find(L"http://") != -1) {
|
| strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://"));
|
| - return TRUE;
|
| + return true;
|
| }
|
| if (str.Find(L"https://www.") != -1) {
|
| strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www."));
|
| - return TRUE;
|
| + return true;
|
| }
|
| if (str.Find(L"https://") != -1) {
|
| strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://"));
|
| - return TRUE;
|
| + return true;
|
| }
|
| if (str.Find(L"www.") != -1) {
|
| strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www."));
|
| strBeCheck = L"http://" + strBeCheck;
|
| - return TRUE;
|
| + return true;
|
| }
|
| - return FALSE;
|
| + return false;
|
| }
|
|
|
| bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
|
| int aPos = str.Find(L'@');
|
| // Invalid when no '@'.
|
| - if (aPos < 1) {
|
| - return FALSE;
|
| - }
|
| + if (aPos < 1)
|
| + return false;
|
|
|
| // Check the local part.
|
| int pPos = aPos; // Used to track the position of '@' or '.'.
|
| for (int i = aPos - 1; i >= 0; i--) {
|
| FX_WCHAR ch = str.GetAt(i);
|
| - if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch)) {
|
| + if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch))
|
| continue;
|
| - }
|
| +
|
| if (ch != L'.' || i == pPos - 1 || i == 0) {
|
| if (i == aPos - 1) {
|
| // There is '.' or invalid char before '@'.
|
| @@ -2463,25 +2404,25 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
|
|
|
| // Check the domain name part.
|
| aPos = str.Find(L'@');
|
| - if (aPos < 1) {
|
| - return FALSE;
|
| - }
|
| + if (aPos < 1)
|
| + return false;
|
| +
|
| str.TrimRight(L'.');
|
| // At least one '.' in domain name, but not at the beginning.
|
| // TODO(weili): RFC5322 allows domain names to be a local name without '.'.
|
| // Check whether we should remove this check.
|
| int ePos = str.Find(L'.', aPos + 1);
|
| - if (ePos == -1 || ePos == aPos + 1) {
|
| - return FALSE;
|
| - }
|
| + if (ePos == -1 || ePos == aPos + 1)
|
| + return false;
|
| +
|
| // Validate all other chars in domain name.
|
| int nLen = str.GetLength();
|
| pPos = 0; // Used to track the position of '.'.
|
| for (int i = aPos + 1; i < nLen; i++) {
|
| FX_WCHAR wch = str.GetAt(i);
|
| - if (wch == L'-' || FXSYS_iswalnum(wch)) {
|
| + if (wch == L'-' || FXSYS_iswalnum(wch))
|
| continue;
|
| - }
|
| +
|
| if (wch != L'.' || i == pPos + 1) {
|
| // Domain name should end before invalid char.
|
| int host_end = i == pPos + 1 ? i - 2 : i - 1;
|
| @@ -2490,61 +2431,24 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
|
| str = str.Left(host_end + 1);
|
| break;
|
| }
|
| - return FALSE;
|
| + return false;
|
| }
|
| pPos = i;
|
| }
|
|
|
| - if (str.Find(L"mailto:") == -1) {
|
| + if (str.Find(L"mailto:") == -1)
|
| str = L"mailto:" + str;
|
| - }
|
| - return TRUE;
|
| -}
|
|
|
| -void CPDF_LinkExtract::AppendToLinkList(int start,
|
| - int count,
|
| - const CFX_WideString& strUrl) {
|
| - CPDF_LinkExt* linkInfo = new CPDF_LinkExt;
|
| - linkInfo->m_strUrl = strUrl;
|
| - linkInfo->m_Start = start;
|
| - linkInfo->m_Count = count;
|
| - m_LinkList.Add(linkInfo);
|
| + return true;
|
| }
|
|
|
| -CFX_WideString CPDF_LinkExtract::GetURL(int index) const {
|
| - if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
|
| - return L"";
|
| - }
|
| - CPDF_LinkExt* link = NULL;
|
| - link = m_LinkList.GetAt(index);
|
| - if (!link) {
|
| - return L"";
|
| - }
|
| - return link->m_strUrl;
|
| -}
|
| -void CPDF_LinkExtract::GetBoundedSegment(int index,
|
| - int& start,
|
| - int& count) const {
|
| - if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
|
| - return;
|
| - }
|
| - CPDF_LinkExt* link = NULL;
|
| - link = m_LinkList.GetAt(index);
|
| - if (!link) {
|
| - return;
|
| - }
|
| - start = link->m_Start;
|
| - count = link->m_Count;
|
| +CFX_WideString CPDF_LinkExtract::GetURL(size_t index) const {
|
| + return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L"";
|
| }
|
|
|
| -void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const {
|
| - if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
|
| - return;
|
| - }
|
| - CPDF_LinkExt* link = NULL;
|
| - link = m_LinkList.GetAt(index);
|
| - if (!link) {
|
| - return;
|
| +void CPDF_LinkExtract::GetRects(size_t index, CFX_RectArray* pRects) const {
|
| + if (index < m_LinkArray.size()) {
|
| + m_pTextPage->GetRectArray(m_LinkArray[index].m_Start,
|
| + m_LinkArray[index].m_Count, pRects);
|
| }
|
| - m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
|
| }
|
|
|