| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <cctype> | 8 #include <cctype> |
| 9 #include <cwctype> | 9 #include <cwctype> |
| 10 #include <memory> | 10 #include <memory> |
| (...skipping 244 matching lines...) | (...skipping 244 matching lines...) |
| 255 return -1; | 255 return -1; |
| 256 | 256 |
| 257 return CharIndex - m_CharIndex[i] + count - m_CharIndex[i + 1]; | 257 return CharIndex - m_CharIndex[i] + count - m_CharIndex[i + 1]; |
| 258 } | 258 } |
| 259 } | 259 } |
| 260 return -1; | 260 return -1; |
| 261 } | 261 } |
| 262 | 262 |
| 263 void CPDF_TextPage::GetRectArray(int start, | 263 void CPDF_TextPage::GetRectArray(int start, |
| 264 int nCount, | 264 int nCount, |
| 265 CFX_RectArray& rectArray) const { | 265 CFX_RectArray* rectArray) const { |
| 266 if (start < 0 || nCount == 0) { | 266 if (start < 0 || nCount == 0) { |
| 267 return; | 267 return; |
| 268 } | 268 } |
| 269 if (!m_bIsParsed) { | 269 if (!m_bIsParsed) { |
| 270 return; | 270 return; |
| 271 } | 271 } |
| 272 CPDF_TextObject* pCurObj = NULL; | 272 CPDF_TextObject* pCurObj = NULL; |
| 273 CFX_FloatRect rect; | 273 CFX_FloatRect rect; |
| 274 int curPos = start; | 274 int curPos = start; |
| 275 FX_BOOL flagNewRect = TRUE; | 275 FX_BOOL flagNewRect = TRUE; |
| 276 if (nCount + start > pdfium::CollectionSize<int>(m_CharList) || | 276 if (nCount + start > pdfium::CollectionSize<int>(m_CharList) || |
| 277 nCount == -1) { | 277 nCount == -1) { |
| 278 nCount = pdfium::CollectionSize<int>(m_CharList) - start; | 278 nCount = pdfium::CollectionSize<int>(m_CharList) - start; |
| 279 } | 279 } |
| 280 while (nCount--) { | 280 while (nCount--) { |
| 281 PAGECHAR_INFO info_curchar = m_CharList[curPos++]; | 281 PAGECHAR_INFO info_curchar = m_CharList[curPos++]; |
| 282 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 282 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
| 283 continue; | 283 continue; |
| 284 } | 284 } |
| 285 if (info_curchar.m_CharBox.Width() < 0.01 || | 285 if (info_curchar.m_CharBox.Width() < 0.01 || |
| 286 info_curchar.m_CharBox.Height() < 0.01) { | 286 info_curchar.m_CharBox.Height() < 0.01) { |
| 287 continue; | 287 continue; |
| 288 } | 288 } |
| 289 if (!pCurObj) { | 289 if (!pCurObj) { |
| 290 pCurObj = info_curchar.m_pTextObj; | 290 pCurObj = info_curchar.m_pTextObj; |
| 291 } | 291 } |
| 292 if (pCurObj != info_curchar.m_pTextObj) { | 292 if (pCurObj != info_curchar.m_pTextObj) { |
| 293 rectArray.Add(rect); | 293 rectArray->Add(rect); |
| 294 pCurObj = info_curchar.m_pTextObj; | 294 pCurObj = info_curchar.m_pTextObj; |
| 295 flagNewRect = TRUE; | 295 flagNewRect = TRUE; |
| 296 } | 296 } |
| 297 if (flagNewRect) { | 297 if (flagNewRect) { |
| 298 FX_FLOAT orgX = info_curchar.m_OriginX, orgY = info_curchar.m_OriginY; | 298 FX_FLOAT orgX = info_curchar.m_OriginX, orgY = info_curchar.m_OriginY; |
| 299 CFX_Matrix matrix, matrix_reverse; | 299 CFX_Matrix matrix, matrix_reverse; |
| 300 info_curchar.m_pTextObj->GetTextMatrix(&matrix); | 300 info_curchar.m_pTextObj->GetTextMatrix(&matrix); |
| 301 matrix.Concat(info_curchar.m_Matrix); | 301 matrix.Concat(info_curchar.m_Matrix); |
| 302 matrix_reverse.SetReverse(matrix); | 302 matrix_reverse.SetReverse(matrix); |
| 303 matrix_reverse.Transform(orgX, orgY); | 303 matrix_reverse.Transform(orgX, orgY); |
| (...skipping 32 matching lines...) | (...skipping 32 matching lines...) |
| 336 rect.right = info_curchar.m_CharBox.right; | 336 rect.right = info_curchar.m_CharBox.right; |
| 337 } | 337 } |
| 338 if (rect.top < info_curchar.m_CharBox.top) { | 338 if (rect.top < info_curchar.m_CharBox.top) { |
| 339 rect.top = info_curchar.m_CharBox.top; | 339 rect.top = info_curchar.m_CharBox.top; |
| 340 } | 340 } |
| 341 if (rect.bottom > info_curchar.m_CharBox.bottom) { | 341 if (rect.bottom > info_curchar.m_CharBox.bottom) { |
| 342 rect.bottom = info_curchar.m_CharBox.bottom; | 342 rect.bottom = info_curchar.m_CharBox.bottom; |
| 343 } | 343 } |
| 344 } | 344 } |
| 345 } | 345 } |
| 346 rectArray.Add(rect); | 346 rectArray->Add(rect); |
| 347 } | 347 } |
| 348 | 348 |
| 349 int CPDF_TextPage::GetIndexAtPos(CFX_FloatPoint point, | 349 int CPDF_TextPage::GetIndexAtPos(CFX_FloatPoint point, |
| 350 FX_FLOAT xTolerance, | 350 FX_FLOAT xTolerance, |
| 351 FX_FLOAT yTolerance) const { | 351 FX_FLOAT yTolerance) const { |
| 352 if (!m_bIsParsed) | 352 if (!m_bIsParsed) |
| 353 return -3; | 353 return -3; |
| 354 | 354 |
| 355 int pos = 0; | 355 int pos = 0; |
| 356 int NearPos = -1; | 356 int NearPos = -1; |
| (...skipping 228 matching lines...) | (...skipping 228 matching lines...) |
| 585 | 585 |
| 586 int CPDF_TextPage::CountRects(int start, int nCount) { | 586 int CPDF_TextPage::CountRects(int start, int nCount) { |
| 587 if (!m_bIsParsed || start < 0) | 587 if (!m_bIsParsed || start < 0) |
| 588 return -1; | 588 return -1; |
| 589 | 589 |
| 590 if (nCount == -1 || | 590 if (nCount == -1 || |
| 591 nCount + start > pdfium::CollectionSize<int>(m_CharList)) { | 591 nCount + start > pdfium::CollectionSize<int>(m_CharList)) { |
| 592 nCount = pdfium::CollectionSize<int>(m_CharList) - start; | 592 nCount = pdfium::CollectionSize<int>(m_CharList) - start; |
| 593 } | 593 } |
| 594 m_SelRects.RemoveAll(); | 594 m_SelRects.RemoveAll(); |
| 595 GetRectArray(start, nCount, m_SelRects); | 595 GetRectArray(start, nCount, &m_SelRects); |
| 596 return m_SelRects.GetSize(); | 596 return m_SelRects.GetSize(); |
| 597 } | 597 } |
| 598 | 598 |
| 599 void CPDF_TextPage::GetRect(int rectIndex, | 599 void CPDF_TextPage::GetRect(int rectIndex, |
| 600 FX_FLOAT& left, | 600 FX_FLOAT& left, |
| 601 FX_FLOAT& top, | 601 FX_FLOAT& top, |
| 602 FX_FLOAT& right, | 602 FX_FLOAT& right, |
| 603 FX_FLOAT& bottom) const { | 603 FX_FLOAT& bottom) const { |
| 604 if (!m_bIsParsed) | 604 if (!m_bIsParsed) |
| 605 return; | 605 return; |
| (...skipping 36 matching lines...) | (...skipping 36 matching lines...) |
| 642 Rotate = (int)(a * 180 / FX_PI + 0.5); | 642 Rotate = (int)(a * 180 / FX_PI + 0.5); |
| 643 } | 643 } |
| 644 if (Rotate < 0) { | 644 if (Rotate < 0) { |
| 645 Rotate = -Rotate; | 645 Rotate = -Rotate; |
| 646 } else if (Rotate > 0) { | 646 } else if (Rotate > 0) { |
| 647 Rotate = 360 - Rotate; | 647 Rotate = 360 - Rotate; |
| 648 } | 648 } |
| 649 return TRUE; | 649 return TRUE; |
| 650 } | 650 } |
| 651 | 651 |
| 652 FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, | |
| 653 int& Rotate) { | |
| 654 int start, end, count, | |
| 655 n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, | |
| 656 TRUE); | |
| 657 if (n < 1) { | |
| 658 return FALSE; | |
| 659 } | |
| 660 if (n > 1) { | |
| 661 GetBoundedSegment(n - 1, start, count); | |
| 662 end = start + count - 1; | |
| 663 GetBoundedSegment(0, start, count); | |
| 664 } else { | |
| 665 GetBoundedSegment(0, start, count); | |
| 666 end = start + count - 1; | |
| 667 } | |
| 668 return GetBaselineRotate(start, end, Rotate); | |
| 669 } | |
| 670 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { | |
| 671 if (!m_bIsParsed) | |
| 672 return FALSE; | |
| 673 | |
| 674 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) | |
| 675 return FALSE; | |
| 676 | |
| 677 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); | |
| 678 return GetBaselineRotate(rect, Rotate); | |
| 679 } | |
| 680 | |
| 681 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, | 652 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, |
| 682 FX_FLOAT top, | 653 FX_FLOAT top, |
| 683 FX_FLOAT right, | 654 FX_FLOAT right, |
| 684 FX_FLOAT bottom, | 655 FX_FLOAT bottom, |
| 685 FX_BOOL bContains) { | 656 FX_BOOL bContains) { |
| 686 m_Segments.RemoveAll(); | 657 m_Segments.RemoveAll(); |
| 687 if (!m_bIsParsed) | 658 if (!m_bIsParsed) |
| 688 return -1; | 659 return -1; |
| 689 | 660 |
| 690 CFX_FloatRect rect(left, bottom, right, top); | 661 CFX_FloatRect rect(left, bottom, right, top); |
| (...skipping 58 matching lines...) | (...skipping 58 matching lines...) |
| 749 } | 720 } |
| 750 if (segmentStatus == 1) { | 721 if (segmentStatus == 1) { |
| 751 segmentStatus = 2; | 722 segmentStatus = 2; |
| 752 m_Segments.Add(segment); | 723 m_Segments.Add(segment); |
| 753 segment.m_Start = 0; | 724 segment.m_Start = 0; |
| 754 segment.m_nCount = 0; | 725 segment.m_nCount = 0; |
| 755 } | 726 } |
| 756 return m_Segments.GetSize(); | 727 return m_Segments.GetSize(); |
| 757 } | 728 } |
| 758 | 729 |
| 759 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { | |
| 760 if (index < 0 || index >= m_Segments.GetSize()) { | |
| 761 return; | |
| 762 } | |
| 763 start = m_Segments.GetAt(index).m_Start; | |
| 764 count = m_Segments.GetAt(index).m_nCount; | |
| 765 } | |
| 766 | |
| 767 int CPDF_TextPage::GetWordBreak(int index, int direction) const { | 730 int CPDF_TextPage::GetWordBreak(int index, int direction) const { |
| 768 if (!m_bIsParsed) | 731 if (!m_bIsParsed) |
| 769 return -1; | 732 return -1; |
| 770 | 733 |
| 771 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) | 734 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) |
| 772 return -1; | 735 return -1; |
| 773 | 736 |
| 774 if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList)) | 737 if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList)) |
| 775 return -1; | 738 return -1; |
| 776 | 739 |
| (...skipping 1342 matching lines...) | (...skipping 1342 matching lines...) |
| 2119 nStartPos = m_resStart + m_csFindWhatArray[1].GetLength(); | 2082 nStartPos = m_resStart + m_csFindWhatArray[1].GetLength(); |
| 2120 } else { | 2083 } else { |
| 2121 nStartPos = m_resStart + m_csFindWhatArray[0].GetLength(); | 2084 nStartPos = m_resStart + m_csFindWhatArray[0].GetLength(); |
| 2122 } | 2085 } |
| 2123 } | 2086 } |
| 2124 } | 2087 } |
| 2125 m_resEnd = nResultPos + m_csFindWhatArray.back().GetLength() - 1; | 2088 m_resEnd = nResultPos + m_csFindWhatArray.back().GetLength() - 1; |
| 2126 m_IsFind = TRUE; | 2089 m_IsFind = TRUE; |
| 2127 int resStart = GetCharIndex(m_resStart); | 2090 int resStart = GetCharIndex(m_resStart); |
| 2128 int resEnd = GetCharIndex(m_resEnd); | 2091 int resEnd = GetCharIndex(m_resEnd); |
| 2129 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray); | 2092 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, &m_resArray); |
| 2130 if (m_flags & FPDFTEXT_CONSECUTIVE) { | 2093 if (m_flags & FPDFTEXT_CONSECUTIVE) { |
| 2131 m_findNextStart = m_resStart + 1; | 2094 m_findNextStart = m_resStart + 1; |
| 2132 m_findPreStart = m_resEnd - 1; | 2095 m_findPreStart = m_resEnd - 1; |
| 2133 } else { | 2096 } else { |
| 2134 m_findNextStart = m_resEnd + 1; | 2097 m_findNextStart = m_resEnd + 1; |
| 2135 m_findPreStart = m_resStart - 1; | 2098 m_findPreStart = m_resStart - 1; |
| 2136 } | 2099 } |
| 2137 return m_IsFind; | 2100 return m_IsFind; |
| 2138 } | 2101 } |
| 2139 | 2102 |
| (...skipping 25 matching lines...) | (...skipping 25 matching lines...) |
| 2165 MatchedCount = MatchedCount1; | 2128 MatchedCount = MatchedCount1; |
| 2166 } | 2129 } |
| 2167 } | 2130 } |
| 2168 if (order == -1) { | 2131 if (order == -1) { |
| 2169 m_IsFind = FALSE; | 2132 m_IsFind = FALSE; |
| 2170 return m_IsFind; | 2133 return m_IsFind; |
| 2171 } | 2134 } |
| 2172 m_resStart = m_pTextPage->TextIndexFromCharIndex(order); | 2135 m_resStart = m_pTextPage->TextIndexFromCharIndex(order); |
| 2173 m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1); | 2136 m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1); |
| 2174 m_IsFind = TRUE; | 2137 m_IsFind = TRUE; |
| 2175 m_pTextPage->GetRectArray(order, MatchedCount, m_resArray); | 2138 m_pTextPage->GetRectArray(order, MatchedCount, &m_resArray); |
| 2176 if (m_flags & FPDFTEXT_CONSECUTIVE) { | 2139 if (m_flags & FPDFTEXT_CONSECUTIVE) { |
| 2177 m_findNextStart = m_resStart + 1; | 2140 m_findNextStart = m_resStart + 1; |
| 2178 m_findPreStart = m_resEnd - 1; | 2141 m_findPreStart = m_resEnd - 1; |
| 2179 } else { | 2142 } else { |
| 2180 m_findNextStart = m_resEnd + 1; | 2143 m_findNextStart = m_resEnd + 1; |
| 2181 m_findPreStart = m_resStart - 1; | 2144 m_findPreStart = m_resStart - 1; |
| 2182 } | 2145 } |
| 2183 return m_IsFind; | 2146 return m_IsFind; |
| 2184 } | 2147 } |
| 2185 | 2148 |
| (...skipping 137 matching lines...) | (...skipping 137 matching lines...) |
| 2323 int CPDF_TextPageFind::GetCurOrder() const { | 2286 int CPDF_TextPageFind::GetCurOrder() const { |
| 2324 return GetCharIndex(m_resStart); | 2287 return GetCharIndex(m_resStart); |
| 2325 } | 2288 } |
| 2326 | 2289 |
| 2327 int CPDF_TextPageFind::GetMatchedCount() const { | 2290 int CPDF_TextPageFind::GetMatchedCount() const { |
| 2328 int resStart = GetCharIndex(m_resStart); | 2291 int resStart = GetCharIndex(m_resStart); |
| 2329 int resEnd = GetCharIndex(m_resEnd); | 2292 int resEnd = GetCharIndex(m_resEnd); |
| 2330 return resEnd - resStart + 1; | 2293 return resEnd - resStart + 1; |
| 2331 } | 2294 } |
| 2332 | 2295 |
| 2333 CPDF_LinkExtract::CPDF_LinkExtract() | 2296 CPDF_LinkExtract::CPDF_LinkExtract(const CPDF_TextPage* pTextPage) |
| 2334 : m_pTextPage(nullptr), m_bIsParsed(false) {} | 2297 : m_pTextPage(pTextPage) {} |
| 2335 | 2298 |
| 2336 CPDF_LinkExtract::~CPDF_LinkExtract() { | 2299 CPDF_LinkExtract::~CPDF_LinkExtract() { |
| 2337 DeleteLinkList(); | |
| 2338 } | 2300 } |
| 2339 | 2301 |
| 2340 FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) { | 2302 void CPDF_LinkExtract::ExtractLinks() { |
| 2341 if (!pTextPage || !pTextPage->IsParsed()) | 2303 m_LinkArray.clear(); |
| 2342 return FALSE; | 2304 if (!m_pTextPage->IsParsed()) |
| 2305 return; |
| 2343 | 2306 |
| 2344 m_pTextPage = (const CPDF_TextPage*)pTextPage; | |
| 2345 m_strPageText = m_pTextPage->GetPageText(0, -1); | 2307 m_strPageText = m_pTextPage->GetPageText(0, -1); |
| 2346 DeleteLinkList(); | 2308 if (m_strPageText.IsEmpty()) |
| 2347 if (m_strPageText.IsEmpty()) { | 2309 return; |
| 2348 return FALSE; | 2310 |
| 2349 } | |
| 2350 ParseLink(); | 2311 ParseLink(); |
| 2351 m_bIsParsed = true; | |
| 2352 return TRUE; | |
| 2353 } | |
| 2354 | |
| 2355 void CPDF_LinkExtract::DeleteLinkList() { | |
| 2356 while (m_LinkList.GetSize()) { | |
| 2357 CPDF_LinkExt* linkinfo = NULL; | |
| 2358 linkinfo = m_LinkList.GetAt(0); | |
| 2359 m_LinkList.RemoveAt(0); | |
| 2360 delete linkinfo; | |
| 2361 } | |
| 2362 m_LinkList.RemoveAll(); | |
| 2363 } | |
| 2364 | |
| 2365 int CPDF_LinkExtract::CountLinks() const { | |
| 2366 if (!m_bIsParsed) { | |
| 2367 return -1; | |
| 2368 } | |
| 2369 return m_LinkList.GetSize(); | |
| 2370 } | 2312 } |
| 2371 | 2313 |
| 2372 void CPDF_LinkExtract::ParseLink() { | 2314 void CPDF_LinkExtract::ParseLink() { |
| 2373 int start = 0, pos = 0; | 2315 int start = 0, pos = 0; |
| 2374 int TotalChar = m_pTextPage->CountChars(); | 2316 int TotalChar = m_pTextPage->CountChars(); |
| 2375 while (pos < TotalChar) { | 2317 while (pos < TotalChar) { |
| 2376 FPDF_CHAR_INFO pageChar; | 2318 FPDF_CHAR_INFO pageChar; |
| 2377 m_pTextPage->GetCharInfo(pos, &pageChar); | 2319 m_pTextPage->GetCharInfo(pos, &pageChar); |
| 2378 if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED || | 2320 if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED || |
| 2379 pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) { | 2321 pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) { |
| 2380 int nCount = pos - start; | 2322 int nCount = pos - start; |
| 2381 if (pos == TotalChar - 1) { | 2323 if (pos == TotalChar - 1) { |
| 2382 nCount++; | 2324 nCount++; |
| 2383 } | 2325 } |
| 2384 CFX_WideString strBeCheck; | 2326 CFX_WideString strBeCheck; |
| 2385 strBeCheck = m_pTextPage->GetPageText(start, nCount); | 2327 strBeCheck = m_pTextPage->GetPageText(start, nCount); |
| 2386 if (strBeCheck.GetLength() > 5) { | 2328 if (strBeCheck.GetLength() > 5) { |
| 2387 while (strBeCheck.GetLength() > 0) { | 2329 while (strBeCheck.GetLength() > 0) { |
| 2388 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); | 2330 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); |
| 2389 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') { | 2331 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') { |
| 2390 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1); | 2332 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1); |
| 2391 nCount--; | 2333 nCount--; |
| 2392 } else { | 2334 } else { |
| 2393 break; | 2335 break; |
| 2394 } | 2336 } |
| 2395 } | 2337 } |
| 2396 if (nCount > 5 && | 2338 if (nCount > 5 && |
| 2397 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { | 2339 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { |
| 2398 AppendToLinkList(start, nCount, strBeCheck); | 2340 m_LinkArray.push_back({start, nCount, strBeCheck}); |
| 2399 } | 2341 } |
| 2400 } | 2342 } |
| 2401 start = ++pos; | 2343 start = ++pos; |
| 2402 } else { | 2344 } else { |
| 2403 pos++; | 2345 pos++; |
| 2404 } | 2346 } |
| 2405 } | 2347 } |
| 2406 } | 2348 } |
| 2407 | 2349 |
| 2408 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { | 2350 bool CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { |
| 2409 CFX_WideString str = strBeCheck; | 2351 CFX_WideString str = strBeCheck; |
| 2410 str.MakeLower(); | 2352 str.MakeLower(); |
| 2411 if (str.Find(L"http://www.") != -1) { | 2353 if (str.Find(L"http://www.") != -1) { |
| 2412 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); | 2354 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); |
| 2413 return TRUE; | 2355 return true; |
| 2414 } | 2356 } |
| 2415 if (str.Find(L"http://") != -1) { | 2357 if (str.Find(L"http://") != -1) { |
| 2416 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); | 2358 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); |
| 2417 return TRUE; | 2359 return true; |
| 2418 } | 2360 } |
| 2419 if (str.Find(L"https://www.") != -1) { | 2361 if (str.Find(L"https://www.") != -1) { |
| 2420 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); | 2362 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); |
| 2421 return TRUE; | 2363 return true; |
| 2422 } | 2364 } |
| 2423 if (str.Find(L"https://") != -1) { | 2365 if (str.Find(L"https://") != -1) { |
| 2424 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); | 2366 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); |
| 2425 return TRUE; | 2367 return true; |
| 2426 } | 2368 } |
| 2427 if (str.Find(L"www.") != -1) { | 2369 if (str.Find(L"www.") != -1) { |
| 2428 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); | 2370 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); |
| 2429 strBeCheck = L"http://" + strBeCheck; | 2371 strBeCheck = L"http://" + strBeCheck; |
| 2430 return TRUE; | 2372 return true; |
| 2431 } | 2373 } |
| 2432 return FALSE; | 2374 return false; |
| 2433 } | 2375 } |
| 2434 | 2376 |
| 2435 bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { | 2377 bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { |
| 2436 int aPos = str.Find(L'@'); | 2378 int aPos = str.Find(L'@'); |
| 2437 // Invalid when no '@'. | 2379 // Invalid when no '@'. |
| 2438 if (aPos < 1) { | 2380 if (aPos < 1) |
| 2439 return FALSE; | 2381 return false; |
| 2440 } | |
| 2441 | 2382 |
| 2442 // Check the local part. | 2383 // Check the local part. |
| 2443 int pPos = aPos; // Used to track the position of '@' or '.'. | 2384 int pPos = aPos; // Used to track the position of '@' or '.'. |
| 2444 for (int i = aPos - 1; i >= 0; i--) { | 2385 for (int i = aPos - 1; i >= 0; i--) { |
| 2445 FX_WCHAR ch = str.GetAt(i); | 2386 FX_WCHAR ch = str.GetAt(i); |
| 2446 if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch)) { | 2387 if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch)) |
| 2447 continue; | 2388 continue; |
| 2448 } | 2389 |
| 2449 if (ch != L'.' || i == pPos - 1 || i == 0) { | 2390 if (ch != L'.' || i == pPos - 1 || i == 0) { |
| 2450 if (i == aPos - 1) { | 2391 if (i == aPos - 1) { |
| 2451 // There is '.' or invalid char before '@'. | 2392 // There is '.' or invalid char before '@'. |
| 2452 return FALSE; | 2393 return FALSE; |
| 2453 } | 2394 } |
| 2454 // End extracting for other invalid chars, '.' at the beginning, or | 2395 // End extracting for other invalid chars, '.' at the beginning, or |
| 2455 // consecutive '.'. | 2396 // consecutive '.'. |
| 2456 int removed_len = i == pPos - 1 ? i + 2 : i + 1; | 2397 int removed_len = i == pPos - 1 ? i + 2 : i + 1; |
| 2457 str = str.Right(str.GetLength() - removed_len); | 2398 str = str.Right(str.GetLength() - removed_len); |
| 2458 break; | 2399 break; |
| 2459 } | 2400 } |
| 2460 // Found a valid '.'. | 2401 // Found a valid '.'. |
| 2461 pPos = i; | 2402 pPos = i; |
| 2462 } | 2403 } |
| 2463 | 2404 |
| 2464 // Check the domain name part. | 2405 // Check the domain name part. |
| 2465 aPos = str.Find(L'@'); | 2406 aPos = str.Find(L'@'); |
| 2466 if (aPos < 1) { | 2407 if (aPos < 1) |
| 2467 return FALSE; | 2408 return false; |
| 2468 } | 2409 |
| 2469 str.TrimRight(L'.'); | 2410 str.TrimRight(L'.'); |
| 2470 // At least one '.' in domain name, but not at the beginning. | 2411 // At least one '.' in domain name, but not at the beginning. |
| 2471 // TODO(weili): RFC5322 allows domain names to be a local name without '.'. | 2412 // TODO(weili): RFC5322 allows domain names to be a local name without '.'. |
| 2472 // Check whether we should remove this check. | 2413 // Check whether we should remove this check. |
| 2473 int ePos = str.Find(L'.', aPos + 1); | 2414 int ePos = str.Find(L'.', aPos + 1); |
| 2474 if (ePos == -1 || ePos == aPos + 1) { | 2415 if (ePos == -1 || ePos == aPos + 1) |
| 2475 return FALSE; | 2416 return false; |
| 2476 } | 2417 |
| 2477 // Validate all other chars in domain name. | 2418 // Validate all other chars in domain name. |
| 2478 int nLen = str.GetLength(); | 2419 int nLen = str.GetLength(); |
| 2479 pPos = 0; // Used to track the position of '.'. | 2420 pPos = 0; // Used to track the position of '.'. |
| 2480 for (int i = aPos + 1; i < nLen; i++) { | 2421 for (int i = aPos + 1; i < nLen; i++) { |
| 2481 FX_WCHAR wch = str.GetAt(i); | 2422 FX_WCHAR wch = str.GetAt(i); |
| 2482 if (wch == L'-' || FXSYS_iswalnum(wch)) { | 2423 if (wch == L'-' || FXSYS_iswalnum(wch)) |
| 2483 continue; | 2424 continue; |
| 2484 } | 2425 |
| 2485 if (wch != L'.' || i == pPos + 1) { | 2426 if (wch != L'.' || i == pPos + 1) { |
| 2486 // Domain name should end before invalid char. | 2427 // Domain name should end before invalid char. |
| 2487 int host_end = i == pPos + 1 ? i - 2 : i - 1; | 2428 int host_end = i == pPos + 1 ? i - 2 : i - 1; |
| 2488 if (pPos > 0 && host_end - aPos >= 3) { | 2429 if (pPos > 0 && host_end - aPos >= 3) { |
| 2489 // Trim the ending invalid chars if there is at least one '.' and name. | 2430 // Trim the ending invalid chars if there is at least one '.' and name. |
| 2490 str = str.Left(host_end + 1); | 2431 str = str.Left(host_end + 1); |
| 2491 break; | 2432 break; |
| 2492 } | 2433 } |
| 2493 return FALSE; | 2434 return false; |
| 2494 } | 2435 } |
| 2495 pPos = i; | 2436 pPos = i; |
| 2496 } | 2437 } |
| 2497 | 2438 |
| 2498 if (str.Find(L"mailto:") == -1) { | 2439 if (str.Find(L"mailto:") == -1) |
| 2499 str = L"mailto:" + str; | 2440 str = L"mailto:" + str; |
| 2500 } | 2441 |
| 2501 return TRUE; | 2442 return true; |
| 2502 } | 2443 } |
| 2503 | 2444 |
| 2504 void CPDF_LinkExtract::AppendToLinkList(int start, | 2445 CFX_WideString CPDF_LinkExtract::GetURL(size_t index) const { |
| 2505 int count, | 2446 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; |
| 2506 const CFX_WideString& strUrl) { | |
| 2507 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; | |
| 2508 linkInfo->m_strUrl = strUrl; | |
| 2509 linkInfo->m_Start = start; | |
| 2510 linkInfo->m_Count = count; | |
| 2511 m_LinkList.Add(linkInfo); | |
| 2512 } | 2447 } |
| 2513 | 2448 |
| 2514 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { | 2449 void CPDF_LinkExtract::GetRects(size_t index, CFX_RectArray* pRects) const { |
| 2515 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2450 if (index < m_LinkArray.size()) { |
| 2516 return L""; | 2451 m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, |
| 2452 m_LinkArray[index].m_Count, pRects); |
| 2517 } | 2453 } |
| 2518 CPDF_LinkExt* link = NULL; | |
| 2519 link = m_LinkList.GetAt(index); | |
| 2520 if (!link) { | |
| 2521 return L""; | |
| 2522 } | |
| 2523 return link->m_strUrl; | |
| 2524 } | 2454 } |
| 2525 void CPDF_LinkExtract::GetBoundedSegment(int index, | |
| 2526 int& start, | |
| 2527 int& count) const { | |
| 2528 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | |
| 2529 return; | |
| 2530 } | |
| 2531 CPDF_LinkExt* link = NULL; | |
| 2532 link = m_LinkList.GetAt(index); | |
| 2533 if (!link) { | |
| 2534 return; | |
| 2535 } | |
| 2536 start = link->m_Start; | |
| 2537 count = link->m_Count; | |
| 2538 } | |
| 2539 | |
| 2540 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { | |
| 2541 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | |
| 2542 return; | |
| 2543 } | |
| 2544 CPDF_LinkExt* link = NULL; | |
| 2545 link = m_LinkList.GetAt(index); | |
| 2546 if (!link) { | |
| 2547 return; | |
| 2548 } | |
| 2549 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | |
| 2550 } | |
| OLD | NEW |