OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <cctype> | 8 #include <cctype> |
9 #include <cwctype> | 9 #include <cwctype> |
10 #include <memory> | 10 #include <memory> |
(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
255 return -1; | 255 return -1; |
256 | 256 |
257 return CharIndex - m_CharIndex[i] + count - m_CharIndex[i + 1]; | 257 return CharIndex - m_CharIndex[i] + count - m_CharIndex[i + 1]; |
258 } | 258 } |
259 } | 259 } |
260 return -1; | 260 return -1; |
261 } | 261 } |
262 | 262 |
263 void CPDF_TextPage::GetRectArray(int start, | 263 void CPDF_TextPage::GetRectArray(int start, |
264 int nCount, | 264 int nCount, |
265 CFX_RectArray& rectArray) const { | 265 CFX_RectArray* rectArray) const { |
266 if (start < 0 || nCount == 0) { | 266 if (start < 0 || nCount == 0) { |
267 return; | 267 return; |
268 } | 268 } |
269 if (!m_bIsParsed) { | 269 if (!m_bIsParsed) { |
270 return; | 270 return; |
271 } | 271 } |
272 CPDF_TextObject* pCurObj = NULL; | 272 CPDF_TextObject* pCurObj = NULL; |
273 CFX_FloatRect rect; | 273 CFX_FloatRect rect; |
274 int curPos = start; | 274 int curPos = start; |
275 FX_BOOL flagNewRect = TRUE; | 275 FX_BOOL flagNewRect = TRUE; |
276 if (nCount + start > pdfium::CollectionSize<int>(m_CharList) || | 276 if (nCount + start > pdfium::CollectionSize<int>(m_CharList) || |
277 nCount == -1) { | 277 nCount == -1) { |
278 nCount = pdfium::CollectionSize<int>(m_CharList) - start; | 278 nCount = pdfium::CollectionSize<int>(m_CharList) - start; |
279 } | 279 } |
280 while (nCount--) { | 280 while (nCount--) { |
281 PAGECHAR_INFO info_curchar = m_CharList[curPos++]; | 281 PAGECHAR_INFO info_curchar = m_CharList[curPos++]; |
282 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 282 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
283 continue; | 283 continue; |
284 } | 284 } |
285 if (info_curchar.m_CharBox.Width() < 0.01 || | 285 if (info_curchar.m_CharBox.Width() < 0.01 || |
286 info_curchar.m_CharBox.Height() < 0.01) { | 286 info_curchar.m_CharBox.Height() < 0.01) { |
287 continue; | 287 continue; |
288 } | 288 } |
289 if (!pCurObj) { | 289 if (!pCurObj) { |
290 pCurObj = info_curchar.m_pTextObj; | 290 pCurObj = info_curchar.m_pTextObj; |
291 } | 291 } |
292 if (pCurObj != info_curchar.m_pTextObj) { | 292 if (pCurObj != info_curchar.m_pTextObj) { |
293 rectArray.Add(rect); | 293 rectArray->Add(rect); |
294 pCurObj = info_curchar.m_pTextObj; | 294 pCurObj = info_curchar.m_pTextObj; |
295 flagNewRect = TRUE; | 295 flagNewRect = TRUE; |
296 } | 296 } |
297 if (flagNewRect) { | 297 if (flagNewRect) { |
298 FX_FLOAT orgX = info_curchar.m_OriginX, orgY = info_curchar.m_OriginY; | 298 FX_FLOAT orgX = info_curchar.m_OriginX, orgY = info_curchar.m_OriginY; |
299 CFX_Matrix matrix, matrix_reverse; | 299 CFX_Matrix matrix, matrix_reverse; |
300 info_curchar.m_pTextObj->GetTextMatrix(&matrix); | 300 info_curchar.m_pTextObj->GetTextMatrix(&matrix); |
301 matrix.Concat(info_curchar.m_Matrix); | 301 matrix.Concat(info_curchar.m_Matrix); |
302 matrix_reverse.SetReverse(matrix); | 302 matrix_reverse.SetReverse(matrix); |
303 matrix_reverse.Transform(orgX, orgY); | 303 matrix_reverse.Transform(orgX, orgY); |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
336 rect.right = info_curchar.m_CharBox.right; | 336 rect.right = info_curchar.m_CharBox.right; |
337 } | 337 } |
338 if (rect.top < info_curchar.m_CharBox.top) { | 338 if (rect.top < info_curchar.m_CharBox.top) { |
339 rect.top = info_curchar.m_CharBox.top; | 339 rect.top = info_curchar.m_CharBox.top; |
340 } | 340 } |
341 if (rect.bottom > info_curchar.m_CharBox.bottom) { | 341 if (rect.bottom > info_curchar.m_CharBox.bottom) { |
342 rect.bottom = info_curchar.m_CharBox.bottom; | 342 rect.bottom = info_curchar.m_CharBox.bottom; |
343 } | 343 } |
344 } | 344 } |
345 } | 345 } |
346 rectArray.Add(rect); | 346 rectArray->Add(rect); |
347 } | 347 } |
348 | 348 |
349 int CPDF_TextPage::GetIndexAtPos(CFX_FloatPoint point, | 349 int CPDF_TextPage::GetIndexAtPos(CFX_FloatPoint point, |
350 FX_FLOAT xTolerance, | 350 FX_FLOAT xTolerance, |
351 FX_FLOAT yTolerance) const { | 351 FX_FLOAT yTolerance) const { |
352 if (!m_bIsParsed) | 352 if (!m_bIsParsed) |
353 return -3; | 353 return -3; |
354 | 354 |
355 int pos = 0; | 355 int pos = 0; |
356 int NearPos = -1; | 356 int NearPos = -1; |
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
585 | 585 |
586 int CPDF_TextPage::CountRects(int start, int nCount) { | 586 int CPDF_TextPage::CountRects(int start, int nCount) { |
587 if (!m_bIsParsed || start < 0) | 587 if (!m_bIsParsed || start < 0) |
588 return -1; | 588 return -1; |
589 | 589 |
590 if (nCount == -1 || | 590 if (nCount == -1 || |
591 nCount + start > pdfium::CollectionSize<int>(m_CharList)) { | 591 nCount + start > pdfium::CollectionSize<int>(m_CharList)) { |
592 nCount = pdfium::CollectionSize<int>(m_CharList) - start; | 592 nCount = pdfium::CollectionSize<int>(m_CharList) - start; |
593 } | 593 } |
594 m_SelRects.RemoveAll(); | 594 m_SelRects.RemoveAll(); |
595 GetRectArray(start, nCount, m_SelRects); | 595 GetRectArray(start, nCount, &m_SelRects); |
596 return m_SelRects.GetSize(); | 596 return m_SelRects.GetSize(); |
597 } | 597 } |
598 | 598 |
599 void CPDF_TextPage::GetRect(int rectIndex, | 599 void CPDF_TextPage::GetRect(int rectIndex, |
600 FX_FLOAT& left, | 600 FX_FLOAT& left, |
601 FX_FLOAT& top, | 601 FX_FLOAT& top, |
602 FX_FLOAT& right, | 602 FX_FLOAT& right, |
603 FX_FLOAT& bottom) const { | 603 FX_FLOAT& bottom) const { |
604 if (!m_bIsParsed) | 604 if (!m_bIsParsed) |
605 return; | 605 return; |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
642 Rotate = (int)(a * 180 / FX_PI + 0.5); | 642 Rotate = (int)(a * 180 / FX_PI + 0.5); |
643 } | 643 } |
644 if (Rotate < 0) { | 644 if (Rotate < 0) { |
645 Rotate = -Rotate; | 645 Rotate = -Rotate; |
646 } else if (Rotate > 0) { | 646 } else if (Rotate > 0) { |
647 Rotate = 360 - Rotate; | 647 Rotate = 360 - Rotate; |
648 } | 648 } |
649 return TRUE; | 649 return TRUE; |
650 } | 650 } |
651 | 651 |
652 FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, | |
653 int& Rotate) { | |
654 int start, end, count, | |
655 n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, | |
656 TRUE); | |
657 if (n < 1) { | |
658 return FALSE; | |
659 } | |
660 if (n > 1) { | |
661 GetBoundedSegment(n - 1, start, count); | |
662 end = start + count - 1; | |
663 GetBoundedSegment(0, start, count); | |
664 } else { | |
665 GetBoundedSegment(0, start, count); | |
666 end = start + count - 1; | |
667 } | |
668 return GetBaselineRotate(start, end, Rotate); | |
669 } | |
670 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { | |
671 if (!m_bIsParsed) | |
672 return FALSE; | |
673 | |
674 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) | |
675 return FALSE; | |
676 | |
677 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); | |
678 return GetBaselineRotate(rect, Rotate); | |
679 } | |
680 | |
681 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, | 652 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, |
682 FX_FLOAT top, | 653 FX_FLOAT top, |
683 FX_FLOAT right, | 654 FX_FLOAT right, |
684 FX_FLOAT bottom, | 655 FX_FLOAT bottom, |
685 FX_BOOL bContains) { | 656 FX_BOOL bContains) { |
686 m_Segments.RemoveAll(); | 657 m_Segments.RemoveAll(); |
687 if (!m_bIsParsed) | 658 if (!m_bIsParsed) |
688 return -1; | 659 return -1; |
689 | 660 |
690 CFX_FloatRect rect(left, bottom, right, top); | 661 CFX_FloatRect rect(left, bottom, right, top); |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
749 } | 720 } |
750 if (segmentStatus == 1) { | 721 if (segmentStatus == 1) { |
751 segmentStatus = 2; | 722 segmentStatus = 2; |
752 m_Segments.Add(segment); | 723 m_Segments.Add(segment); |
753 segment.m_Start = 0; | 724 segment.m_Start = 0; |
754 segment.m_nCount = 0; | 725 segment.m_nCount = 0; |
755 } | 726 } |
756 return m_Segments.GetSize(); | 727 return m_Segments.GetSize(); |
757 } | 728 } |
758 | 729 |
759 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { | |
760 if (index < 0 || index >= m_Segments.GetSize()) { | |
761 return; | |
762 } | |
763 start = m_Segments.GetAt(index).m_Start; | |
764 count = m_Segments.GetAt(index).m_nCount; | |
765 } | |
766 | |
767 int CPDF_TextPage::GetWordBreak(int index, int direction) const { | 730 int CPDF_TextPage::GetWordBreak(int index, int direction) const { |
768 if (!m_bIsParsed) | 731 if (!m_bIsParsed) |
769 return -1; | 732 return -1; |
770 | 733 |
771 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) | 734 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) |
772 return -1; | 735 return -1; |
773 | 736 |
774 if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList)) | 737 if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList)) |
775 return -1; | 738 return -1; |
776 | 739 |
(...skipping 1342 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2119 nStartPos = m_resStart + m_csFindWhatArray[1].GetLength(); | 2082 nStartPos = m_resStart + m_csFindWhatArray[1].GetLength(); |
2120 } else { | 2083 } else { |
2121 nStartPos = m_resStart + m_csFindWhatArray[0].GetLength(); | 2084 nStartPos = m_resStart + m_csFindWhatArray[0].GetLength(); |
2122 } | 2085 } |
2123 } | 2086 } |
2124 } | 2087 } |
2125 m_resEnd = nResultPos + m_csFindWhatArray.back().GetLength() - 1; | 2088 m_resEnd = nResultPos + m_csFindWhatArray.back().GetLength() - 1; |
2126 m_IsFind = TRUE; | 2089 m_IsFind = TRUE; |
2127 int resStart = GetCharIndex(m_resStart); | 2090 int resStart = GetCharIndex(m_resStart); |
2128 int resEnd = GetCharIndex(m_resEnd); | 2091 int resEnd = GetCharIndex(m_resEnd); |
2129 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray); | 2092 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, &m_resArray); |
2130 if (m_flags & FPDFTEXT_CONSECUTIVE) { | 2093 if (m_flags & FPDFTEXT_CONSECUTIVE) { |
2131 m_findNextStart = m_resStart + 1; | 2094 m_findNextStart = m_resStart + 1; |
2132 m_findPreStart = m_resEnd - 1; | 2095 m_findPreStart = m_resEnd - 1; |
2133 } else { | 2096 } else { |
2134 m_findNextStart = m_resEnd + 1; | 2097 m_findNextStart = m_resEnd + 1; |
2135 m_findPreStart = m_resStart - 1; | 2098 m_findPreStart = m_resStart - 1; |
2136 } | 2099 } |
2137 return m_IsFind; | 2100 return m_IsFind; |
2138 } | 2101 } |
2139 | 2102 |
(...skipping 25 matching lines...) Expand all Loading... |
2165 MatchedCount = MatchedCount1; | 2128 MatchedCount = MatchedCount1; |
2166 } | 2129 } |
2167 } | 2130 } |
2168 if (order == -1) { | 2131 if (order == -1) { |
2169 m_IsFind = FALSE; | 2132 m_IsFind = FALSE; |
2170 return m_IsFind; | 2133 return m_IsFind; |
2171 } | 2134 } |
2172 m_resStart = m_pTextPage->TextIndexFromCharIndex(order); | 2135 m_resStart = m_pTextPage->TextIndexFromCharIndex(order); |
2173 m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1); | 2136 m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1); |
2174 m_IsFind = TRUE; | 2137 m_IsFind = TRUE; |
2175 m_pTextPage->GetRectArray(order, MatchedCount, m_resArray); | 2138 m_pTextPage->GetRectArray(order, MatchedCount, &m_resArray); |
2176 if (m_flags & FPDFTEXT_CONSECUTIVE) { | 2139 if (m_flags & FPDFTEXT_CONSECUTIVE) { |
2177 m_findNextStart = m_resStart + 1; | 2140 m_findNextStart = m_resStart + 1; |
2178 m_findPreStart = m_resEnd - 1; | 2141 m_findPreStart = m_resEnd - 1; |
2179 } else { | 2142 } else { |
2180 m_findNextStart = m_resEnd + 1; | 2143 m_findNextStart = m_resEnd + 1; |
2181 m_findPreStart = m_resStart - 1; | 2144 m_findPreStart = m_resStart - 1; |
2182 } | 2145 } |
2183 return m_IsFind; | 2146 return m_IsFind; |
2184 } | 2147 } |
2185 | 2148 |
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2323 int CPDF_TextPageFind::GetCurOrder() const { | 2286 int CPDF_TextPageFind::GetCurOrder() const { |
2324 return GetCharIndex(m_resStart); | 2287 return GetCharIndex(m_resStart); |
2325 } | 2288 } |
2326 | 2289 |
2327 int CPDF_TextPageFind::GetMatchedCount() const { | 2290 int CPDF_TextPageFind::GetMatchedCount() const { |
2328 int resStart = GetCharIndex(m_resStart); | 2291 int resStart = GetCharIndex(m_resStart); |
2329 int resEnd = GetCharIndex(m_resEnd); | 2292 int resEnd = GetCharIndex(m_resEnd); |
2330 return resEnd - resStart + 1; | 2293 return resEnd - resStart + 1; |
2331 } | 2294 } |
2332 | 2295 |
2333 CPDF_LinkExtract::CPDF_LinkExtract() | 2296 CPDF_LinkExtract::CPDF_LinkExtract(const CPDF_TextPage* pTextPage) |
2334 : m_pTextPage(nullptr), m_bIsParsed(false) {} | 2297 : m_pTextPage(pTextPage) {} |
2335 | 2298 |
2336 CPDF_LinkExtract::~CPDF_LinkExtract() { | 2299 CPDF_LinkExtract::~CPDF_LinkExtract() { |
2337 DeleteLinkList(); | |
2338 } | 2300 } |
2339 | 2301 |
2340 FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) { | 2302 void CPDF_LinkExtract::ExtractLinks() { |
2341 if (!pTextPage || !pTextPage->IsParsed()) | 2303 m_LinkArray.clear(); |
2342 return FALSE; | 2304 if (!m_pTextPage->IsParsed()) |
| 2305 return; |
2343 | 2306 |
2344 m_pTextPage = (const CPDF_TextPage*)pTextPage; | |
2345 m_strPageText = m_pTextPage->GetPageText(0, -1); | 2307 m_strPageText = m_pTextPage->GetPageText(0, -1); |
2346 DeleteLinkList(); | 2308 if (m_strPageText.IsEmpty()) |
2347 if (m_strPageText.IsEmpty()) { | 2309 return; |
2348 return FALSE; | 2310 |
2349 } | |
2350 ParseLink(); | 2311 ParseLink(); |
2351 m_bIsParsed = true; | |
2352 return TRUE; | |
2353 } | |
2354 | |
2355 void CPDF_LinkExtract::DeleteLinkList() { | |
2356 while (m_LinkList.GetSize()) { | |
2357 CPDF_LinkExt* linkinfo = NULL; | |
2358 linkinfo = m_LinkList.GetAt(0); | |
2359 m_LinkList.RemoveAt(0); | |
2360 delete linkinfo; | |
2361 } | |
2362 m_LinkList.RemoveAll(); | |
2363 } | |
2364 | |
2365 int CPDF_LinkExtract::CountLinks() const { | |
2366 if (!m_bIsParsed) { | |
2367 return -1; | |
2368 } | |
2369 return m_LinkList.GetSize(); | |
2370 } | 2312 } |
2371 | 2313 |
2372 void CPDF_LinkExtract::ParseLink() { | 2314 void CPDF_LinkExtract::ParseLink() { |
2373 int start = 0, pos = 0; | 2315 int start = 0, pos = 0; |
2374 int TotalChar = m_pTextPage->CountChars(); | 2316 int TotalChar = m_pTextPage->CountChars(); |
2375 while (pos < TotalChar) { | 2317 while (pos < TotalChar) { |
2376 FPDF_CHAR_INFO pageChar; | 2318 FPDF_CHAR_INFO pageChar; |
2377 m_pTextPage->GetCharInfo(pos, &pageChar); | 2319 m_pTextPage->GetCharInfo(pos, &pageChar); |
2378 if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED || | 2320 if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED || |
2379 pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) { | 2321 pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) { |
2380 int nCount = pos - start; | 2322 int nCount = pos - start; |
2381 if (pos == TotalChar - 1) { | 2323 if (pos == TotalChar - 1) { |
2382 nCount++; | 2324 nCount++; |
2383 } | 2325 } |
2384 CFX_WideString strBeCheck; | 2326 CFX_WideString strBeCheck; |
2385 strBeCheck = m_pTextPage->GetPageText(start, nCount); | 2327 strBeCheck = m_pTextPage->GetPageText(start, nCount); |
2386 if (strBeCheck.GetLength() > 5) { | 2328 if (strBeCheck.GetLength() > 5) { |
2387 while (strBeCheck.GetLength() > 0) { | 2329 while (strBeCheck.GetLength() > 0) { |
2388 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); | 2330 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); |
2389 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') { | 2331 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') { |
2390 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1); | 2332 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1); |
2391 nCount--; | 2333 nCount--; |
2392 } else { | 2334 } else { |
2393 break; | 2335 break; |
2394 } | 2336 } |
2395 } | 2337 } |
2396 if (nCount > 5 && | 2338 if (nCount > 5 && |
2397 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { | 2339 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { |
2398 AppendToLinkList(start, nCount, strBeCheck); | 2340 m_LinkArray.push_back({start, nCount, strBeCheck}); |
2399 } | 2341 } |
2400 } | 2342 } |
2401 start = ++pos; | 2343 start = ++pos; |
2402 } else { | 2344 } else { |
2403 pos++; | 2345 pos++; |
2404 } | 2346 } |
2405 } | 2347 } |
2406 } | 2348 } |
2407 | 2349 |
2408 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { | 2350 bool CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { |
2409 CFX_WideString str = strBeCheck; | 2351 CFX_WideString str = strBeCheck; |
2410 str.MakeLower(); | 2352 str.MakeLower(); |
2411 if (str.Find(L"http://www.") != -1) { | 2353 if (str.Find(L"http://www.") != -1) { |
2412 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); | 2354 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); |
2413 return TRUE; | 2355 return true; |
2414 } | 2356 } |
2415 if (str.Find(L"http://") != -1) { | 2357 if (str.Find(L"http://") != -1) { |
2416 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); | 2358 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); |
2417 return TRUE; | 2359 return true; |
2418 } | 2360 } |
2419 if (str.Find(L"https://www.") != -1) { | 2361 if (str.Find(L"https://www.") != -1) { |
2420 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); | 2362 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); |
2421 return TRUE; | 2363 return true; |
2422 } | 2364 } |
2423 if (str.Find(L"https://") != -1) { | 2365 if (str.Find(L"https://") != -1) { |
2424 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); | 2366 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); |
2425 return TRUE; | 2367 return true; |
2426 } | 2368 } |
2427 if (str.Find(L"www.") != -1) { | 2369 if (str.Find(L"www.") != -1) { |
2428 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); | 2370 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); |
2429 strBeCheck = L"http://" + strBeCheck; | 2371 strBeCheck = L"http://" + strBeCheck; |
2430 return TRUE; | 2372 return true; |
2431 } | 2373 } |
2432 return FALSE; | 2374 return false; |
2433 } | 2375 } |
2434 | 2376 |
2435 bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { | 2377 bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { |
2436 int aPos = str.Find(L'@'); | 2378 int aPos = str.Find(L'@'); |
2437 // Invalid when no '@'. | 2379 // Invalid when no '@'. |
2438 if (aPos < 1) { | 2380 if (aPos < 1) |
2439 return FALSE; | 2381 return false; |
2440 } | |
2441 | 2382 |
2442 // Check the local part. | 2383 // Check the local part. |
2443 int pPos = aPos; // Used to track the position of '@' or '.'. | 2384 int pPos = aPos; // Used to track the position of '@' or '.'. |
2444 for (int i = aPos - 1; i >= 0; i--) { | 2385 for (int i = aPos - 1; i >= 0; i--) { |
2445 FX_WCHAR ch = str.GetAt(i); | 2386 FX_WCHAR ch = str.GetAt(i); |
2446 if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch)) { | 2387 if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch)) |
2447 continue; | 2388 continue; |
2448 } | 2389 |
2449 if (ch != L'.' || i == pPos - 1 || i == 0) { | 2390 if (ch != L'.' || i == pPos - 1 || i == 0) { |
2450 if (i == aPos - 1) { | 2391 if (i == aPos - 1) { |
2451 // There is '.' or invalid char before '@'. | 2392 // There is '.' or invalid char before '@'. |
2452 return FALSE; | 2393 return FALSE; |
2453 } | 2394 } |
2454 // End extracting for other invalid chars, '.' at the beginning, or | 2395 // End extracting for other invalid chars, '.' at the beginning, or |
2455 // consecutive '.'. | 2396 // consecutive '.'. |
2456 int removed_len = i == pPos - 1 ? i + 2 : i + 1; | 2397 int removed_len = i == pPos - 1 ? i + 2 : i + 1; |
2457 str = str.Right(str.GetLength() - removed_len); | 2398 str = str.Right(str.GetLength() - removed_len); |
2458 break; | 2399 break; |
2459 } | 2400 } |
2460 // Found a valid '.'. | 2401 // Found a valid '.'. |
2461 pPos = i; | 2402 pPos = i; |
2462 } | 2403 } |
2463 | 2404 |
2464 // Check the domain name part. | 2405 // Check the domain name part. |
2465 aPos = str.Find(L'@'); | 2406 aPos = str.Find(L'@'); |
2466 if (aPos < 1) { | 2407 if (aPos < 1) |
2467 return FALSE; | 2408 return false; |
2468 } | 2409 |
2469 str.TrimRight(L'.'); | 2410 str.TrimRight(L'.'); |
2470 // At least one '.' in domain name, but not at the beginning. | 2411 // At least one '.' in domain name, but not at the beginning. |
2471 // TODO(weili): RFC5322 allows domain names to be a local name without '.'. | 2412 // TODO(weili): RFC5322 allows domain names to be a local name without '.'. |
2472 // Check whether we should remove this check. | 2413 // Check whether we should remove this check. |
2473 int ePos = str.Find(L'.', aPos + 1); | 2414 int ePos = str.Find(L'.', aPos + 1); |
2474 if (ePos == -1 || ePos == aPos + 1) { | 2415 if (ePos == -1 || ePos == aPos + 1) |
2475 return FALSE; | 2416 return false; |
2476 } | 2417 |
2477 // Validate all other chars in domain name. | 2418 // Validate all other chars in domain name. |
2478 int nLen = str.GetLength(); | 2419 int nLen = str.GetLength(); |
2479 pPos = 0; // Used to track the position of '.'. | 2420 pPos = 0; // Used to track the position of '.'. |
2480 for (int i = aPos + 1; i < nLen; i++) { | 2421 for (int i = aPos + 1; i < nLen; i++) { |
2481 FX_WCHAR wch = str.GetAt(i); | 2422 FX_WCHAR wch = str.GetAt(i); |
2482 if (wch == L'-' || FXSYS_iswalnum(wch)) { | 2423 if (wch == L'-' || FXSYS_iswalnum(wch)) |
2483 continue; | 2424 continue; |
2484 } | 2425 |
2485 if (wch != L'.' || i == pPos + 1) { | 2426 if (wch != L'.' || i == pPos + 1) { |
2486 // Domain name should end before invalid char. | 2427 // Domain name should end before invalid char. |
2487 int host_end = i == pPos + 1 ? i - 2 : i - 1; | 2428 int host_end = i == pPos + 1 ? i - 2 : i - 1; |
2488 if (pPos > 0 && host_end - aPos >= 3) { | 2429 if (pPos > 0 && host_end - aPos >= 3) { |
2489 // Trim the ending invalid chars if there is at least one '.' and name. | 2430 // Trim the ending invalid chars if there is at least one '.' and name. |
2490 str = str.Left(host_end + 1); | 2431 str = str.Left(host_end + 1); |
2491 break; | 2432 break; |
2492 } | 2433 } |
2493 return FALSE; | 2434 return false; |
2494 } | 2435 } |
2495 pPos = i; | 2436 pPos = i; |
2496 } | 2437 } |
2497 | 2438 |
2498 if (str.Find(L"mailto:") == -1) { | 2439 if (str.Find(L"mailto:") == -1) |
2499 str = L"mailto:" + str; | 2440 str = L"mailto:" + str; |
2500 } | 2441 |
2501 return TRUE; | 2442 return true; |
2502 } | 2443 } |
2503 | 2444 |
2504 void CPDF_LinkExtract::AppendToLinkList(int start, | 2445 CFX_WideString CPDF_LinkExtract::GetURL(size_t index) const { |
2505 int count, | 2446 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; |
2506 const CFX_WideString& strUrl) { | |
2507 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; | |
2508 linkInfo->m_strUrl = strUrl; | |
2509 linkInfo->m_Start = start; | |
2510 linkInfo->m_Count = count; | |
2511 m_LinkList.Add(linkInfo); | |
2512 } | 2447 } |
2513 | 2448 |
2514 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { | 2449 void CPDF_LinkExtract::GetRects(size_t index, CFX_RectArray* pRects) const { |
2515 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2450 if (index < m_LinkArray.size()) { |
2516 return L""; | 2451 m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, |
| 2452 m_LinkArray[index].m_Count, pRects); |
2517 } | 2453 } |
2518 CPDF_LinkExt* link = NULL; | |
2519 link = m_LinkList.GetAt(index); | |
2520 if (!link) { | |
2521 return L""; | |
2522 } | |
2523 return link->m_strUrl; | |
2524 } | 2454 } |
2525 void CPDF_LinkExtract::GetBoundedSegment(int index, | |
2526 int& start, | |
2527 int& count) const { | |
2528 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | |
2529 return; | |
2530 } | |
2531 CPDF_LinkExt* link = NULL; | |
2532 link = m_LinkList.GetAt(index); | |
2533 if (!link) { | |
2534 return; | |
2535 } | |
2536 start = link->m_Start; | |
2537 count = link->m_Count; | |
2538 } | |
2539 | |
2540 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { | |
2541 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | |
2542 return; | |
2543 } | |
2544 CPDF_LinkExt* link = NULL; | |
2545 link = m_LinkList.GetAt(index); | |
2546 if (!link) { | |
2547 return; | |
2548 } | |
2549 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | |
2550 } | |
OLD | NEW |