Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(450)

Side by Side Diff: core/fpdftext/fpdf_text_int.cpp

Issue 1896303002: Remove CFX_ArrayTemplate from CPDF_LinkExtract (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: No RVO for you. Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | core/fpdftext/fpdf_text_int_unittest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <cctype> 8 #include <cctype>
9 #include <cwctype> 9 #include <cwctype>
10 #include <memory> 10 #include <memory>
(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after
255 return -1; 255 return -1;
256 256
257 return CharIndex - m_CharIndex[i] + count - m_CharIndex[i + 1]; 257 return CharIndex - m_CharIndex[i] + count - m_CharIndex[i + 1];
258 } 258 }
259 } 259 }
260 return -1; 260 return -1;
261 } 261 }
262 262
263 void CPDF_TextPage::GetRectArray(int start, 263 void CPDF_TextPage::GetRectArray(int start,
264 int nCount, 264 int nCount,
265 CFX_RectArray& rectArray) const { 265 CFX_RectArray* rectArray) const {
266 if (start < 0 || nCount == 0) { 266 if (start < 0 || nCount == 0) {
267 return; 267 return;
268 } 268 }
269 if (!m_bIsParsed) { 269 if (!m_bIsParsed) {
270 return; 270 return;
271 } 271 }
272 CPDF_TextObject* pCurObj = NULL; 272 CPDF_TextObject* pCurObj = NULL;
273 CFX_FloatRect rect; 273 CFX_FloatRect rect;
274 int curPos = start; 274 int curPos = start;
275 FX_BOOL flagNewRect = TRUE; 275 FX_BOOL flagNewRect = TRUE;
276 if (nCount + start > pdfium::CollectionSize<int>(m_CharList) || 276 if (nCount + start > pdfium::CollectionSize<int>(m_CharList) ||
277 nCount == -1) { 277 nCount == -1) {
278 nCount = pdfium::CollectionSize<int>(m_CharList) - start; 278 nCount = pdfium::CollectionSize<int>(m_CharList) - start;
279 } 279 }
280 while (nCount--) { 280 while (nCount--) {
281 PAGECHAR_INFO info_curchar = m_CharList[curPos++]; 281 PAGECHAR_INFO info_curchar = m_CharList[curPos++];
282 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { 282 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) {
283 continue; 283 continue;
284 } 284 }
285 if (info_curchar.m_CharBox.Width() < 0.01 || 285 if (info_curchar.m_CharBox.Width() < 0.01 ||
286 info_curchar.m_CharBox.Height() < 0.01) { 286 info_curchar.m_CharBox.Height() < 0.01) {
287 continue; 287 continue;
288 } 288 }
289 if (!pCurObj) { 289 if (!pCurObj) {
290 pCurObj = info_curchar.m_pTextObj; 290 pCurObj = info_curchar.m_pTextObj;
291 } 291 }
292 if (pCurObj != info_curchar.m_pTextObj) { 292 if (pCurObj != info_curchar.m_pTextObj) {
293 rectArray.Add(rect); 293 rectArray->Add(rect);
294 pCurObj = info_curchar.m_pTextObj; 294 pCurObj = info_curchar.m_pTextObj;
295 flagNewRect = TRUE; 295 flagNewRect = TRUE;
296 } 296 }
297 if (flagNewRect) { 297 if (flagNewRect) {
298 FX_FLOAT orgX = info_curchar.m_OriginX, orgY = info_curchar.m_OriginY; 298 FX_FLOAT orgX = info_curchar.m_OriginX, orgY = info_curchar.m_OriginY;
299 CFX_Matrix matrix, matrix_reverse; 299 CFX_Matrix matrix, matrix_reverse;
300 info_curchar.m_pTextObj->GetTextMatrix(&matrix); 300 info_curchar.m_pTextObj->GetTextMatrix(&matrix);
301 matrix.Concat(info_curchar.m_Matrix); 301 matrix.Concat(info_curchar.m_Matrix);
302 matrix_reverse.SetReverse(matrix); 302 matrix_reverse.SetReverse(matrix);
303 matrix_reverse.Transform(orgX, orgY); 303 matrix_reverse.Transform(orgX, orgY);
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
336 rect.right = info_curchar.m_CharBox.right; 336 rect.right = info_curchar.m_CharBox.right;
337 } 337 }
338 if (rect.top < info_curchar.m_CharBox.top) { 338 if (rect.top < info_curchar.m_CharBox.top) {
339 rect.top = info_curchar.m_CharBox.top; 339 rect.top = info_curchar.m_CharBox.top;
340 } 340 }
341 if (rect.bottom > info_curchar.m_CharBox.bottom) { 341 if (rect.bottom > info_curchar.m_CharBox.bottom) {
342 rect.bottom = info_curchar.m_CharBox.bottom; 342 rect.bottom = info_curchar.m_CharBox.bottom;
343 } 343 }
344 } 344 }
345 } 345 }
346 rectArray.Add(rect); 346 rectArray->Add(rect);
347 } 347 }
348 348
349 int CPDF_TextPage::GetIndexAtPos(CFX_FloatPoint point, 349 int CPDF_TextPage::GetIndexAtPos(CFX_FloatPoint point,
350 FX_FLOAT xTolerance, 350 FX_FLOAT xTolerance,
351 FX_FLOAT yTolerance) const { 351 FX_FLOAT yTolerance) const {
352 if (!m_bIsParsed) 352 if (!m_bIsParsed)
353 return -3; 353 return -3;
354 354
355 int pos = 0; 355 int pos = 0;
356 int NearPos = -1; 356 int NearPos = -1;
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
585 585
586 int CPDF_TextPage::CountRects(int start, int nCount) { 586 int CPDF_TextPage::CountRects(int start, int nCount) {
587 if (!m_bIsParsed || start < 0) 587 if (!m_bIsParsed || start < 0)
588 return -1; 588 return -1;
589 589
590 if (nCount == -1 || 590 if (nCount == -1 ||
591 nCount + start > pdfium::CollectionSize<int>(m_CharList)) { 591 nCount + start > pdfium::CollectionSize<int>(m_CharList)) {
592 nCount = pdfium::CollectionSize<int>(m_CharList) - start; 592 nCount = pdfium::CollectionSize<int>(m_CharList) - start;
593 } 593 }
594 m_SelRects.RemoveAll(); 594 m_SelRects.RemoveAll();
595 GetRectArray(start, nCount, m_SelRects); 595 GetRectArray(start, nCount, &m_SelRects);
596 return m_SelRects.GetSize(); 596 return m_SelRects.GetSize();
597 } 597 }
598 598
599 void CPDF_TextPage::GetRect(int rectIndex, 599 void CPDF_TextPage::GetRect(int rectIndex,
600 FX_FLOAT& left, 600 FX_FLOAT& left,
601 FX_FLOAT& top, 601 FX_FLOAT& top,
602 FX_FLOAT& right, 602 FX_FLOAT& right,
603 FX_FLOAT& bottom) const { 603 FX_FLOAT& bottom) const {
604 if (!m_bIsParsed) 604 if (!m_bIsParsed)
605 return; 605 return;
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
642 Rotate = (int)(a * 180 / FX_PI + 0.5); 642 Rotate = (int)(a * 180 / FX_PI + 0.5);
643 } 643 }
644 if (Rotate < 0) { 644 if (Rotate < 0) {
645 Rotate = -Rotate; 645 Rotate = -Rotate;
646 } else if (Rotate > 0) { 646 } else if (Rotate > 0) {
647 Rotate = 360 - Rotate; 647 Rotate = 360 - Rotate;
648 } 648 }
649 return TRUE; 649 return TRUE;
650 } 650 }
651 651
652 FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect,
653 int& Rotate) {
654 int start, end, count,
655 n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom,
656 TRUE);
657 if (n < 1) {
658 return FALSE;
659 }
660 if (n > 1) {
661 GetBoundedSegment(n - 1, start, count);
662 end = start + count - 1;
663 GetBoundedSegment(0, start, count);
664 } else {
665 GetBoundedSegment(0, start, count);
666 end = start + count - 1;
667 }
668 return GetBaselineRotate(start, end, Rotate);
669 }
670 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) {
671 if (!m_bIsParsed)
672 return FALSE;
673
674 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize())
675 return FALSE;
676
677 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);
678 return GetBaselineRotate(rect, Rotate);
679 }
680
681 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, 652 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
682 FX_FLOAT top, 653 FX_FLOAT top,
683 FX_FLOAT right, 654 FX_FLOAT right,
684 FX_FLOAT bottom, 655 FX_FLOAT bottom,
685 FX_BOOL bContains) { 656 FX_BOOL bContains) {
686 m_Segments.RemoveAll(); 657 m_Segments.RemoveAll();
687 if (!m_bIsParsed) 658 if (!m_bIsParsed)
688 return -1; 659 return -1;
689 660
690 CFX_FloatRect rect(left, bottom, right, top); 661 CFX_FloatRect rect(left, bottom, right, top);
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
749 } 720 }
750 if (segmentStatus == 1) { 721 if (segmentStatus == 1) {
751 segmentStatus = 2; 722 segmentStatus = 2;
752 m_Segments.Add(segment); 723 m_Segments.Add(segment);
753 segment.m_Start = 0; 724 segment.m_Start = 0;
754 segment.m_nCount = 0; 725 segment.m_nCount = 0;
755 } 726 }
756 return m_Segments.GetSize(); 727 return m_Segments.GetSize();
757 } 728 }
758 729
759 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {
760 if (index < 0 || index >= m_Segments.GetSize()) {
761 return;
762 }
763 start = m_Segments.GetAt(index).m_Start;
764 count = m_Segments.GetAt(index).m_nCount;
765 }
766
767 int CPDF_TextPage::GetWordBreak(int index, int direction) const { 730 int CPDF_TextPage::GetWordBreak(int index, int direction) const {
768 if (!m_bIsParsed) 731 if (!m_bIsParsed)
769 return -1; 732 return -1;
770 733
771 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) 734 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT)
772 return -1; 735 return -1;
773 736
774 if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList)) 737 if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList))
775 return -1; 738 return -1;
776 739
(...skipping 1342 matching lines...) Expand 10 before | Expand all | Expand 10 after
2119 nStartPos = m_resStart + m_csFindWhatArray[1].GetLength(); 2082 nStartPos = m_resStart + m_csFindWhatArray[1].GetLength();
2120 } else { 2083 } else {
2121 nStartPos = m_resStart + m_csFindWhatArray[0].GetLength(); 2084 nStartPos = m_resStart + m_csFindWhatArray[0].GetLength();
2122 } 2085 }
2123 } 2086 }
2124 } 2087 }
2125 m_resEnd = nResultPos + m_csFindWhatArray.back().GetLength() - 1; 2088 m_resEnd = nResultPos + m_csFindWhatArray.back().GetLength() - 1;
2126 m_IsFind = TRUE; 2089 m_IsFind = TRUE;
2127 int resStart = GetCharIndex(m_resStart); 2090 int resStart = GetCharIndex(m_resStart);
2128 int resEnd = GetCharIndex(m_resEnd); 2091 int resEnd = GetCharIndex(m_resEnd);
2129 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray); 2092 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, &m_resArray);
2130 if (m_flags & FPDFTEXT_CONSECUTIVE) { 2093 if (m_flags & FPDFTEXT_CONSECUTIVE) {
2131 m_findNextStart = m_resStart + 1; 2094 m_findNextStart = m_resStart + 1;
2132 m_findPreStart = m_resEnd - 1; 2095 m_findPreStart = m_resEnd - 1;
2133 } else { 2096 } else {
2134 m_findNextStart = m_resEnd + 1; 2097 m_findNextStart = m_resEnd + 1;
2135 m_findPreStart = m_resStart - 1; 2098 m_findPreStart = m_resStart - 1;
2136 } 2099 }
2137 return m_IsFind; 2100 return m_IsFind;
2138 } 2101 }
2139 2102
(...skipping 25 matching lines...) Expand all
2165 MatchedCount = MatchedCount1; 2128 MatchedCount = MatchedCount1;
2166 } 2129 }
2167 } 2130 }
2168 if (order == -1) { 2131 if (order == -1) {
2169 m_IsFind = FALSE; 2132 m_IsFind = FALSE;
2170 return m_IsFind; 2133 return m_IsFind;
2171 } 2134 }
2172 m_resStart = m_pTextPage->TextIndexFromCharIndex(order); 2135 m_resStart = m_pTextPage->TextIndexFromCharIndex(order);
2173 m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1); 2136 m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1);
2174 m_IsFind = TRUE; 2137 m_IsFind = TRUE;
2175 m_pTextPage->GetRectArray(order, MatchedCount, m_resArray); 2138 m_pTextPage->GetRectArray(order, MatchedCount, &m_resArray);
2176 if (m_flags & FPDFTEXT_CONSECUTIVE) { 2139 if (m_flags & FPDFTEXT_CONSECUTIVE) {
2177 m_findNextStart = m_resStart + 1; 2140 m_findNextStart = m_resStart + 1;
2178 m_findPreStart = m_resEnd - 1; 2141 m_findPreStart = m_resEnd - 1;
2179 } else { 2142 } else {
2180 m_findNextStart = m_resEnd + 1; 2143 m_findNextStart = m_resEnd + 1;
2181 m_findPreStart = m_resStart - 1; 2144 m_findPreStart = m_resStart - 1;
2182 } 2145 }
2183 return m_IsFind; 2146 return m_IsFind;
2184 } 2147 }
2185 2148
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after
2323 int CPDF_TextPageFind::GetCurOrder() const { 2286 int CPDF_TextPageFind::GetCurOrder() const {
2324 return GetCharIndex(m_resStart); 2287 return GetCharIndex(m_resStart);
2325 } 2288 }
2326 2289
2327 int CPDF_TextPageFind::GetMatchedCount() const { 2290 int CPDF_TextPageFind::GetMatchedCount() const {
2328 int resStart = GetCharIndex(m_resStart); 2291 int resStart = GetCharIndex(m_resStart);
2329 int resEnd = GetCharIndex(m_resEnd); 2292 int resEnd = GetCharIndex(m_resEnd);
2330 return resEnd - resStart + 1; 2293 return resEnd - resStart + 1;
2331 } 2294 }
2332 2295
2333 CPDF_LinkExtract::CPDF_LinkExtract() 2296 CPDF_LinkExtract::CPDF_LinkExtract(const CPDF_TextPage* pTextPage)
2334 : m_pTextPage(nullptr), m_bIsParsed(false) {} 2297 : m_pTextPage(pTextPage) {}
2335 2298
2336 CPDF_LinkExtract::~CPDF_LinkExtract() { 2299 CPDF_LinkExtract::~CPDF_LinkExtract() {
2337 DeleteLinkList();
2338 } 2300 }
2339 2301
2340 FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) { 2302 void CPDF_LinkExtract::ExtractLinks() {
2341 if (!pTextPage || !pTextPage->IsParsed()) 2303 m_LinkArray.clear();
2342 return FALSE; 2304 if (!m_pTextPage->IsParsed())
2305 return;
2343 2306
2344 m_pTextPage = (const CPDF_TextPage*)pTextPage;
2345 m_strPageText = m_pTextPage->GetPageText(0, -1); 2307 m_strPageText = m_pTextPage->GetPageText(0, -1);
2346 DeleteLinkList(); 2308 if (m_strPageText.IsEmpty())
2347 if (m_strPageText.IsEmpty()) { 2309 return;
2348 return FALSE; 2310
2349 }
2350 ParseLink(); 2311 ParseLink();
2351 m_bIsParsed = true;
2352 return TRUE;
2353 }
2354
2355 void CPDF_LinkExtract::DeleteLinkList() {
2356 while (m_LinkList.GetSize()) {
2357 CPDF_LinkExt* linkinfo = NULL;
2358 linkinfo = m_LinkList.GetAt(0);
2359 m_LinkList.RemoveAt(0);
2360 delete linkinfo;
2361 }
2362 m_LinkList.RemoveAll();
2363 }
2364
2365 int CPDF_LinkExtract::CountLinks() const {
2366 if (!m_bIsParsed) {
2367 return -1;
2368 }
2369 return m_LinkList.GetSize();
2370 } 2312 }
2371 2313
2372 void CPDF_LinkExtract::ParseLink() { 2314 void CPDF_LinkExtract::ParseLink() {
2373 int start = 0, pos = 0; 2315 int start = 0, pos = 0;
2374 int TotalChar = m_pTextPage->CountChars(); 2316 int TotalChar = m_pTextPage->CountChars();
2375 while (pos < TotalChar) { 2317 while (pos < TotalChar) {
2376 FPDF_CHAR_INFO pageChar; 2318 FPDF_CHAR_INFO pageChar;
2377 m_pTextPage->GetCharInfo(pos, &pageChar); 2319 m_pTextPage->GetCharInfo(pos, &pageChar);
2378 if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED || 2320 if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED ||
2379 pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) { 2321 pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) {
2380 int nCount = pos - start; 2322 int nCount = pos - start;
2381 if (pos == TotalChar - 1) { 2323 if (pos == TotalChar - 1) {
2382 nCount++; 2324 nCount++;
2383 } 2325 }
2384 CFX_WideString strBeCheck; 2326 CFX_WideString strBeCheck;
2385 strBeCheck = m_pTextPage->GetPageText(start, nCount); 2327 strBeCheck = m_pTextPage->GetPageText(start, nCount);
2386 if (strBeCheck.GetLength() > 5) { 2328 if (strBeCheck.GetLength() > 5) {
2387 while (strBeCheck.GetLength() > 0) { 2329 while (strBeCheck.GetLength() > 0) {
2388 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); 2330 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1);
2389 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') { 2331 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') {
2390 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1); 2332 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1);
2391 nCount--; 2333 nCount--;
2392 } else { 2334 } else {
2393 break; 2335 break;
2394 } 2336 }
2395 } 2337 }
2396 if (nCount > 5 && 2338 if (nCount > 5 &&
2397 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { 2339 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) {
2398 AppendToLinkList(start, nCount, strBeCheck); 2340 m_LinkArray.push_back({start, nCount, strBeCheck});
2399 } 2341 }
2400 } 2342 }
2401 start = ++pos; 2343 start = ++pos;
2402 } else { 2344 } else {
2403 pos++; 2345 pos++;
2404 } 2346 }
2405 } 2347 }
2406 } 2348 }
2407 2349
2408 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { 2350 bool CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
2409 CFX_WideString str = strBeCheck; 2351 CFX_WideString str = strBeCheck;
2410 str.MakeLower(); 2352 str.MakeLower();
2411 if (str.Find(L"http://www.") != -1) { 2353 if (str.Find(L"http://www.") != -1) {
2412 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); 2354 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www."));
2413 return TRUE; 2355 return true;
2414 } 2356 }
2415 if (str.Find(L"http://") != -1) { 2357 if (str.Find(L"http://") != -1) {
2416 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); 2358 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://"));
2417 return TRUE; 2359 return true;
2418 } 2360 }
2419 if (str.Find(L"https://www.") != -1) { 2361 if (str.Find(L"https://www.") != -1) {
2420 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); 2362 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www."));
2421 return TRUE; 2363 return true;
2422 } 2364 }
2423 if (str.Find(L"https://") != -1) { 2365 if (str.Find(L"https://") != -1) {
2424 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); 2366 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://"));
2425 return TRUE; 2367 return true;
2426 } 2368 }
2427 if (str.Find(L"www.") != -1) { 2369 if (str.Find(L"www.") != -1) {
2428 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); 2370 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www."));
2429 strBeCheck = L"http://" + strBeCheck; 2371 strBeCheck = L"http://" + strBeCheck;
2430 return TRUE; 2372 return true;
2431 } 2373 }
2432 return FALSE; 2374 return false;
2433 } 2375 }
2434 2376
2435 bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { 2377 bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
2436 int aPos = str.Find(L'@'); 2378 int aPos = str.Find(L'@');
2437 // Invalid when no '@'. 2379 // Invalid when no '@'.
2438 if (aPos < 1) { 2380 if (aPos < 1)
2439 return FALSE; 2381 return false;
2440 }
2441 2382
2442 // Check the local part. 2383 // Check the local part.
2443 int pPos = aPos; // Used to track the position of '@' or '.'. 2384 int pPos = aPos; // Used to track the position of '@' or '.'.
2444 for (int i = aPos - 1; i >= 0; i--) { 2385 for (int i = aPos - 1; i >= 0; i--) {
2445 FX_WCHAR ch = str.GetAt(i); 2386 FX_WCHAR ch = str.GetAt(i);
2446 if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch)) { 2387 if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch))
2447 continue; 2388 continue;
2448 } 2389
2449 if (ch != L'.' || i == pPos - 1 || i == 0) { 2390 if (ch != L'.' || i == pPos - 1 || i == 0) {
2450 if (i == aPos - 1) { 2391 if (i == aPos - 1) {
2451 // There is '.' or invalid char before '@'. 2392 // There is '.' or invalid char before '@'.
2452 return FALSE; 2393 return FALSE;
2453 } 2394 }
2454 // End extracting for other invalid chars, '.' at the beginning, or 2395 // End extracting for other invalid chars, '.' at the beginning, or
2455 // consecutive '.'. 2396 // consecutive '.'.
2456 int removed_len = i == pPos - 1 ? i + 2 : i + 1; 2397 int removed_len = i == pPos - 1 ? i + 2 : i + 1;
2457 str = str.Right(str.GetLength() - removed_len); 2398 str = str.Right(str.GetLength() - removed_len);
2458 break; 2399 break;
2459 } 2400 }
2460 // Found a valid '.'. 2401 // Found a valid '.'.
2461 pPos = i; 2402 pPos = i;
2462 } 2403 }
2463 2404
2464 // Check the domain name part. 2405 // Check the domain name part.
2465 aPos = str.Find(L'@'); 2406 aPos = str.Find(L'@');
2466 if (aPos < 1) { 2407 if (aPos < 1)
2467 return FALSE; 2408 return false;
2468 } 2409
2469 str.TrimRight(L'.'); 2410 str.TrimRight(L'.');
2470 // At least one '.' in domain name, but not at the beginning. 2411 // At least one '.' in domain name, but not at the beginning.
2471 // TODO(weili): RFC5322 allows domain names to be a local name without '.'. 2412 // TODO(weili): RFC5322 allows domain names to be a local name without '.'.
2472 // Check whether we should remove this check. 2413 // Check whether we should remove this check.
2473 int ePos = str.Find(L'.', aPos + 1); 2414 int ePos = str.Find(L'.', aPos + 1);
2474 if (ePos == -1 || ePos == aPos + 1) { 2415 if (ePos == -1 || ePos == aPos + 1)
2475 return FALSE; 2416 return false;
2476 } 2417
2477 // Validate all other chars in domain name. 2418 // Validate all other chars in domain name.
2478 int nLen = str.GetLength(); 2419 int nLen = str.GetLength();
2479 pPos = 0; // Used to track the position of '.'. 2420 pPos = 0; // Used to track the position of '.'.
2480 for (int i = aPos + 1; i < nLen; i++) { 2421 for (int i = aPos + 1; i < nLen; i++) {
2481 FX_WCHAR wch = str.GetAt(i); 2422 FX_WCHAR wch = str.GetAt(i);
2482 if (wch == L'-' || FXSYS_iswalnum(wch)) { 2423 if (wch == L'-' || FXSYS_iswalnum(wch))
2483 continue; 2424 continue;
2484 } 2425
2485 if (wch != L'.' || i == pPos + 1) { 2426 if (wch != L'.' || i == pPos + 1) {
2486 // Domain name should end before invalid char. 2427 // Domain name should end before invalid char.
2487 int host_end = i == pPos + 1 ? i - 2 : i - 1; 2428 int host_end = i == pPos + 1 ? i - 2 : i - 1;
2488 if (pPos > 0 && host_end - aPos >= 3) { 2429 if (pPos > 0 && host_end - aPos >= 3) {
2489 // Trim the ending invalid chars if there is at least one '.' and name. 2430 // Trim the ending invalid chars if there is at least one '.' and name.
2490 str = str.Left(host_end + 1); 2431 str = str.Left(host_end + 1);
2491 break; 2432 break;
2492 } 2433 }
2493 return FALSE; 2434 return false;
2494 } 2435 }
2495 pPos = i; 2436 pPos = i;
2496 } 2437 }
2497 2438
2498 if (str.Find(L"mailto:") == -1) { 2439 if (str.Find(L"mailto:") == -1)
2499 str = L"mailto:" + str; 2440 str = L"mailto:" + str;
2500 } 2441
2501 return TRUE; 2442 return true;
2502 } 2443 }
2503 2444
2504 void CPDF_LinkExtract::AppendToLinkList(int start, 2445 CFX_WideString CPDF_LinkExtract::GetURL(size_t index) const {
2505 int count, 2446 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L"";
2506 const CFX_WideString& strUrl) {
2507 CPDF_LinkExt* linkInfo = new CPDF_LinkExt;
2508 linkInfo->m_strUrl = strUrl;
2509 linkInfo->m_Start = start;
2510 linkInfo->m_Count = count;
2511 m_LinkList.Add(linkInfo);
2512 } 2447 }
2513 2448
2514 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { 2449 void CPDF_LinkExtract::GetRects(size_t index, CFX_RectArray* pRects) const {
2515 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { 2450 if (index < m_LinkArray.size()) {
2516 return L""; 2451 m_pTextPage->GetRectArray(m_LinkArray[index].m_Start,
2452 m_LinkArray[index].m_Count, pRects);
2517 } 2453 }
2518 CPDF_LinkExt* link = NULL;
2519 link = m_LinkList.GetAt(index);
2520 if (!link) {
2521 return L"";
2522 }
2523 return link->m_strUrl;
2524 } 2454 }
2525 void CPDF_LinkExtract::GetBoundedSegment(int index,
2526 int& start,
2527 int& count) const {
2528 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2529 return;
2530 }
2531 CPDF_LinkExt* link = NULL;
2532 link = m_LinkList.GetAt(index);
2533 if (!link) {
2534 return;
2535 }
2536 start = link->m_Start;
2537 count = link->m_Count;
2538 }
2539
2540 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const {
2541 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2542 return;
2543 }
2544 CPDF_LinkExt* link = NULL;
2545 link = m_LinkList.GetAt(index);
2546 if (!link) {
2547 return;
2548 }
2549 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2550 }
OLDNEW
« no previous file with comments | « no previous file | core/fpdftext/fpdf_text_int_unittest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698