core/fpdftext/fpdf_text_int.cpp - Issue 1896303002: Remove CFX_ArrayTemplate from CPDF_LinkExtract

Side by Side Diff: core/fpdftext/fpdf_text_int.cpp

Issue 1896303002: Remove CFX_ArrayTemplate from CPDF_LinkExtract (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master

Patch Set: No RVO for you. Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2014 PDFium Authors. All rights reserved.	1 // Copyright 2014 PDFium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com	5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <cctype>	8 #include <cctype>

9 #include <cwctype>	9 #include <cwctype>

10 #include <memory>	10 #include <memory>

(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after
255 return -1;	255 return -1;

256	256

257 return CharIndex - m_CharIndex[i] + count - m_CharIndex[i + 1];	257 return CharIndex - m_CharIndex[i] + count - m_CharIndex[i + 1];

258 }	258 }

259 }	259 }

260 return -1;	260 return -1;

261 }	261 }

262	262

263 void CPDF_TextPage::GetRectArray(int start,	263 void CPDF_TextPage::GetRectArray(int start,

264 int nCount,	264 int nCount,

265 CFX_RectArray& rectArray) const {	265 CFX_RectArray* rectArray) const {

266 if (start < 0 \|\| nCount == 0) {	266 if (start < 0 \|\| nCount == 0) {

267 return;	267 return;

268 }	268 }

269 if (!m_bIsParsed) {	269 if (!m_bIsParsed) {

270 return;	270 return;

271 }	271 }

272 CPDF_TextObject* pCurObj = NULL;	272 CPDF_TextObject* pCurObj = NULL;

273 CFX_FloatRect rect;	273 CFX_FloatRect rect;

274 int curPos = start;	274 int curPos = start;

275 FX_BOOL flagNewRect = TRUE;	275 FX_BOOL flagNewRect = TRUE;

276 if (nCount + start > pdfium::CollectionSize<int>(m_CharList) \|\|	276 if (nCount + start > pdfium::CollectionSize<int>(m_CharList) \|\|

277 nCount == -1) {	277 nCount == -1) {

278 nCount = pdfium::CollectionSize<int>(m_CharList) - start;	278 nCount = pdfium::CollectionSize<int>(m_CharList) - start;

279 }	279 }

280 while (nCount--) {	280 while (nCount--) {

281 PAGECHAR_INFO info_curchar = m_CharList[curPos++];	281 PAGECHAR_INFO info_curchar = m_CharList[curPos++];

282 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) {	282 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) {

283 continue;	283 continue;

284 }	284 }

285 if (info_curchar.m_CharBox.Width() < 0.01 \|\|	285 if (info_curchar.m_CharBox.Width() < 0.01 \|\|

286 info_curchar.m_CharBox.Height() < 0.01) {	286 info_curchar.m_CharBox.Height() < 0.01) {

287 continue;	287 continue;

288 }	288 }

289 if (!pCurObj) {	289 if (!pCurObj) {

290 pCurObj = info_curchar.m_pTextObj;	290 pCurObj = info_curchar.m_pTextObj;

291 }	291 }

292 if (pCurObj != info_curchar.m_pTextObj) {	292 if (pCurObj != info_curchar.m_pTextObj) {

293 rectArray.Add(rect);	293 rectArray->Add(rect);

294 pCurObj = info_curchar.m_pTextObj;	294 pCurObj = info_curchar.m_pTextObj;

295 flagNewRect = TRUE;	295 flagNewRect = TRUE;

296 }	296 }

297 if (flagNewRect) {	297 if (flagNewRect) {

298 FX_FLOAT orgX = info_curchar.m_OriginX, orgY = info_curchar.m_OriginY;	298 FX_FLOAT orgX = info_curchar.m_OriginX, orgY = info_curchar.m_OriginY;

299 CFX_Matrix matrix, matrix_reverse;	299 CFX_Matrix matrix, matrix_reverse;

300 info_curchar.m_pTextObj->GetTextMatrix(&matrix);	300 info_curchar.m_pTextObj->GetTextMatrix(&matrix);

301 matrix.Concat(info_curchar.m_Matrix);	301 matrix.Concat(info_curchar.m_Matrix);

302 matrix_reverse.SetReverse(matrix);	302 matrix_reverse.SetReverse(matrix);

303 matrix_reverse.Transform(orgX, orgY);	303 matrix_reverse.Transform(orgX, orgY);

(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
336 rect.right = info_curchar.m_CharBox.right;	336 rect.right = info_curchar.m_CharBox.right;

337 }	337 }

338 if (rect.top < info_curchar.m_CharBox.top) {	338 if (rect.top < info_curchar.m_CharBox.top) {

339 rect.top = info_curchar.m_CharBox.top;	339 rect.top = info_curchar.m_CharBox.top;

340 }	340 }

341 if (rect.bottom > info_curchar.m_CharBox.bottom) {	341 if (rect.bottom > info_curchar.m_CharBox.bottom) {

342 rect.bottom = info_curchar.m_CharBox.bottom;	342 rect.bottom = info_curchar.m_CharBox.bottom;

343 }	343 }

344 }	344 }

345 }	345 }

346 rectArray.Add(rect);	346 rectArray->Add(rect);

347 }	347 }

348	348

349 int CPDF_TextPage::GetIndexAtPos(CFX_FloatPoint point,	349 int CPDF_TextPage::GetIndexAtPos(CFX_FloatPoint point,

350 FX_FLOAT xTolerance,	350 FX_FLOAT xTolerance,

351 FX_FLOAT yTolerance) const {	351 FX_FLOAT yTolerance) const {

352 if (!m_bIsParsed)	352 if (!m_bIsParsed)

353 return -3;	353 return -3;

354	354

355 int pos = 0;	355 int pos = 0;

356 int NearPos = -1;	356 int NearPos = -1;

(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
585	585

586 int CPDF_TextPage::CountRects(int start, int nCount) {	586 int CPDF_TextPage::CountRects(int start, int nCount) {

587 if (!m_bIsParsed \|\| start < 0)	587 if (!m_bIsParsed \|\| start < 0)

588 return -1;	588 return -1;

589	589

590 if (nCount == -1 \|\|	590 if (nCount == -1 \|\|

591 nCount + start > pdfium::CollectionSize<int>(m_CharList)) {	591 nCount + start > pdfium::CollectionSize<int>(m_CharList)) {

592 nCount = pdfium::CollectionSize<int>(m_CharList) - start;	592 nCount = pdfium::CollectionSize<int>(m_CharList) - start;

593 }	593 }

594 m_SelRects.RemoveAll();	594 m_SelRects.RemoveAll();

595 GetRectArray(start, nCount, m_SelRects);	595 GetRectArray(start, nCount, &m_SelRects);

596 return m_SelRects.GetSize();	596 return m_SelRects.GetSize();

597 }	597 }

598	598

599 void CPDF_TextPage::GetRect(int rectIndex,	599 void CPDF_TextPage::GetRect(int rectIndex,

600 FX_FLOAT& left,	600 FX_FLOAT& left,

601 FX_FLOAT& top,	601 FX_FLOAT& top,

602 FX_FLOAT& right,	602 FX_FLOAT& right,

603 FX_FLOAT& bottom) const {	603 FX_FLOAT& bottom) const {

604 if (!m_bIsParsed)	604 if (!m_bIsParsed)

605 return;	605 return;

(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
642 Rotate = (int)(a * 180 / FX_PI + 0.5);	642 Rotate = (int)(a * 180 / FX_PI + 0.5);

643 }	643 }

644 if (Rotate < 0) {	644 if (Rotate < 0) {

645 Rotate = -Rotate;	645 Rotate = -Rotate;

646 } else if (Rotate > 0) {	646 } else if (Rotate > 0) {

647 Rotate = 360 - Rotate;	647 Rotate = 360 - Rotate;

648 }	648 }

649 return TRUE;	649 return TRUE;

650 }	650 }

651	651

652 FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect,

653 int& Rotate) {

654 int start, end, count,

655 n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom,

656 TRUE);

657 if (n < 1) {

658 return FALSE;

659 }

660 if (n > 1) {

661 GetBoundedSegment(n - 1, start, count);

662 end = start + count - 1;

663 GetBoundedSegment(0, start, count);

664 } else {

665 GetBoundedSegment(0, start, count);

666 end = start + count - 1;

667 }

668 return GetBaselineRotate(start, end, Rotate);

669 }

670 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) {

671 if (!m_bIsParsed)

672 return FALSE;

673

674 if (rectIndex < 0 \|\| rectIndex >= m_SelRects.GetSize())

675 return FALSE;

676

677 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);

678 return GetBaselineRotate(rect, Rotate);

679 }

680

681 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,	652 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,

682 FX_FLOAT top,	653 FX_FLOAT top,

683 FX_FLOAT right,	654 FX_FLOAT right,

684 FX_FLOAT bottom,	655 FX_FLOAT bottom,

685 FX_BOOL bContains) {	656 FX_BOOL bContains) {

686 m_Segments.RemoveAll();	657 m_Segments.RemoveAll();

687 if (!m_bIsParsed)	658 if (!m_bIsParsed)

688 return -1;	659 return -1;

689	660

690 CFX_FloatRect rect(left, bottom, right, top);	661 CFX_FloatRect rect(left, bottom, right, top);

(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
749 }	720 }

750 if (segmentStatus == 1) {	721 if (segmentStatus == 1) {

751 segmentStatus = 2;	722 segmentStatus = 2;

752 m_Segments.Add(segment);	723 m_Segments.Add(segment);

753 segment.m_Start = 0;	724 segment.m_Start = 0;

754 segment.m_nCount = 0;	725 segment.m_nCount = 0;

755 }	726 }

756 return m_Segments.GetSize();	727 return m_Segments.GetSize();

757 }	728 }

758	729

759 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {

760 if (index < 0 \|\| index >= m_Segments.GetSize()) {

761 return;

762 }

763 start = m_Segments.GetAt(index).m_Start;

764 count = m_Segments.GetAt(index).m_nCount;

765 }

766

767 int CPDF_TextPage::GetWordBreak(int index, int direction) const {	730 int CPDF_TextPage::GetWordBreak(int index, int direction) const {

768 if (!m_bIsParsed)	731 if (!m_bIsParsed)

769 return -1;	732 return -1;

770	733

771 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT)	734 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT)

772 return -1;	735 return -1;

773	736

774 if (index < 0 \|\| index >= pdfium::CollectionSize<int>(m_CharList))	737 if (index < 0 \|\| index >= pdfium::CollectionSize<int>(m_CharList))

775 return -1;	738 return -1;

776	739

(...skipping 1342 matching lines...) Expand 10 before | Expand all | Expand 10 after
2119 nStartPos = m_resStart + m_csFindWhatArray[1].GetLength();	2082 nStartPos = m_resStart + m_csFindWhatArray[1].GetLength();

2120 } else {	2083 } else {

2121 nStartPos = m_resStart + m_csFindWhatArray[0].GetLength();	2084 nStartPos = m_resStart + m_csFindWhatArray[0].GetLength();

2122 }	2085 }

2123 }	2086 }

2124 }	2087 }

2125 m_resEnd = nResultPos + m_csFindWhatArray.back().GetLength() - 1;	2088 m_resEnd = nResultPos + m_csFindWhatArray.back().GetLength() - 1;

2126 m_IsFind = TRUE;	2089 m_IsFind = TRUE;

2127 int resStart = GetCharIndex(m_resStart);	2090 int resStart = GetCharIndex(m_resStart);

2128 int resEnd = GetCharIndex(m_resEnd);	2091 int resEnd = GetCharIndex(m_resEnd);

2129 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray);	2092 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, &m_resArray);

2130 if (m_flags & FPDFTEXT_CONSECUTIVE) {	2093 if (m_flags & FPDFTEXT_CONSECUTIVE) {

2131 m_findNextStart = m_resStart + 1;	2094 m_findNextStart = m_resStart + 1;

2132 m_findPreStart = m_resEnd - 1;	2095 m_findPreStart = m_resEnd - 1;

2133 } else {	2096 } else {

2134 m_findNextStart = m_resEnd + 1;	2097 m_findNextStart = m_resEnd + 1;

2135 m_findPreStart = m_resStart - 1;	2098 m_findPreStart = m_resStart - 1;

2136 }	2099 }

2137 return m_IsFind;	2100 return m_IsFind;

2138 }	2101 }

2139	2102

(...skipping 25 matching lines...) Expand all
2165 MatchedCount = MatchedCount1;	2128 MatchedCount = MatchedCount1;

2166 }	2129 }

2167 }	2130 }

2168 if (order == -1) {	2131 if (order == -1) {

2169 m_IsFind = FALSE;	2132 m_IsFind = FALSE;

2170 return m_IsFind;	2133 return m_IsFind;

2171 }	2134 }

2172 m_resStart = m_pTextPage->TextIndexFromCharIndex(order);	2135 m_resStart = m_pTextPage->TextIndexFromCharIndex(order);

2173 m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1);	2136 m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1);

2174 m_IsFind = TRUE;	2137 m_IsFind = TRUE;

2175 m_pTextPage->GetRectArray(order, MatchedCount, m_resArray);	2138 m_pTextPage->GetRectArray(order, MatchedCount, &m_resArray);

2176 if (m_flags & FPDFTEXT_CONSECUTIVE) {	2139 if (m_flags & FPDFTEXT_CONSECUTIVE) {

2177 m_findNextStart = m_resStart + 1;	2140 m_findNextStart = m_resStart + 1;

2178 m_findPreStart = m_resEnd - 1;	2141 m_findPreStart = m_resEnd - 1;

2179 } else {	2142 } else {

2180 m_findNextStart = m_resEnd + 1;	2143 m_findNextStart = m_resEnd + 1;

2181 m_findPreStart = m_resStart - 1;	2144 m_findPreStart = m_resStart - 1;

2182 }	2145 }

2183 return m_IsFind;	2146 return m_IsFind;

2184 }	2147 }

2185	2148

(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after
2323 int CPDF_TextPageFind::GetCurOrder() const {	2286 int CPDF_TextPageFind::GetCurOrder() const {

2324 return GetCharIndex(m_resStart);	2287 return GetCharIndex(m_resStart);

2325 }	2288 }

2326	2289

2327 int CPDF_TextPageFind::GetMatchedCount() const {	2290 int CPDF_TextPageFind::GetMatchedCount() const {

2328 int resStart = GetCharIndex(m_resStart);	2291 int resStart = GetCharIndex(m_resStart);

2329 int resEnd = GetCharIndex(m_resEnd);	2292 int resEnd = GetCharIndex(m_resEnd);

2330 return resEnd - resStart + 1;	2293 return resEnd - resStart + 1;

2331 }	2294 }

2332	2295

2333 CPDF_LinkExtract::CPDF_LinkExtract()	2296 CPDF_LinkExtract::CPDF_LinkExtract(const CPDF_TextPage* pTextPage)

2334 : m_pTextPage(nullptr), m_bIsParsed(false) {}	2297 : m_pTextPage(pTextPage) {}

2335	2298

2336 CPDF_LinkExtract::~CPDF_LinkExtract() {	2299 CPDF_LinkExtract::~CPDF_LinkExtract() {

2337 DeleteLinkList();

2338 }	2300 }

2339	2301

2340 FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) {	2302 void CPDF_LinkExtract::ExtractLinks() {

2341 if (!pTextPage \|\| !pTextPage->IsParsed())	2303 m_LinkArray.clear();

2342 return FALSE;	2304 if (!m_pTextPage->IsParsed())

	2305 return;

2343	2306

2344 m_pTextPage = (const CPDF_TextPage*)pTextPage;

2345 m_strPageText = m_pTextPage->GetPageText(0, -1);	2307 m_strPageText = m_pTextPage->GetPageText(0, -1);

2346 DeleteLinkList();	2308 if (m_strPageText.IsEmpty())

2347 if (m_strPageText.IsEmpty()) {	2309 return;

2348 return FALSE;	2310

2349 }

2350 ParseLink();	2311 ParseLink();

2351 m_bIsParsed = true;

2352 return TRUE;

2353 }

2354

2355 void CPDF_LinkExtract::DeleteLinkList() {

2356 while (m_LinkList.GetSize()) {

2357 CPDF_LinkExt* linkinfo = NULL;

2358 linkinfo = m_LinkList.GetAt(0);

2359 m_LinkList.RemoveAt(0);

2360 delete linkinfo;

2361 }

2362 m_LinkList.RemoveAll();

2363 }

2364

2365 int CPDF_LinkExtract::CountLinks() const {

2366 if (!m_bIsParsed) {

2367 return -1;

2368 }

2369 return m_LinkList.GetSize();

2370 }	2312 }

2371	2313

2372 void CPDF_LinkExtract::ParseLink() {	2314 void CPDF_LinkExtract::ParseLink() {

2373 int start = 0, pos = 0;	2315 int start = 0, pos = 0;

2374 int TotalChar = m_pTextPage->CountChars();	2316 int TotalChar = m_pTextPage->CountChars();

2375 while (pos < TotalChar) {	2317 while (pos < TotalChar) {

2376 FPDF_CHAR_INFO pageChar;	2318 FPDF_CHAR_INFO pageChar;

2377 m_pTextPage->GetCharInfo(pos, &pageChar);	2319 m_pTextPage->GetCharInfo(pos, &pageChar);

2378 if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED \|\|	2320 if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED \|\|

2379 pageChar.m_Unicode == 0x20 \|\| pos == TotalChar - 1) {	2321 pageChar.m_Unicode == 0x20 \|\| pos == TotalChar - 1) {

2380 int nCount = pos - start;	2322 int nCount = pos - start;

2381 if (pos == TotalChar - 1) {	2323 if (pos == TotalChar - 1) {

2382 nCount++;	2324 nCount++;

2383 }	2325 }

2384 CFX_WideString strBeCheck;	2326 CFX_WideString strBeCheck;

2385 strBeCheck = m_pTextPage->GetPageText(start, nCount);	2327 strBeCheck = m_pTextPage->GetPageText(start, nCount);

2386 if (strBeCheck.GetLength() > 5) {	2328 if (strBeCheck.GetLength() > 5) {

2387 while (strBeCheck.GetLength() > 0) {	2329 while (strBeCheck.GetLength() > 0) {

2388 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1);	2330 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1);

2389 if (ch == L')' \|\| ch == L',' \|\| ch == L'>' \|\| ch == L'.') {	2331 if (ch == L')' \|\| ch == L',' \|\| ch == L'>' \|\| ch == L'.') {

2390 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1);	2332 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1);

2391 nCount--;	2333 nCount--;

2392 } else {	2334 } else {

2393 break;	2335 break;

2394 }	2336 }

2395 }	2337 }

2396 if (nCount > 5 &&	2338 if (nCount > 5 &&

2397 (CheckWebLink(strBeCheck) \|\| CheckMailLink(strBeCheck))) {	2339 (CheckWebLink(strBeCheck) \|\| CheckMailLink(strBeCheck))) {

2398 AppendToLinkList(start, nCount, strBeCheck);	2340 m_LinkArray.push_back({start, nCount, strBeCheck});

2399 }	2341 }

2400 }	2342 }

2401 start = ++pos;	2343 start = ++pos;

2402 } else {	2344 } else {

2403 pos++;	2345 pos++;

2404 }	2346 }

2405 }	2347 }

2406 }	2348 }

2407	2349

2408 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {	2350 bool CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {

2409 CFX_WideString str = strBeCheck;	2351 CFX_WideString str = strBeCheck;

2410 str.MakeLower();	2352 str.MakeLower();

2411 if (str.Find(L"http://www.") != -1) {	2353 if (str.Find(L"http://www.") != -1) {

2412 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www."));	2354 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www."));

2413 return TRUE;	2355 return true;

2414 }	2356 }

2415 if (str.Find(L"http://") != -1) {	2357 if (str.Find(L"http://") != -1) {

2416 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://"));	2358 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://"));

2417 return TRUE;	2359 return true;

2418 }	2360 }

2419 if (str.Find(L"https://www.") != -1) {	2361 if (str.Find(L"https://www.") != -1) {

2420 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www."));	2362 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www."));

2421 return TRUE;	2363 return true;

2422 }	2364 }

2423 if (str.Find(L"https://") != -1) {	2365 if (str.Find(L"https://") != -1) {

2424 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://"));	2366 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://"));

2425 return TRUE;	2367 return true;

2426 }	2368 }

2427 if (str.Find(L"www.") != -1) {	2369 if (str.Find(L"www.") != -1) {

2428 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www."));	2370 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www."));

2429 strBeCheck = L"http://" + strBeCheck;	2371 strBeCheck = L"http://" + strBeCheck;

2430 return TRUE;	2372 return true;

2431 }	2373 }

2432 return FALSE;	2374 return false;

2433 }	2375 }

2434	2376

2435 bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {	2377 bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {

2436 int aPos = str.Find(L'@');	2378 int aPos = str.Find(L'@');

2437 // Invalid when no '@'.	2379 // Invalid when no '@'.

2438 if (aPos < 1) {	2380 if (aPos < 1)

2439 return FALSE;	2381 return false;

2440 }

2441	2382

2442 // Check the local part.	2383 // Check the local part.

2443 int pPos = aPos; // Used to track the position of '@' or '.'.	2384 int pPos = aPos; // Used to track the position of '@' or '.'.

2444 for (int i = aPos - 1; i >= 0; i--) {	2385 for (int i = aPos - 1; i >= 0; i--) {

2445 FX_WCHAR ch = str.GetAt(i);	2386 FX_WCHAR ch = str.GetAt(i);

2446 if (ch == L'_' \|\| ch == L'-' \|\| FXSYS_iswalnum(ch)) {	2387 if (ch == L'_' \|\| ch == L'-' \|\| FXSYS_iswalnum(ch))

2447 continue;	2388 continue;

2448 }	2389

2449 if (ch != L'.' \|\| i == pPos - 1 \|\| i == 0) {	2390 if (ch != L'.' \|\| i == pPos - 1 \|\| i == 0) {

2450 if (i == aPos - 1) {	2391 if (i == aPos - 1) {

2451 // There is '.' or invalid char before '@'.	2392 // There is '.' or invalid char before '@'.

2452 return FALSE;	2393 return FALSE;

2453 }	2394 }

2454 // End extracting for other invalid chars, '.' at the beginning, or	2395 // End extracting for other invalid chars, '.' at the beginning, or

2455 // consecutive '.'.	2396 // consecutive '.'.

2456 int removed_len = i == pPos - 1 ? i + 2 : i + 1;	2397 int removed_len = i == pPos - 1 ? i + 2 : i + 1;

2457 str = str.Right(str.GetLength() - removed_len);	2398 str = str.Right(str.GetLength() - removed_len);

2458 break;	2399 break;

2459 }	2400 }

2460 // Found a valid '.'.	2401 // Found a valid '.'.

2461 pPos = i;	2402 pPos = i;

2462 }	2403 }

2463	2404

2464 // Check the domain name part.	2405 // Check the domain name part.

2465 aPos = str.Find(L'@');	2406 aPos = str.Find(L'@');

2466 if (aPos < 1) {	2407 if (aPos < 1)

2467 return FALSE;	2408 return false;

2468 }	2409

2469 str.TrimRight(L'.');	2410 str.TrimRight(L'.');

2470 // At least one '.' in domain name, but not at the beginning.	2411 // At least one '.' in domain name, but not at the beginning.

2471 // TODO(weili): RFC5322 allows domain names to be a local name without '.'.	2412 // TODO(weili): RFC5322 allows domain names to be a local name without '.'.

2472 // Check whether we should remove this check.	2413 // Check whether we should remove this check.

2473 int ePos = str.Find(L'.', aPos + 1);	2414 int ePos = str.Find(L'.', aPos + 1);

2474 if (ePos == -1 \|\| ePos == aPos + 1) {	2415 if (ePos == -1 \|\| ePos == aPos + 1)

2475 return FALSE;	2416 return false;

2476 }	2417

2477 // Validate all other chars in domain name.	2418 // Validate all other chars in domain name.

2478 int nLen = str.GetLength();	2419 int nLen = str.GetLength();

2479 pPos = 0; // Used to track the position of '.'.	2420 pPos = 0; // Used to track the position of '.'.

2480 for (int i = aPos + 1; i < nLen; i++) {	2421 for (int i = aPos + 1; i < nLen; i++) {

2481 FX_WCHAR wch = str.GetAt(i);	2422 FX_WCHAR wch = str.GetAt(i);

2482 if (wch == L'-' \|\| FXSYS_iswalnum(wch)) {	2423 if (wch == L'-' \|\| FXSYS_iswalnum(wch))

2483 continue;	2424 continue;

2484 }	2425

2485 if (wch != L'.' \|\| i == pPos + 1) {	2426 if (wch != L'.' \|\| i == pPos + 1) {

2486 // Domain name should end before invalid char.	2427 // Domain name should end before invalid char.

2487 int host_end = i == pPos + 1 ? i - 2 : i - 1;	2428 int host_end = i == pPos + 1 ? i - 2 : i - 1;

2488 if (pPos > 0 && host_end - aPos >= 3) {	2429 if (pPos > 0 && host_end - aPos >= 3) {

2489 // Trim the ending invalid chars if there is at least one '.' and name.	2430 // Trim the ending invalid chars if there is at least one '.' and name.

2490 str = str.Left(host_end + 1);	2431 str = str.Left(host_end + 1);

2491 break;	2432 break;

2492 }	2433 }

2493 return FALSE;	2434 return false;

2494 }	2435 }

2495 pPos = i;	2436 pPos = i;

2496 }	2437 }

2497	2438

2498 if (str.Find(L"mailto:") == -1) {	2439 if (str.Find(L"mailto:") == -1)

2499 str = L"mailto:" + str;	2440 str = L"mailto:" + str;

2500 }	2441

2501 return TRUE;	2442 return true;

2502 }	2443 }

2503	2444

2504 void CPDF_LinkExtract::AppendToLinkList(int start,	2445 CFX_WideString CPDF_LinkExtract::GetURL(size_t index) const {

2505 int count,	2446 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L"";

2506 const CFX_WideString& strUrl) {

2507 CPDF_LinkExt* linkInfo = new CPDF_LinkExt;

2508 linkInfo->m_strUrl = strUrl;

2509 linkInfo->m_Start = start;

2510 linkInfo->m_Count = count;

2511 m_LinkList.Add(linkInfo);

2512 }	2447 }

2513	2448

2514 CFX_WideString CPDF_LinkExtract::GetURL(int index) const {	2449 void CPDF_LinkExtract::GetRects(size_t index, CFX_RectArray* pRects) const {

2515 if (!m_bIsParsed \|\| index < 0 \|\| index >= m_LinkList.GetSize()) {	2450 if (index < m_LinkArray.size()) {

2516 return L"";	2451 m_pTextPage->GetRectArray(m_LinkArray[index].m_Start,

	2452 m_LinkArray[index].m_Count, pRects);

2517 }	2453 }

2518 CPDF_LinkExt* link = NULL;

2519 link = m_LinkList.GetAt(index);

2520 if (!link) {

2521 return L"";

2522 }

2523 return link->m_strUrl;

2524 }	2454 }

2525 void CPDF_LinkExtract::GetBoundedSegment(int index,

2526 int& start,

2527 int& count) const {

2528 if (!m_bIsParsed \|\| index < 0 \|\| index >= m_LinkList.GetSize()) {

2529 return;

2530 }

2531 CPDF_LinkExt* link = NULL;

2532 link = m_LinkList.GetAt(index);

2533 if (!link) {

2534 return;

2535 }

2536 start = link->m_Start;

2537 count = link->m_Count;

2538 }

2539

2540 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const {

2541 if (!m_bIsParsed \|\| index < 0 \|\| index >= m_LinkList.GetSize()) {

2542 return;

2543 }

2544 CPDF_LinkExt* link = NULL;

2545 link = m_LinkList.GetAt(index);

2546 if (!link) {

2547 return;

2548 }

2549 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);

2550 }

OLD	NEW

« no previous file with comments | « no previous file | core/fpdftext/fpdf_text_int_unittest.cpp » ('j') | no next file with comments »