Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(79)

Side by Side Diff: core/src/fpdftext/fpdf_text_int.cpp

Issue 1316643004: Fix typos, nits and remove dead code in fpdf_text_int.cpp. (Closed) Base URL: https://pdfium.googlesource.com/pdfium@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « core/include/fpdftext/fpdf_text.h ('k') | core/src/fpdftext/text_int.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include <ctype.h> 7 #include <ctype.h>
8 #include <algorithm> 8 #include <algorithm>
9 9
10 #include "../../../third_party/base/nonstd_unique_ptr.h" 10 #include "../../../third_party/base/nonstd_unique_ptr.h"
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
74 } 74 }
75 return baseSpace; 75 return baseSpace;
76 } 76 }
77 77
78 } // namespace 78 } // namespace
79 79
80 CPDFText_ParseOptions::CPDFText_ParseOptions() 80 CPDFText_ParseOptions::CPDFText_ParseOptions()
81 : m_bGetCharCodeOnly(FALSE), 81 : m_bGetCharCodeOnly(FALSE),
82 m_bNormalizeObjs(TRUE), 82 m_bNormalizeObjs(TRUE),
83 m_bOutputHyphen(FALSE) {} 83 m_bOutputHyphen(FALSE) {}
84 IPDF_TextPage* IPDF_TextPage::CreateTextPage( 84
85 const CPDF_Page* pPage,
86 CPDFText_ParseOptions ParserOptions) {
87 return new CPDF_TextPage(pPage, ParserOptions);
88 }
89 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, 85 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage,
90 int flags) { 86 int flags) {
91 return new CPDF_TextPage(pPage, flags); 87 return new CPDF_TextPage(pPage, flags);
92 } 88 }
93 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, 89
94 int flags) {
95 return new CPDF_TextPage(pObjs, flags);
96 }
97 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( 90 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind(
98 const IPDF_TextPage* pTextPage) { 91 const IPDF_TextPage* pTextPage) {
99 if (!pTextPage) { 92 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr;
100 return NULL;
101 }
102 return new CPDF_TextPageFind(pTextPage);
103 } 93 }
94
104 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { 95 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() {
105 return new CPDF_LinkExtract(); 96 return new CPDF_LinkExtract();
106 } 97 }
98
107 #define TEXT_BLANK_CHAR L' ' 99 #define TEXT_BLANK_CHAR L' '
108 #define TEXT_LINEFEED_CHAR L'\n' 100 #define TEXT_LINEFEED_CHAR L'\n'
109 #define TEXT_RETURN_CHAR L'\r' 101 #define TEXT_RETURN_CHAR L'\r'
110 #define TEXT_EMPTY L"" 102 #define TEXT_EMPTY L""
111 #define TEXT_BLANK L" " 103 #define TEXT_BLANK L" "
112 #define TEXT_RETURN_LINEFEED L"\r\n" 104 #define TEXT_RETURN_LINEFEED L"\r\n"
113 #define TEXT_LINEFEED L"\n" 105 #define TEXT_LINEFEED L"\n"
114 #define TEXT_CHARRATIO_GAPDELTA 0.070 106 #define TEXT_CHARRATIO_GAPDELTA 0.070
107
115 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) 108 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags)
116 : m_charList(512), 109 : m_pPage(pPage),
110 m_charList(512),
117 m_TempCharList(50), 111 m_TempCharList(50),
118 m_pPreTextObj(NULL), 112 m_parserflag(flags),
119 m_IsParsered(FALSE), 113 m_pPreTextObj(nullptr),
114 m_bIsParsed(false),
120 m_TextlineDir(-1), 115 m_TextlineDir(-1),
121 m_CurlineRect(0, 0, 0, 0) { 116 m_CurlineRect(0, 0, 0, 0) {
122 m_pPage = pPage;
123 m_parserflag = flags;
124 m_TextBuf.EstimateSize(0, 10240); 117 m_TextBuf.EstimateSize(0, 10240);
125 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), 118 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(),
126 (int)pPage->GetPageHeight(), 0); 119 (int)pPage->GetPageHeight(), 0);
127 } 120 }
128 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, 121
129 CPDFText_ParseOptions ParserOptions)
130 : m_ParseOptions(ParserOptions),
131 m_charList(512),
132 m_TempCharList(50),
133 m_pPreTextObj(NULL),
134 m_IsParsered(FALSE),
135 m_TextlineDir(-1),
136 m_CurlineRect(0, 0, 0, 0) {
137 m_pPage = pPage;
138 m_parserflag = 0;
139 m_TextBuf.EstimateSize(0, 10240);
140 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(),
141 (int)pPage->GetPageHeight(), 0);
142 }
143 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags)
144 : m_charList(512),
145 m_TempCharList(50),
146 m_pPreTextObj(NULL),
147 m_IsParsered(FALSE),
148 m_TextlineDir(-1),
149 m_CurlineRect(0, 0, 0, 0) {
150 m_pPage = pPage;
151 m_parserflag = flags;
152 m_TextBuf.EstimateSize(0, 10240);
153 CFX_FloatRect pageRect = pPage->CalcBoundingBox();
154 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top);
155 }
156 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { 122 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) {
157 m_ParseOptions.m_bNormalizeObjs = bNormalize; 123 m_ParseOptions.m_bNormalizeObjs = bNormalize;
158 } 124 }
159 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { 125 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) {
160 switch (charInfo.m_Unicode) { 126 switch (charInfo.m_Unicode) {
161 case 0x2: 127 case 0x2:
162 case 0x3: 128 case 0x3:
163 case 0x93: 129 case 0x93:
164 case 0x94: 130 case 0x94:
165 case 0x96: 131 case 0x96:
166 case 0x97: 132 case 0x97:
167 case 0x98: 133 case 0x98:
168 case 0xfffe: 134 case 0xfffe:
169 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; 135 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN;
170 default: 136 default:
171 return false; 137 return false;
172 } 138 }
173 } 139 }
174 FX_BOOL CPDF_TextPage::ParseTextPage() { 140 FX_BOOL CPDF_TextPage::ParseTextPage() {
175 if (!m_pPage) { 141 m_bIsParsed = false;
176 m_IsParsered = FALSE; 142 if (!m_pPage)
177 return FALSE; 143 return FALSE;
178 } 144
179 m_IsParsered = FALSE;
180 m_TextBuf.Clear(); 145 m_TextBuf.Clear();
181 m_charList.RemoveAll(); 146 m_charList.RemoveAll();
182 m_pPreTextObj = NULL; 147 m_pPreTextObj = NULL;
183 ProcessObject(); 148 ProcessObject();
184 m_IsParsered = TRUE; 149 m_bIsParsed = true;
185 if (!m_ParseOptions.m_bGetCharCodeOnly) { 150 if (!m_ParseOptions.m_bGetCharCodeOnly) {
186 m_CharIndex.RemoveAll(); 151 m_CharIndex.RemoveAll();
187 int nCount = m_charList.GetSize(); 152 int nCount = m_charList.GetSize();
188 if (nCount) { 153 if (nCount) {
189 m_CharIndex.Add(0); 154 m_CharIndex.Add(0);
190 } 155 }
191 for (int i = 0; i < nCount; i++) { 156 for (int i = 0; i < nCount; i++) {
192 int indexSize = m_CharIndex.GetSize(); 157 int indexSize = m_CharIndex.GetSize();
193 FX_BOOL bNormal = FALSE; 158 FX_BOOL bNormal = FALSE;
194 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); 159 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i);
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
262 } 227 }
263 void CPDF_TextPage::GetRectArray(int start, 228 void CPDF_TextPage::GetRectArray(int start,
264 int nCount, 229 int nCount,
265 CFX_RectArray& rectArray) const { 230 CFX_RectArray& rectArray) const {
266 if (m_ParseOptions.m_bGetCharCodeOnly) { 231 if (m_ParseOptions.m_bGetCharCodeOnly) {
267 return; 232 return;
268 } 233 }
269 if (start < 0 || nCount == 0) { 234 if (start < 0 || nCount == 0) {
270 return; 235 return;
271 } 236 }
272 if (!m_IsParsered) { 237 if (!m_bIsParsed) {
273 return; 238 return;
274 } 239 }
275 PAGECHAR_INFO info_curchar; 240 PAGECHAR_INFO info_curchar;
276 CPDF_TextObject* pCurObj = NULL; 241 CPDF_TextObject* pCurObj = NULL;
277 CFX_FloatRect rect; 242 CFX_FloatRect rect;
278 int curPos = start; 243 int curPos = start;
279 FX_BOOL flagNewRect = TRUE; 244 FX_BOOL flagNewRect = TRUE;
280 if (nCount + start > m_charList.GetSize() || nCount == -1) { 245 if (nCount + start > m_charList.GetSize() || nCount == -1) {
281 nCount = m_charList.GetSize() - start; 246 nCount = m_charList.GetSize() - start;
282 } 247 }
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
345 rect.bottom = info_curchar.m_CharBox.bottom; 310 rect.bottom = info_curchar.m_CharBox.bottom;
346 } 311 }
347 } 312 }
348 } 313 }
349 rectArray.Add(rect); 314 rectArray.Add(rect);
350 return; 315 return;
351 } 316 }
352 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, 317 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point,
353 FX_FLOAT xTolerance, 318 FX_FLOAT xTolerance,
354 FX_FLOAT yTolerance) const { 319 FX_FLOAT yTolerance) const {
355 if (m_ParseOptions.m_bGetCharCodeOnly) { 320 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
356 return -3; 321 return -3;
Tom Sepez 2015/09/04 16:34:56 -3. How intuitive.
357 } 322
358 if (!m_IsParsered) {
359 return -3;
360 }
361 int pos = 0; 323 int pos = 0;
362 int NearPos = -1; 324 int NearPos = -1;
363 double xdif = 5000, ydif = 5000; 325 double xdif = 5000, ydif = 5000;
364 while (pos < m_charList.GetSize()) { 326 while (pos < m_charList.GetSize()) {
365 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); 327 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos));
366 CFX_FloatRect charrect = charinfo.m_CharBox; 328 CFX_FloatRect charrect = charinfo.m_CharBox;
367 if (charrect.Contains(point.x, point.y)) { 329 if (charrect.Contains(point.x, point.y)) {
368 break; 330 break;
369 } 331 }
370 if (xTolerance > 0 || yTolerance > 0) { 332 if (xTolerance > 0 || yTolerance > 0) {
(...skipping 22 matching lines...) Expand all
393 } 355 }
394 ++pos; 356 ++pos;
395 } 357 }
396 if (pos >= m_charList.GetSize()) { 358 if (pos >= m_charList.GetSize()) {
397 pos = NearPos; 359 pos = NearPos;
398 } 360 }
399 return pos; 361 return pos;
400 } 362 }
401 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { 363 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
402 CFX_WideString strText; 364 CFX_WideString strText;
403 if (m_ParseOptions.m_bGetCharCodeOnly || !m_IsParsered) { 365 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
404 return strText; 366 return strText;
405 } 367
406 int nCount = m_charList.GetSize(); 368 int nCount = m_charList.GetSize();
407 int pos = 0; 369 int pos = 0;
408 FX_FLOAT posy = 0; 370 FX_FLOAT posy = 0;
409 FX_BOOL IsContainPreChar = FALSE; 371 FX_BOOL IsContainPreChar = FALSE;
410 FX_BOOL ISAddLineFeed = FALSE; 372 FX_BOOL ISAddLineFeed = FALSE;
411 while (pos < nCount) { 373 while (pos < nCount) {
412 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); 374 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++);
413 if (IsRectIntersect(rect, charinfo.m_CharBox)) { 375 if (IsRectIntersect(rect, charinfo.m_CharBox)) {
414 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar && 376 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar &&
415 ISAddLineFeed) { 377 ISAddLineFeed) {
(...skipping 15 matching lines...) Expand all
431 } 393 }
432 } else { 394 } else {
433 IsContainPreChar = FALSE; 395 IsContainPreChar = FALSE;
434 ISAddLineFeed = TRUE; 396 ISAddLineFeed = TRUE;
435 } 397 }
436 } 398 }
437 return strText; 399 return strText;
438 } 400 }
439 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, 401 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect,
440 CFX_RectArray& resRectArray) const { 402 CFX_RectArray& resRectArray) const {
441 if (m_ParseOptions.m_bGetCharCodeOnly) { 403 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
442 return; 404 return;
443 } 405
444 if (!m_IsParsered) {
445 return;
446 }
447 CFX_FloatRect curRect; 406 CFX_FloatRect curRect;
448 FX_BOOL flagNewRect = TRUE; 407 FX_BOOL flagNewRect = TRUE;
449 CPDF_TextObject* pCurObj = NULL; 408 CPDF_TextObject* pCurObj = NULL;
450 int nCount = m_charList.GetSize(); 409 int nCount = m_charList.GetSize();
451 int pos = 0; 410 int pos = 0;
452 while (pos < nCount) { 411 while (pos < nCount) {
453 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); 412 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++);
454 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { 413 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) {
455 continue; 414 continue;
456 } 415 }
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
491 FX_FLOAT y, 450 FX_FLOAT y,
492 FX_FLOAT xTolerance, 451 FX_FLOAT xTolerance,
493 FX_FLOAT yTolerance) const { 452 FX_FLOAT yTolerance) const {
494 if (m_ParseOptions.m_bGetCharCodeOnly) { 453 if (m_ParseOptions.m_bGetCharCodeOnly) {
495 return -3; 454 return -3;
496 } 455 }
497 CPDF_Point point(x, y); 456 CPDF_Point point(x, y);
498 return GetIndexAtPos(point, xTolerance, yTolerance); 457 return GetIndexAtPos(point, xTolerance, yTolerance);
499 } 458 }
500 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const { 459 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const {
501 if (m_ParseOptions.m_bGetCharCodeOnly) { 460 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
502 return; 461 return;
503 } 462
504 if (!m_IsParsered) { 463 if (index < 0 || index >= m_charList.GetSize())
505 return; 464 return;
506 } 465
507 if (index < 0 || index >= m_charList.GetSize()) {
508 return;
509 }
510 PAGECHAR_INFO charinfo; 466 PAGECHAR_INFO charinfo;
511 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); 467 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
512 info.m_Charcode = charinfo.m_CharCode; 468 info.m_Charcode = charinfo.m_CharCode;
513 info.m_OriginX = charinfo.m_OriginX; 469 info.m_OriginX = charinfo.m_OriginX;
514 info.m_OriginY = charinfo.m_OriginY; 470 info.m_OriginY = charinfo.m_OriginY;
515 info.m_Unicode = charinfo.m_Unicode; 471 info.m_Unicode = charinfo.m_Unicode;
516 info.m_Flag = charinfo.m_Flag; 472 info.m_Flag = charinfo.m_Flag;
517 info.m_CharBox = charinfo.m_CharBox; 473 info.m_CharBox = charinfo.m_CharBox;
518 info.m_pTextObj = charinfo.m_pTextObj; 474 info.m_pTextObj = charinfo.m_pTextObj;
519 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) { 475 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) {
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
554 if (endIndex >= m_charList.GetSize()) { 510 if (endIndex >= m_charList.GetSize()) {
555 break; 511 break;
556 } 512 }
557 charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex); 513 charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex);
558 } 514 }
559 endIndex--; 515 endIndex--;
560 nCount = endIndex - start + 1; 516 nCount = endIndex - start + 1;
561 } 517 }
562 } 518 }
563 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { 519 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
564 if (!m_IsParsered || nCount == 0) { 520 if (!m_bIsParsed || nCount == 0)
565 return L""; 521 return L"";
566 } 522
567 if (start < 0) { 523 if (start < 0)
568 start = 0; 524 start = 0;
569 } 525
570 if (nCount == -1) { 526 if (nCount == -1) {
571 nCount = m_charList.GetSize() - start; 527 nCount = m_charList.GetSize() - start;
572 return m_TextBuf.GetWideString().Mid(start, 528 return m_TextBuf.GetWideString().Mid(start,
573 m_TextBuf.GetWideString().GetLength()); 529 m_TextBuf.GetWideString().GetLength());
574 } 530 }
575 if (nCount <= 0 || m_charList.GetSize() <= 0) { 531 if (nCount <= 0 || m_charList.GetSize() <= 0) {
576 return L""; 532 return L"";
577 } 533 }
578 if (nCount + start > m_charList.GetSize() - 1) { 534 if (nCount + start > m_charList.GetSize() - 1) {
579 nCount = m_charList.GetSize() - start; 535 nCount = m_charList.GetSize() - start;
(...skipping 23 matching lines...) Expand all
603 charinfo = 559 charinfo =
604 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1); 560 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1);
605 } 561 }
606 nCount = start + nCount - nCountOffset - startindex; 562 nCount = start + nCount - nCountOffset - startindex;
607 if (nCount <= 0) { 563 if (nCount <= 0) {
608 return L""; 564 return L"";
609 } 565 }
610 return m_TextBuf.GetWideString().Mid(startindex, nCount); 566 return m_TextBuf.GetWideString().Mid(startindex, nCount);
611 } 567 }
612 int CPDF_TextPage::CountRects(int start, int nCount) { 568 int CPDF_TextPage::CountRects(int start, int nCount) {
613 if (m_ParseOptions.m_bGetCharCodeOnly) { 569 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed || start < 0)
614 return -1; 570 return -1;
615 } 571
616 if (!m_IsParsered) {
617 return -1;
618 }
619 if (start < 0) {
620 return -1;
621 }
622 if (nCount == -1 || nCount + start > m_charList.GetSize()) { 572 if (nCount == -1 || nCount + start > m_charList.GetSize()) {
623 nCount = m_charList.GetSize() - start; 573 nCount = m_charList.GetSize() - start;
624 } 574 }
625 m_SelRects.RemoveAll(); 575 m_SelRects.RemoveAll();
626 GetRectArray(start, nCount, m_SelRects); 576 GetRectArray(start, nCount, m_SelRects);
627 return m_SelRects.GetSize(); 577 return m_SelRects.GetSize();
628 } 578 }
629 void CPDF_TextPage::GetRect(int rectIndex, 579 void CPDF_TextPage::GetRect(int rectIndex,
630 FX_FLOAT& left, 580 FX_FLOAT& left,
631 FX_FLOAT& top, 581 FX_FLOAT& top,
632 FX_FLOAT& right, 582 FX_FLOAT& right,
633 FX_FLOAT& bottom) const { 583 FX_FLOAT& bottom) const {
634 if (m_ParseOptions.m_bGetCharCodeOnly) { 584 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
635 return; 585 return;
636 } 586
637 if (!m_IsParsered || rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) { 587 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize())
638 return; 588 return;
639 } 589
640 left = m_SelRects.GetAt(rectIndex).left; 590 left = m_SelRects.GetAt(rectIndex).left;
641 top = m_SelRects.GetAt(rectIndex).top; 591 top = m_SelRects.GetAt(rectIndex).top;
642 right = m_SelRects.GetAt(rectIndex).right; 592 right = m_SelRects.GetAt(rectIndex).right;
643 bottom = m_SelRects.GetAt(rectIndex).bottom; 593 bottom = m_SelRects.GetAt(rectIndex).bottom;
644 } 594 }
645 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { 595 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) {
646 if (m_ParseOptions.m_bGetCharCodeOnly) { 596 if (m_ParseOptions.m_bGetCharCodeOnly) {
647 return FALSE; 597 return FALSE;
648 } 598 }
649 if (end == start) { 599 if (end == start) {
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
696 GetBoundedSegment(n - 1, start, count); 646 GetBoundedSegment(n - 1, start, count);
697 end = start + count - 1; 647 end = start + count - 1;
698 GetBoundedSegment(0, start, count); 648 GetBoundedSegment(0, start, count);
699 } else { 649 } else {
700 GetBoundedSegment(0, start, count); 650 GetBoundedSegment(0, start, count);
701 end = start + count - 1; 651 end = start + count - 1;
702 } 652 }
703 return GetBaselineRotate(start, end, Rotate); 653 return GetBaselineRotate(start, end, Rotate);
704 } 654 }
705 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { 655 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) {
706 if (m_ParseOptions.m_bGetCharCodeOnly) { 656 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
707 return FALSE; 657 return FALSE;
708 } 658
709 if (!m_IsParsered || rectIndex < 0 || rectIndex > m_SelRects.GetSize()) { 659 if (rectIndex < 0 || rectIndex > m_SelRects.GetSize())
710 return FALSE; 660 return FALSE;
711 } 661
712 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); 662 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);
713 return GetBaselineRotate(rect, Rotate); 663 return GetBaselineRotate(rect, Rotate);
714 } 664 }
715 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, 665 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
716 FX_FLOAT top, 666 FX_FLOAT top,
717 FX_FLOAT right, 667 FX_FLOAT right,
718 FX_FLOAT bottom, 668 FX_FLOAT bottom,
719 FX_BOOL bContains) { 669 FX_BOOL bContains) {
720 if (m_ParseOptions.m_bGetCharCodeOnly) { 670 if (m_ParseOptions.m_bGetCharCodeOnly)
721 return -1; 671 return -1;
722 } 672
723 m_Segment.RemoveAll(); 673 m_Segment.RemoveAll();
724 if (!m_IsParsered) { 674 if (!m_bIsParsed)
725 return -1; 675 return -1;
726 } 676
727 CFX_FloatRect rect(left, bottom, right, top); 677 CFX_FloatRect rect(left, bottom, right, top);
728 rect.Normalize(); 678 rect.Normalize();
729 int nCount = m_charList.GetSize(); 679 int nCount = m_charList.GetSize();
730 int pos = 0; 680 int pos = 0;
731 FPDF_SEGMENT segment; 681 FPDF_SEGMENT segment;
732 segment.m_Start = 0; 682 segment.m_Start = 0;
733 segment.m_nCount = 0; 683 segment.m_nCount = 0;
734 int segmentStatus = 0; 684 int segmentStatus = 0;
735 FX_BOOL IsContainPreChar = FALSE; 685 FX_BOOL IsContainPreChar = FALSE;
736 while (pos < nCount) { 686 while (pos < nCount) {
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
796 if (m_ParseOptions.m_bGetCharCodeOnly) { 746 if (m_ParseOptions.m_bGetCharCodeOnly) {
797 return; 747 return;
798 } 748 }
799 if (index < 0 || index >= m_Segment.GetSize()) { 749 if (index < 0 || index >= m_Segment.GetSize()) {
800 return; 750 return;
801 } 751 }
802 start = m_Segment.GetAt(index).m_Start; 752 start = m_Segment.GetAt(index).m_Start;
803 count = m_Segment.GetAt(index).m_nCount; 753 count = m_Segment.GetAt(index).m_nCount;
804 } 754 }
805 int CPDF_TextPage::GetWordBreak(int index, int direction) const { 755 int CPDF_TextPage::GetWordBreak(int index, int direction) const {
806 if (m_ParseOptions.m_bGetCharCodeOnly) { 756 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
807 return -1; 757 return -1;
808 } 758
809 if (!m_IsParsered) { 759 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT)
810 return -1; 760 return -1;
811 } 761
812 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) { 762 if (index < 0 || index >= m_charList.GetSize())
813 return -1; 763 return -1;
814 } 764
815 if (index < 0 || index >= m_charList.GetSize()) {
816 return -1;
817 }
818 PAGECHAR_INFO charinfo; 765 PAGECHAR_INFO charinfo;
819 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); 766 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
820 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { 767 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
821 return index; 768 return index;
822 } 769 }
823 if (!IsLetter(charinfo.m_Unicode)) { 770 if (!IsLetter(charinfo.m_Unicode)) {
824 return index; 771 return index;
825 } 772 }
826 int breakPos = index; 773 int breakPos = index;
827 if (direction == FPDFTEXT_LEFT) { 774 if (direction == FPDFTEXT_LEFT) {
(...skipping 1727 matching lines...) Expand 10 before | Expand all | Expand 10 after
2555 rects.Copy(m_resArray); 2502 rects.Copy(m_resArray);
2556 } 2503 }
2557 int CPDF_TextPageFind::GetCurOrder() const { 2504 int CPDF_TextPageFind::GetCurOrder() const {
2558 return GetCharIndex(m_resStart); 2505 return GetCharIndex(m_resStart);
2559 } 2506 }
2560 int CPDF_TextPageFind::GetMatchedCount() const { 2507 int CPDF_TextPageFind::GetMatchedCount() const {
2561 int resStart = GetCharIndex(m_resStart); 2508 int resStart = GetCharIndex(m_resStart);
2562 int resEnd = GetCharIndex(m_resEnd); 2509 int resEnd = GetCharIndex(m_resEnd);
2563 return resEnd - resStart + 1; 2510 return resEnd - resStart + 1;
2564 } 2511 }
2565 CPDF_LinkExtract::CPDF_LinkExtract() : m_pTextPage(NULL), m_IsParserd(FALSE) {} 2512
2513 CPDF_LinkExtract::CPDF_LinkExtract()
2514 : m_pTextPage(nullptr), m_bIsParsed(false) {
2515 }
2516
2566 CPDF_LinkExtract::~CPDF_LinkExtract() { 2517 CPDF_LinkExtract::~CPDF_LinkExtract() {
2567 DeleteLinkList(); 2518 DeleteLinkList();
2568 } 2519 }
2520
2569 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { 2521 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) {
2570 if (!pTextPage || !pTextPage->IsParsered()) { 2522 if (!pTextPage || !pTextPage->IsParsed())
2571 return FALSE; 2523 return FALSE;
2572 } 2524
2573 m_pTextPage = (const CPDF_TextPage*)pTextPage; 2525 m_pTextPage = (const CPDF_TextPage*)pTextPage;
2574 m_strPageText = m_pTextPage->GetPageText(0, -1); 2526 m_strPageText = m_pTextPage->GetPageText(0, -1);
2575 DeleteLinkList(); 2527 DeleteLinkList();
2576 if (m_strPageText.IsEmpty()) { 2528 if (m_strPageText.IsEmpty()) {
2577 return FALSE; 2529 return FALSE;
2578 } 2530 }
2579 parserLink(); 2531 ParseLink();
2580 m_IsParserd = TRUE; 2532 m_bIsParsed = true;
2581 return TRUE; 2533 return TRUE;
2582 } 2534 }
2535
2583 void CPDF_LinkExtract::DeleteLinkList() { 2536 void CPDF_LinkExtract::DeleteLinkList() {
2584 while (m_LinkList.GetSize()) { 2537 while (m_LinkList.GetSize()) {
2585 CPDF_LinkExt* linkinfo = NULL; 2538 CPDF_LinkExt* linkinfo = NULL;
2586 linkinfo = m_LinkList.GetAt(0); 2539 linkinfo = m_LinkList.GetAt(0);
2587 m_LinkList.RemoveAt(0); 2540 m_LinkList.RemoveAt(0);
2588 delete linkinfo; 2541 delete linkinfo;
2589 } 2542 }
2590 m_LinkList.RemoveAll(); 2543 m_LinkList.RemoveAll();
2591 } 2544 }
2592 int CPDF_LinkExtract::CountLinks() const { 2545 int CPDF_LinkExtract::CountLinks() const {
2593 if (!m_IsParserd) { 2546 if (!m_bIsParsed) {
2594 return -1; 2547 return -1;
2595 } 2548 }
2596 return m_LinkList.GetSize(); 2549 return m_LinkList.GetSize();
2597 } 2550 }
2598 void CPDF_LinkExtract::parserLink() { 2551 void CPDF_LinkExtract::ParseLink() {
2599 int start = 0, pos = 0; 2552 int start = 0, pos = 0;
2600 int TotalChar = m_pTextPage->CountChars(); 2553 int TotalChar = m_pTextPage->CountChars();
2601 while (pos < TotalChar) { 2554 while (pos < TotalChar) {
2602 FPDF_CHAR_INFO pageChar; 2555 FPDF_CHAR_INFO pageChar;
2603 m_pTextPage->GetCharInfo(pos, pageChar); 2556 m_pTextPage->GetCharInfo(pos, pageChar);
2604 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || 2557 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 ||
2605 pos == TotalChar - 1) { 2558 pos == TotalChar - 1) {
2606 int nCount = pos - start; 2559 int nCount = pos - start;
2607 if (pos == TotalChar - 1) { 2560 if (pos == TotalChar - 1) {
2608 nCount++; 2561 nCount++;
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
2741 int count, 2694 int count,
2742 const CFX_WideString& strUrl) { 2695 const CFX_WideString& strUrl) {
2743 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; 2696 CPDF_LinkExt* linkInfo = new CPDF_LinkExt;
2744 linkInfo->m_strUrl = strUrl; 2697 linkInfo->m_strUrl = strUrl;
2745 linkInfo->m_Start = start; 2698 linkInfo->m_Start = start;
2746 linkInfo->m_Count = count; 2699 linkInfo->m_Count = count;
2747 m_LinkList.Add(linkInfo); 2700 m_LinkList.Add(linkInfo);
2748 return TRUE; 2701 return TRUE;
2749 } 2702 }
2750 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { 2703 CFX_WideString CPDF_LinkExtract::GetURL(int index) const {
2751 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { 2704 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2752 return L""; 2705 return L"";
2753 } 2706 }
2754 CPDF_LinkExt* link = NULL; 2707 CPDF_LinkExt* link = NULL;
2755 link = m_LinkList.GetAt(index); 2708 link = m_LinkList.GetAt(index);
2756 if (!link) { 2709 if (!link) {
2757 return L""; 2710 return L"";
2758 } 2711 }
2759 return link->m_strUrl; 2712 return link->m_strUrl;
2760 } 2713 }
2761 void CPDF_LinkExtract::GetBoundedSegment(int index, 2714 void CPDF_LinkExtract::GetBoundedSegment(int index,
2762 int& start, 2715 int& start,
2763 int& count) const { 2716 int& count) const {
2764 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { 2717 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2765 return; 2718 return;
2766 } 2719 }
2767 CPDF_LinkExt* link = NULL; 2720 CPDF_LinkExt* link = NULL;
2768 link = m_LinkList.GetAt(index); 2721 link = m_LinkList.GetAt(index);
2769 if (!link) { 2722 if (!link) {
2770 return; 2723 return;
2771 } 2724 }
2772 start = link->m_Start; 2725 start = link->m_Start;
2773 count = link->m_Count; 2726 count = link->m_Count;
2774 } 2727 }
2775 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { 2728 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const {
2776 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { 2729 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2777 return; 2730 return;
2778 } 2731 }
2779 CPDF_LinkExt* link = NULL; 2732 CPDF_LinkExt* link = NULL;
2780 link = m_LinkList.GetAt(index); 2733 link = m_LinkList.GetAt(index);
2781 if (!link) { 2734 if (!link) {
2782 return; 2735 return;
2783 } 2736 }
2784 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); 2737 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2785 } 2738 }
OLDNEW
« no previous file with comments | « core/include/fpdftext/fpdf_text.h ('k') | core/src/fpdftext/text_int.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698