Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(961)

Side by Side Diff: core/src/fpdftext/fpdf_text_int.cpp

Issue 1405133008: XFA: Manually correct IsParsered to IsParsed in remaining places. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@xfa
Patch Set: Update. Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | core/src/fpdftext/text_int.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include <ctype.h> 7 #include <ctype.h>
8 #include <algorithm> 8 #include <algorithm>
9 9
10 #include "../../../third_party/base/nonstd_unique_ptr.h" 10 #include "../../../third_party/base/nonstd_unique_ptr.h"
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
89 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, 89 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage,
90 int flags) { 90 int flags) {
91 return new CPDF_TextPage(pPage, flags); 91 return new CPDF_TextPage(pPage, flags);
92 } 92 }
93 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, 93 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs,
94 int flags) { 94 int flags) {
95 return new CPDF_TextPage(pObjs, flags); 95 return new CPDF_TextPage(pObjs, flags);
96 } 96 }
97 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( 97 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind(
98 const IPDF_TextPage* pTextPage) { 98 const IPDF_TextPage* pTextPage) {
99 if (!pTextPage) { 99 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr;
100 return NULL;
101 }
102 return new CPDF_TextPageFind(pTextPage);
103 } 100 }
104 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { 101 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() {
105 return new CPDF_LinkExtract(); 102 return new CPDF_LinkExtract();
106 } 103 }
107 #define TEXT_BLANK_CHAR L' ' 104 #define TEXT_BLANK_CHAR L' '
108 #define TEXT_LINEFEED_CHAR L'\n' 105 #define TEXT_LINEFEED_CHAR L'\n'
109 #define TEXT_RETURN_CHAR L'\r' 106 #define TEXT_RETURN_CHAR L'\r'
110 #define TEXT_EMPTY L"" 107 #define TEXT_EMPTY L""
111 #define TEXT_BLANK L" " 108 #define TEXT_BLANK L" "
112 #define TEXT_RETURN_LINEFEED L"\r\n" 109 #define TEXT_RETURN_LINEFEED L"\r\n"
113 #define TEXT_LINEFEED L"\n" 110 #define TEXT_LINEFEED L"\n"
114 #define TEXT_CHARRATIO_GAPDELTA 0.070 111 #define TEXT_CHARRATIO_GAPDELTA 0.070
112
115 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) 113 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags)
116 : m_charList(512), 114 : m_pPage(pPage),
115 m_charList(512),
117 m_TempCharList(50), 116 m_TempCharList(50),
118 m_pPreTextObj(NULL), 117 m_parserflag(flags),
119 m_IsParsered(FALSE), 118 m_pPreTextObj(nullptr),
119 m_bIsParsed(false),
120 m_TextlineDir(-1), 120 m_TextlineDir(-1),
121 m_CurlineRect(0, 0, 0, 0) { 121 m_CurlineRect(0, 0, 0, 0) {
122 m_pPage = pPage;
123 m_parserflag = flags;
124 m_TextBuf.EstimateSize(0, 10240); 122 m_TextBuf.EstimateSize(0, 10240);
125 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), 123 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(),
126 (int)pPage->GetPageHeight(), 0); 124 (int)pPage->GetPageHeight(), 0);
127 } 125 }
126
128 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, 127 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage,
129 CPDFText_ParseOptions ParserOptions) 128 CPDFText_ParseOptions ParserOptions)
130 : m_ParseOptions(ParserOptions), 129 : m_ParseOptions(ParserOptions),
130 m_pPage(pPage),
131 m_charList(512), 131 m_charList(512),
132 m_TempCharList(50), 132 m_TempCharList(50),
133 m_pPreTextObj(NULL), 133 m_parserflag(0),
134 m_IsParsered(FALSE), 134 m_pPreTextObj(nullptr),
135 m_bIsParsed(false),
135 m_TextlineDir(-1), 136 m_TextlineDir(-1),
136 m_CurlineRect(0, 0, 0, 0) { 137 m_CurlineRect(0, 0, 0, 0) {
137 m_pPage = pPage;
138 m_parserflag = 0;
139 m_TextBuf.EstimateSize(0, 10240); 138 m_TextBuf.EstimateSize(0, 10240);
140 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), 139 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(),
141 (int)pPage->GetPageHeight(), 0); 140 (int)pPage->GetPageHeight(), 0);
142 } 141 }
142
143 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags) 143 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags)
144 : m_charList(512), 144 : m_pPage(pPage),
145 m_charList(512),
145 m_TempCharList(50), 146 m_TempCharList(50),
146 m_pPreTextObj(NULL), 147 m_parserflag(flags),
147 m_IsParsered(FALSE), 148 m_pPreTextObj(nullptr),
149 m_bIsParsed(false),
148 m_TextlineDir(-1), 150 m_TextlineDir(-1),
149 m_CurlineRect(0, 0, 0, 0) { 151 m_CurlineRect(0, 0, 0, 0) {
150 m_pPage = pPage;
151 m_parserflag = flags;
152 m_TextBuf.EstimateSize(0, 10240); 152 m_TextBuf.EstimateSize(0, 10240);
153 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); 153 CFX_FloatRect pageRect = pPage->CalcBoundingBox();
154 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); 154 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top);
155 } 155 }
156 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { 156 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) {
157 m_ParseOptions.m_bNormalizeObjs = bNormalize; 157 m_ParseOptions.m_bNormalizeObjs = bNormalize;
158 } 158 }
159 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { 159 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) {
160 switch (charInfo.m_Unicode) { 160 switch (charInfo.m_Unicode) {
161 case 0x2: 161 case 0x2:
162 case 0x3: 162 case 0x3:
163 case 0x93: 163 case 0x93:
164 case 0x94: 164 case 0x94:
165 case 0x96: 165 case 0x96:
166 case 0x97: 166 case 0x97:
167 case 0x98: 167 case 0x98:
168 case 0xfffe: 168 case 0xfffe:
169 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; 169 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN;
170 default: 170 default:
171 return false; 171 return false;
172 } 172 }
173 } 173 }
174 FX_BOOL CPDF_TextPage::ParseTextPage() { 174 FX_BOOL CPDF_TextPage::ParseTextPage() {
175 if (!m_pPage) { 175 m_bIsParsed = false;
176 m_IsParsered = FALSE; 176 if (!m_pPage)
177 return FALSE; 177 return FALSE;
178 } 178
179 m_IsParsered = FALSE;
180 m_TextBuf.Clear(); 179 m_TextBuf.Clear();
181 m_charList.RemoveAll(); 180 m_charList.RemoveAll();
182 m_pPreTextObj = NULL; 181 m_pPreTextObj = NULL;
183 ProcessObject(); 182 ProcessObject();
184 m_IsParsered = TRUE; 183 m_bIsParsed = true;
185 if (!m_ParseOptions.m_bGetCharCodeOnly) { 184 if (!m_ParseOptions.m_bGetCharCodeOnly) {
186 m_CharIndex.RemoveAll(); 185 m_CharIndex.RemoveAll();
187 int nCount = m_charList.GetSize(); 186 int nCount = m_charList.GetSize();
188 if (nCount) { 187 if (nCount) {
189 m_CharIndex.Add(0); 188 m_CharIndex.Add(0);
190 } 189 }
191 for (int i = 0; i < nCount; i++) { 190 for (int i = 0; i < nCount; i++) {
192 int indexSize = m_CharIndex.GetSize(); 191 int indexSize = m_CharIndex.GetSize();
193 FX_BOOL bNormal = FALSE; 192 FX_BOOL bNormal = FALSE;
194 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); 193 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i);
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
262 } 261 }
263 void CPDF_TextPage::GetRectArray(int start, 262 void CPDF_TextPage::GetRectArray(int start,
264 int nCount, 263 int nCount,
265 CFX_RectArray& rectArray) const { 264 CFX_RectArray& rectArray) const {
266 if (m_ParseOptions.m_bGetCharCodeOnly) { 265 if (m_ParseOptions.m_bGetCharCodeOnly) {
267 return; 266 return;
268 } 267 }
269 if (start < 0 || nCount == 0) { 268 if (start < 0 || nCount == 0) {
270 return; 269 return;
271 } 270 }
272 if (!m_IsParsered) { 271 if (!m_bIsParsed) {
273 return; 272 return;
274 } 273 }
275 PAGECHAR_INFO info_curchar; 274 PAGECHAR_INFO info_curchar;
276 CPDF_TextObject* pCurObj = NULL; 275 CPDF_TextObject* pCurObj = NULL;
277 CFX_FloatRect rect; 276 CFX_FloatRect rect;
278 int curPos = start; 277 int curPos = start;
279 FX_BOOL flagNewRect = TRUE; 278 FX_BOOL flagNewRect = TRUE;
280 if (nCount + start > m_charList.GetSize() || nCount == -1) { 279 if (nCount + start > m_charList.GetSize() || nCount == -1) {
281 nCount = m_charList.GetSize() - start; 280 nCount = m_charList.GetSize() - start;
282 } 281 }
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
345 rect.bottom = info_curchar.m_CharBox.bottom; 344 rect.bottom = info_curchar.m_CharBox.bottom;
346 } 345 }
347 } 346 }
348 } 347 }
349 rectArray.Add(rect); 348 rectArray.Add(rect);
350 return; 349 return;
351 } 350 }
352 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, 351 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point,
353 FX_FLOAT xTolerance, 352 FX_FLOAT xTolerance,
354 FX_FLOAT yTolerance) const { 353 FX_FLOAT yTolerance) const {
355 if (m_ParseOptions.m_bGetCharCodeOnly) { 354 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
356 return -3; 355 return -3;
357 } 356
358 if (!m_IsParsered) {
359 return -3;
360 }
361 int pos = 0; 357 int pos = 0;
362 int NearPos = -1; 358 int NearPos = -1;
363 double xdif = 5000, ydif = 5000; 359 double xdif = 5000, ydif = 5000;
364 while (pos < m_charList.GetSize()) { 360 while (pos < m_charList.GetSize()) {
365 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); 361 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos));
366 CFX_FloatRect charrect = charinfo.m_CharBox; 362 CFX_FloatRect charrect = charinfo.m_CharBox;
367 if (charrect.Contains(point.x, point.y)) { 363 if (charrect.Contains(point.x, point.y)) {
368 break; 364 break;
369 } 365 }
370 if (xTolerance > 0 || yTolerance > 0) { 366 if (xTolerance > 0 || yTolerance > 0) {
(...skipping 22 matching lines...) Expand all
393 } 389 }
394 ++pos; 390 ++pos;
395 } 391 }
396 if (pos >= m_charList.GetSize()) { 392 if (pos >= m_charList.GetSize()) {
397 pos = NearPos; 393 pos = NearPos;
398 } 394 }
399 return pos; 395 return pos;
400 } 396 }
401 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { 397 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
402 CFX_WideString strText; 398 CFX_WideString strText;
403 if (m_ParseOptions.m_bGetCharCodeOnly || !m_IsParsered) { 399 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
404 return strText; 400 return strText;
405 } 401
406 int nCount = m_charList.GetSize(); 402 int nCount = m_charList.GetSize();
407 int pos = 0; 403 int pos = 0;
408 FX_FLOAT posy = 0; 404 FX_FLOAT posy = 0;
409 FX_BOOL IsContainPreChar = FALSE; 405 FX_BOOL IsContainPreChar = FALSE;
410 FX_BOOL ISAddLineFeed = FALSE; 406 FX_BOOL ISAddLineFeed = FALSE;
411 while (pos < nCount) { 407 while (pos < nCount) {
412 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); 408 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++);
413 if (IsRectIntersect(rect, charinfo.m_CharBox)) { 409 if (IsRectIntersect(rect, charinfo.m_CharBox)) {
414 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar && 410 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar &&
415 ISAddLineFeed) { 411 ISAddLineFeed) {
(...skipping 15 matching lines...) Expand all
431 } 427 }
432 } else { 428 } else {
433 IsContainPreChar = FALSE; 429 IsContainPreChar = FALSE;
434 ISAddLineFeed = TRUE; 430 ISAddLineFeed = TRUE;
435 } 431 }
436 } 432 }
437 return strText; 433 return strText;
438 } 434 }
439 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, 435 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect,
440 CFX_RectArray& resRectArray) const { 436 CFX_RectArray& resRectArray) const {
441 if (m_ParseOptions.m_bGetCharCodeOnly) { 437 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
442 return; 438 return;
443 } 439
444 if (!m_IsParsered) {
445 return;
446 }
447 CFX_FloatRect curRect; 440 CFX_FloatRect curRect;
448 FX_BOOL flagNewRect = TRUE; 441 FX_BOOL flagNewRect = TRUE;
449 CPDF_TextObject* pCurObj = NULL; 442 CPDF_TextObject* pCurObj = NULL;
450 int nCount = m_charList.GetSize(); 443 int nCount = m_charList.GetSize();
451 int pos = 0; 444 int pos = 0;
452 while (pos < nCount) { 445 while (pos < nCount) {
453 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); 446 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++);
454 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { 447 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) {
455 continue; 448 continue;
456 } 449 }
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
491 FX_FLOAT y, 484 FX_FLOAT y,
492 FX_FLOAT xTolerance, 485 FX_FLOAT xTolerance,
493 FX_FLOAT yTolerance) const { 486 FX_FLOAT yTolerance) const {
494 if (m_ParseOptions.m_bGetCharCodeOnly) { 487 if (m_ParseOptions.m_bGetCharCodeOnly) {
495 return -3; 488 return -3;
496 } 489 }
497 CPDF_Point point(x, y); 490 CPDF_Point point(x, y);
498 return GetIndexAtPos(point, xTolerance, yTolerance); 491 return GetIndexAtPos(point, xTolerance, yTolerance);
499 } 492 }
500 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const { 493 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const {
501 if (m_ParseOptions.m_bGetCharCodeOnly) { 494 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
502 return; 495 return;
503 } 496
504 if (!m_IsParsered) { 497 if (index < 0 || index >= m_charList.GetSize())
505 return; 498 return;
506 } 499
507 if (index < 0 || index >= m_charList.GetSize()) {
508 return;
509 }
510 PAGECHAR_INFO charinfo; 500 PAGECHAR_INFO charinfo;
511 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); 501 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
512 info.m_Charcode = charinfo.m_CharCode; 502 info.m_Charcode = charinfo.m_CharCode;
513 info.m_OriginX = charinfo.m_OriginX; 503 info.m_OriginX = charinfo.m_OriginX;
514 info.m_OriginY = charinfo.m_OriginY; 504 info.m_OriginY = charinfo.m_OriginY;
515 info.m_Unicode = charinfo.m_Unicode; 505 info.m_Unicode = charinfo.m_Unicode;
516 info.m_Flag = charinfo.m_Flag; 506 info.m_Flag = charinfo.m_Flag;
517 info.m_CharBox = charinfo.m_CharBox; 507 info.m_CharBox = charinfo.m_CharBox;
518 info.m_pTextObj = charinfo.m_pTextObj; 508 info.m_pTextObj = charinfo.m_pTextObj;
519 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) { 509 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) {
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
554 if (endIndex >= m_charList.GetSize()) { 544 if (endIndex >= m_charList.GetSize()) {
555 break; 545 break;
556 } 546 }
557 charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex); 547 charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex);
558 } 548 }
559 endIndex--; 549 endIndex--;
560 nCount = endIndex - start + 1; 550 nCount = endIndex - start + 1;
561 } 551 }
562 } 552 }
563 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { 553 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
564 if (!m_IsParsered || nCount == 0) { 554 if (!m_bIsParsed || nCount == 0)
565 return L""; 555 return L"";
566 } 556
567 if (start < 0) { 557 if (start < 0)
568 start = 0; 558 start = 0;
569 } 559
570 if (nCount == -1) { 560 if (nCount == -1) {
571 nCount = m_charList.GetSize() - start; 561 nCount = m_charList.GetSize() - start;
572 return m_TextBuf.GetWideString().Mid(start, 562 return m_TextBuf.GetWideString().Mid(start,
573 m_TextBuf.GetWideString().GetLength()); 563 m_TextBuf.GetWideString().GetLength());
574 } 564 }
575 if (nCount <= 0 || m_charList.GetSize() <= 0) { 565 if (nCount <= 0 || m_charList.GetSize() <= 0) {
576 return L""; 566 return L"";
577 } 567 }
578 if (nCount + start > m_charList.GetSize() - 1) { 568 if (nCount + start > m_charList.GetSize() - 1) {
579 nCount = m_charList.GetSize() - start; 569 nCount = m_charList.GetSize() - start;
(...skipping 23 matching lines...) Expand all
603 charinfo = 593 charinfo =
604 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1); 594 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1);
605 } 595 }
606 nCount = start + nCount - nCountOffset - startindex; 596 nCount = start + nCount - nCountOffset - startindex;
607 if (nCount <= 0) { 597 if (nCount <= 0) {
608 return L""; 598 return L"";
609 } 599 }
610 return m_TextBuf.GetWideString().Mid(startindex, nCount); 600 return m_TextBuf.GetWideString().Mid(startindex, nCount);
611 } 601 }
612 int CPDF_TextPage::CountRects(int start, int nCount) { 602 int CPDF_TextPage::CountRects(int start, int nCount) {
613 if (m_ParseOptions.m_bGetCharCodeOnly) { 603 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed || start < 0)
614 return -1; 604 return -1;
615 } 605
616 if (!m_IsParsered) {
617 return -1;
618 }
619 if (start < 0) {
620 return -1;
621 }
622 if (nCount == -1 || nCount + start > m_charList.GetSize()) { 606 if (nCount == -1 || nCount + start > m_charList.GetSize()) {
623 nCount = m_charList.GetSize() - start; 607 nCount = m_charList.GetSize() - start;
624 } 608 }
625 m_SelRects.RemoveAll(); 609 m_SelRects.RemoveAll();
626 GetRectArray(start, nCount, m_SelRects); 610 GetRectArray(start, nCount, m_SelRects);
627 return m_SelRects.GetSize(); 611 return m_SelRects.GetSize();
628 } 612 }
629 void CPDF_TextPage::GetRect(int rectIndex, 613 void CPDF_TextPage::GetRect(int rectIndex,
630 FX_FLOAT& left, 614 FX_FLOAT& left,
631 FX_FLOAT& top, 615 FX_FLOAT& top,
632 FX_FLOAT& right, 616 FX_FLOAT& right,
633 FX_FLOAT& bottom) const { 617 FX_FLOAT& bottom) const {
634 if (m_ParseOptions.m_bGetCharCodeOnly) { 618 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
635 return; 619 return;
636 } 620
637 if (!m_IsParsered || rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) { 621 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize())
638 return; 622 return;
639 } 623
640 left = m_SelRects.GetAt(rectIndex).left; 624 left = m_SelRects.GetAt(rectIndex).left;
641 top = m_SelRects.GetAt(rectIndex).top; 625 top = m_SelRects.GetAt(rectIndex).top;
642 right = m_SelRects.GetAt(rectIndex).right; 626 right = m_SelRects.GetAt(rectIndex).right;
643 bottom = m_SelRects.GetAt(rectIndex).bottom; 627 bottom = m_SelRects.GetAt(rectIndex).bottom;
644 } 628 }
645 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { 629 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) {
646 if (m_ParseOptions.m_bGetCharCodeOnly) { 630 if (m_ParseOptions.m_bGetCharCodeOnly) {
647 return FALSE; 631 return FALSE;
648 } 632 }
649 if (end == start) { 633 if (end == start) {
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
696 GetBoundedSegment(n - 1, start, count); 680 GetBoundedSegment(n - 1, start, count);
697 end = start + count - 1; 681 end = start + count - 1;
698 GetBoundedSegment(0, start, count); 682 GetBoundedSegment(0, start, count);
699 } else { 683 } else {
700 GetBoundedSegment(0, start, count); 684 GetBoundedSegment(0, start, count);
701 end = start + count - 1; 685 end = start + count - 1;
702 } 686 }
703 return GetBaselineRotate(start, end, Rotate); 687 return GetBaselineRotate(start, end, Rotate);
704 } 688 }
705 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { 689 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) {
706 if (m_ParseOptions.m_bGetCharCodeOnly) { 690 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
707 return FALSE; 691 return FALSE;
708 } 692
709 if (!m_IsParsered || rectIndex < 0 || rectIndex > m_SelRects.GetSize()) { 693 if (rectIndex < 0 || rectIndex > m_SelRects.GetSize())
710 return FALSE; 694 return FALSE;
711 } 695
712 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); 696 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);
713 return GetBaselineRotate(rect, Rotate); 697 return GetBaselineRotate(rect, Rotate);
714 } 698 }
715 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, 699 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
716 FX_FLOAT top, 700 FX_FLOAT top,
717 FX_FLOAT right, 701 FX_FLOAT right,
718 FX_FLOAT bottom, 702 FX_FLOAT bottom,
719 FX_BOOL bContains) { 703 FX_BOOL bContains) {
720 if (m_ParseOptions.m_bGetCharCodeOnly) { 704 if (m_ParseOptions.m_bGetCharCodeOnly)
721 return -1; 705 return -1;
722 } 706
723 m_Segment.RemoveAll(); 707 m_Segment.RemoveAll();
724 if (!m_IsParsered) { 708 if (!m_bIsParsed)
725 return -1; 709 return -1;
726 } 710
727 CFX_FloatRect rect(left, bottom, right, top); 711 CFX_FloatRect rect(left, bottom, right, top);
728 rect.Normalize(); 712 rect.Normalize();
729 int nCount = m_charList.GetSize(); 713 int nCount = m_charList.GetSize();
730 int pos = 0; 714 int pos = 0;
731 FPDF_SEGMENT segment; 715 FPDF_SEGMENT segment;
732 segment.m_Start = 0; 716 segment.m_Start = 0;
733 segment.m_nCount = 0; 717 segment.m_nCount = 0;
734 int segmentStatus = 0; 718 int segmentStatus = 0;
735 FX_BOOL IsContainPreChar = FALSE; 719 FX_BOOL IsContainPreChar = FALSE;
736 while (pos < nCount) { 720 while (pos < nCount) {
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
796 if (m_ParseOptions.m_bGetCharCodeOnly) { 780 if (m_ParseOptions.m_bGetCharCodeOnly) {
797 return; 781 return;
798 } 782 }
799 if (index < 0 || index >= m_Segment.GetSize()) { 783 if (index < 0 || index >= m_Segment.GetSize()) {
800 return; 784 return;
801 } 785 }
802 start = m_Segment.GetAt(index).m_Start; 786 start = m_Segment.GetAt(index).m_Start;
803 count = m_Segment.GetAt(index).m_nCount; 787 count = m_Segment.GetAt(index).m_nCount;
804 } 788 }
805 int CPDF_TextPage::GetWordBreak(int index, int direction) const { 789 int CPDF_TextPage::GetWordBreak(int index, int direction) const {
806 if (m_ParseOptions.m_bGetCharCodeOnly) { 790 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
807 return -1; 791 return -1;
808 } 792
809 if (!m_IsParsered) { 793 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT)
810 return -1; 794 return -1;
811 } 795
812 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) { 796 if (index < 0 || index >= m_charList.GetSize())
813 return -1; 797 return -1;
814 } 798
815 if (index < 0 || index >= m_charList.GetSize()) {
816 return -1;
817 }
818 PAGECHAR_INFO charinfo; 799 PAGECHAR_INFO charinfo;
819 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); 800 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
820 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { 801 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
821 return index; 802 return index;
822 } 803 }
823 if (!IsLetter(charinfo.m_Unicode)) { 804 if (!IsLetter(charinfo.m_Unicode)) {
824 return index; 805 return index;
825 } 806 }
826 int breakPos = index; 807 int breakPos = index;
827 if (direction == FPDFTEXT_LEFT) { 808 if (direction == FPDFTEXT_LEFT) {
(...skipping 1721 matching lines...) Expand 10 before | Expand all | Expand 10 after
2549 rects.Copy(m_resArray); 2530 rects.Copy(m_resArray);
2550 } 2531 }
2551 int CPDF_TextPageFind::GetCurOrder() const { 2532 int CPDF_TextPageFind::GetCurOrder() const {
2552 return GetCharIndex(m_resStart); 2533 return GetCharIndex(m_resStart);
2553 } 2534 }
2554 int CPDF_TextPageFind::GetMatchedCount() const { 2535 int CPDF_TextPageFind::GetMatchedCount() const {
2555 int resStart = GetCharIndex(m_resStart); 2536 int resStart = GetCharIndex(m_resStart);
2556 int resEnd = GetCharIndex(m_resEnd); 2537 int resEnd = GetCharIndex(m_resEnd);
2557 return resEnd - resStart + 1; 2538 return resEnd - resStart + 1;
2558 } 2539 }
2559 CPDF_LinkExtract::CPDF_LinkExtract() : m_pTextPage(NULL), m_IsParserd(FALSE) {} 2540
2541 CPDF_LinkExtract::CPDF_LinkExtract()
2542 : m_pTextPage(nullptr), m_bIsParsed(false) {
2543 }
2544
2560 CPDF_LinkExtract::~CPDF_LinkExtract() { 2545 CPDF_LinkExtract::~CPDF_LinkExtract() {
2561 DeleteLinkList(); 2546 DeleteLinkList();
2562 } 2547 }
2548
2563 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { 2549 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) {
2564 if (!pTextPage || !pTextPage->IsParsed()) { 2550 if (!pTextPage || !pTextPage->IsParsed())
2565 return FALSE; 2551 return FALSE;
2566 } 2552
2567 m_pTextPage = (const CPDF_TextPage*)pTextPage; 2553 m_pTextPage = (const CPDF_TextPage*)pTextPage;
2568 m_strPageText = m_pTextPage->GetPageText(0, -1); 2554 m_strPageText = m_pTextPage->GetPageText(0, -1);
2569 DeleteLinkList(); 2555 DeleteLinkList();
2570 if (m_strPageText.IsEmpty()) { 2556 if (m_strPageText.IsEmpty()) {
2571 return FALSE; 2557 return FALSE;
2572 } 2558 }
2573 parserLink(); 2559 ParseLink();
2574 m_IsParserd = TRUE; 2560 m_bIsParsed = true;
2575 return TRUE; 2561 return TRUE;
2576 } 2562 }
2563
2577 void CPDF_LinkExtract::DeleteLinkList() { 2564 void CPDF_LinkExtract::DeleteLinkList() {
2578 while (m_LinkList.GetSize()) { 2565 while (m_LinkList.GetSize()) {
2579 CPDF_LinkExt* linkinfo = NULL; 2566 CPDF_LinkExt* linkinfo = NULL;
2580 linkinfo = m_LinkList.GetAt(0); 2567 linkinfo = m_LinkList.GetAt(0);
2581 m_LinkList.RemoveAt(0); 2568 m_LinkList.RemoveAt(0);
2582 delete linkinfo; 2569 delete linkinfo;
2583 } 2570 }
2584 m_LinkList.RemoveAll(); 2571 m_LinkList.RemoveAll();
2585 } 2572 }
2586 int CPDF_LinkExtract::CountLinks() const { 2573 int CPDF_LinkExtract::CountLinks() const {
2587 if (!m_IsParserd) { 2574 if (!m_bIsParsed) {
2588 return -1; 2575 return -1;
2589 } 2576 }
2590 return m_LinkList.GetSize(); 2577 return m_LinkList.GetSize();
2591 } 2578 }
2592 void CPDF_LinkExtract::parserLink() { 2579 void CPDF_LinkExtract::ParseLink() {
2593 int start = 0, pos = 0; 2580 int start = 0, pos = 0;
2594 int TotalChar = m_pTextPage->CountChars(); 2581 int TotalChar = m_pTextPage->CountChars();
2595 while (pos < TotalChar) { 2582 while (pos < TotalChar) {
2596 FPDF_CHAR_INFO pageChar; 2583 FPDF_CHAR_INFO pageChar;
2597 m_pTextPage->GetCharInfo(pos, pageChar); 2584 m_pTextPage->GetCharInfo(pos, pageChar);
2598 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || 2585 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 ||
2599 pos == TotalChar - 1) { 2586 pos == TotalChar - 1) {
2600 int nCount = pos - start; 2587 int nCount = pos - start;
2601 if (pos == TotalChar - 1) { 2588 if (pos == TotalChar - 1) {
2602 nCount++; 2589 nCount++;
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
2734 int count, 2721 int count,
2735 const CFX_WideString& strUrl) { 2722 const CFX_WideString& strUrl) {
2736 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; 2723 CPDF_LinkExt* linkInfo = new CPDF_LinkExt;
2737 linkInfo->m_strUrl = strUrl; 2724 linkInfo->m_strUrl = strUrl;
2738 linkInfo->m_Start = start; 2725 linkInfo->m_Start = start;
2739 linkInfo->m_Count = count; 2726 linkInfo->m_Count = count;
2740 m_LinkList.Add(linkInfo); 2727 m_LinkList.Add(linkInfo);
2741 } 2728 }
2742 2729
2743 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { 2730 CFX_WideString CPDF_LinkExtract::GetURL(int index) const {
2744 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { 2731 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2745 return L""; 2732 return L"";
2746 } 2733 }
2747 CPDF_LinkExt* link = NULL; 2734 CPDF_LinkExt* link = NULL;
2748 link = m_LinkList.GetAt(index); 2735 link = m_LinkList.GetAt(index);
2749 if (!link) { 2736 if (!link) {
2750 return L""; 2737 return L"";
2751 } 2738 }
2752 return link->m_strUrl; 2739 return link->m_strUrl;
2753 } 2740 }
2754 void CPDF_LinkExtract::GetBoundedSegment(int index, 2741 void CPDF_LinkExtract::GetBoundedSegment(int index,
2755 int& start, 2742 int& start,
2756 int& count) const { 2743 int& count) const {
2757 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { 2744 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2758 return; 2745 return;
2759 } 2746 }
2760 CPDF_LinkExt* link = NULL; 2747 CPDF_LinkExt* link = NULL;
2761 link = m_LinkList.GetAt(index); 2748 link = m_LinkList.GetAt(index);
2762 if (!link) { 2749 if (!link) {
2763 return; 2750 return;
2764 } 2751 }
2765 start = link->m_Start; 2752 start = link->m_Start;
2766 count = link->m_Count; 2753 count = link->m_Count;
2767 } 2754 }
2768 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { 2755 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const {
2769 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { 2756 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2770 return; 2757 return;
2771 } 2758 }
2772 CPDF_LinkExt* link = NULL; 2759 CPDF_LinkExt* link = NULL;
2773 link = m_LinkList.GetAt(index); 2760 link = m_LinkList.GetAt(index);
2774 if (!link) { 2761 if (!link) {
2775 return; 2762 return;
2776 } 2763 }
2777 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); 2764 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2778 } 2765 }
OLDNEW
« no previous file with comments | « no previous file | core/src/fpdftext/text_int.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698