Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(76)

Side by Side Diff: core/src/fpdftext/fpdf_text_int.cpp

Issue 1629593002: Merge to XFA: CPDFText_ParseOptions never change from default. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@xfa
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « core/include/fpdftext/fpdf_text.h ('k') | core/src/fpdftext/text_int.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <cctype> 8 #include <cctype>
9 #include <cwctype> 9 #include <cwctype>
10 #include <memory> 10 #include <memory>
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
74 baseSpace = 0.0; 74 baseSpace = 0.0;
75 } 75 }
76 } 76 }
77 return baseSpace; 77 return baseSpace;
78 } 78 }
79 79
80 const FX_FLOAT kDefaultFontSize = 1.0f; 80 const FX_FLOAT kDefaultFontSize = 1.0f;
81 81
82 } // namespace 82 } // namespace
83 83
84 CPDFText_ParseOptions::CPDFText_ParseOptions()
85 : m_bGetCharCodeOnly(FALSE),
86 m_bNormalizeObjs(TRUE),
87 m_bOutputHyphen(FALSE) {}
88
89 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, 84 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage,
90 int flags) { 85 int flags) {
91 return new CPDF_TextPage(pPage, flags); 86 return new CPDF_TextPage(pPage, flags);
92 } 87 }
93 88
94 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( 89 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind(
95 const IPDF_TextPage* pTextPage) { 90 const IPDF_TextPage* pTextPage) {
96 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; 91 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr;
97 } 92 }
98 93
(...skipping 17 matching lines...) Expand all
116 m_parserflag(flags), 111 m_parserflag(flags),
117 m_pPreTextObj(nullptr), 112 m_pPreTextObj(nullptr),
118 m_bIsParsed(false), 113 m_bIsParsed(false),
119 m_TextlineDir(-1), 114 m_TextlineDir(-1),
120 m_CurlineRect(0, 0, 0, 0) { 115 m_CurlineRect(0, 0, 0, 0) {
121 m_TextBuf.EstimateSize(0, 10240); 116 m_TextBuf.EstimateSize(0, 10240);
122 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), 117 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(),
123 (int)pPage->GetPageHeight(), 0); 118 (int)pPage->GetPageHeight(), 0);
124 } 119 }
125 120
126 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) {
127 m_ParseOptions.m_bNormalizeObjs = bNormalize;
128 }
129 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { 121 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) {
130 switch (charInfo.m_Unicode) { 122 switch (charInfo.m_Unicode) {
131 case 0x2: 123 case 0x2:
132 case 0x3: 124 case 0x3:
133 case 0x93: 125 case 0x93:
134 case 0x94: 126 case 0x94:
135 case 0x96: 127 case 0x96:
136 case 0x97: 128 case 0x97:
137 case 0x98: 129 case 0x98:
138 case 0xfffe: 130 case 0xfffe:
139 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; 131 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN;
140 default: 132 default:
141 return false; 133 return false;
142 } 134 }
143 } 135 }
144 FX_BOOL CPDF_TextPage::ParseTextPage() { 136 FX_BOOL CPDF_TextPage::ParseTextPage() {
145 m_bIsParsed = false; 137 m_bIsParsed = false;
146 if (!m_pPage) 138 if (!m_pPage)
147 return FALSE; 139 return FALSE;
148 140
149 m_TextBuf.Clear(); 141 m_TextBuf.Clear();
150 m_charList.RemoveAll(); 142 m_charList.RemoveAll();
151 m_pPreTextObj = NULL; 143 m_pPreTextObj = NULL;
152 ProcessObject(); 144 ProcessObject();
153 m_bIsParsed = true; 145 m_bIsParsed = true;
154 if (!m_ParseOptions.m_bGetCharCodeOnly) { 146 m_CharIndex.RemoveAll();
155 m_CharIndex.RemoveAll(); 147 int nCount = m_charList.GetSize();
156 int nCount = m_charList.GetSize(); 148 if (nCount) {
157 if (nCount) { 149 m_CharIndex.Add(0);
158 m_CharIndex.Add(0); 150 }
151 for (int i = 0; i < nCount; i++) {
152 int indexSize = m_CharIndex.GetSize();
153 FX_BOOL bNormal = FALSE;
154 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i);
155 if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
156 bNormal = TRUE;
157 } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) {
158 bNormal = FALSE;
159 } else {
160 bNormal = TRUE;
159 } 161 }
160 for (int i = 0; i < nCount; i++) { 162 if (bNormal) {
161 int indexSize = m_CharIndex.GetSize(); 163 if (indexSize % 2) {
162 FX_BOOL bNormal = FALSE; 164 m_CharIndex.Add(1);
163 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i);
164 if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
165 bNormal = TRUE;
166 } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) {
167 bNormal = FALSE;
168 } else { 165 } else {
169 bNormal = TRUE; 166 if (indexSize <= 0) {
167 continue;
168 }
169 m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1);
170 } 170 }
171 if (bNormal) { 171 } else {
172 if (indexSize % 2) { 172 if (indexSize % 2) {
173 m_CharIndex.Add(1); 173 if (indexSize <= 0) {
174 } else { 174 continue;
175 if (indexSize <= 0) {
176 continue;
177 }
178 m_CharIndex.SetAt(indexSize - 1,
179 m_CharIndex.GetAt(indexSize - 1) + 1);
180 } 175 }
176 m_CharIndex.SetAt(indexSize - 1, i + 1);
181 } else { 177 } else {
182 if (indexSize % 2) { 178 m_CharIndex.Add(i + 1);
183 if (indexSize <= 0) {
184 continue;
185 }
186 m_CharIndex.SetAt(indexSize - 1, i + 1);
187 } else {
188 m_CharIndex.Add(i + 1);
189 }
190 } 179 }
191 } 180 }
192 int indexSize = m_CharIndex.GetSize(); 181 }
193 if (indexSize % 2) { 182 int indexSize = m_CharIndex.GetSize();
194 m_CharIndex.RemoveAt(indexSize - 1); 183 if (indexSize % 2) {
195 } 184 m_CharIndex.RemoveAt(indexSize - 1);
196 } 185 }
197 return TRUE; 186 return TRUE;
198 } 187 }
199 int CPDF_TextPage::CountChars() const { 188 int CPDF_TextPage::CountChars() const {
200 if (m_ParseOptions.m_bGetCharCodeOnly) {
201 return m_TextBuf.GetSize();
202 }
203 return m_charList.GetSize(); 189 return m_charList.GetSize();
204 } 190 }
205 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { 191 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const {
206 int indexSize = m_CharIndex.GetSize(); 192 int indexSize = m_CharIndex.GetSize();
207 int count = 0; 193 int count = 0;
208 for (int i = 0; i < indexSize; i += 2) { 194 for (int i = 0; i < indexSize; i += 2) {
209 count += m_CharIndex.GetAt(i + 1); 195 count += m_CharIndex.GetAt(i + 1);
210 if (count > TextIndex) { 196 if (count > TextIndex) {
211 return TextIndex - count + m_CharIndex.GetAt(i + 1) + 197 return TextIndex - count + m_CharIndex.GetAt(i + 1) +
212 m_CharIndex.GetAt(i); 198 m_CharIndex.GetAt(i);
(...skipping 12 matching lines...) Expand all
225 } 211 }
226 return CharIndex - m_CharIndex.GetAt(i) + count - 212 return CharIndex - m_CharIndex.GetAt(i) + count -
227 m_CharIndex.GetAt(i + 1); 213 m_CharIndex.GetAt(i + 1);
228 } 214 }
229 } 215 }
230 return -1; 216 return -1;
231 } 217 }
232 void CPDF_TextPage::GetRectArray(int start, 218 void CPDF_TextPage::GetRectArray(int start,
233 int nCount, 219 int nCount,
234 CFX_RectArray& rectArray) const { 220 CFX_RectArray& rectArray) const {
235 if (m_ParseOptions.m_bGetCharCodeOnly) {
236 return;
237 }
238 if (start < 0 || nCount == 0) { 221 if (start < 0 || nCount == 0) {
239 return; 222 return;
240 } 223 }
241 if (!m_bIsParsed) { 224 if (!m_bIsParsed) {
242 return; 225 return;
243 } 226 }
244 PAGECHAR_INFO info_curchar; 227 PAGECHAR_INFO info_curchar;
245 CPDF_TextObject* pCurObj = NULL; 228 CPDF_TextObject* pCurObj = NULL;
246 CFX_FloatRect rect; 229 CFX_FloatRect rect;
247 int curPos = start; 230 int curPos = start;
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
314 rect.bottom = info_curchar.m_CharBox.bottom; 297 rect.bottom = info_curchar.m_CharBox.bottom;
315 } 298 }
316 } 299 }
317 } 300 }
318 rectArray.Add(rect); 301 rectArray.Add(rect);
319 return; 302 return;
320 } 303 }
321 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, 304 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point,
322 FX_FLOAT xTolerance, 305 FX_FLOAT xTolerance,
323 FX_FLOAT yTolerance) const { 306 FX_FLOAT yTolerance) const {
324 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) 307 if (!m_bIsParsed)
325 return -3; 308 return -3;
326 309
327 int pos = 0; 310 int pos = 0;
328 int NearPos = -1; 311 int NearPos = -1;
329 double xdif = 5000, ydif = 5000; 312 double xdif = 5000, ydif = 5000;
330 while (pos < m_charList.GetSize()) { 313 while (pos < m_charList.GetSize()) {
331 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); 314 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos));
332 CFX_FloatRect charrect = charinfo.m_CharBox; 315 CFX_FloatRect charrect = charinfo.m_CharBox;
333 if (charrect.Contains(point.x, point.y)) { 316 if (charrect.Contains(point.x, point.y)) {
334 break; 317 break;
(...skipping 24 matching lines...) Expand all
359 } 342 }
360 ++pos; 343 ++pos;
361 } 344 }
362 if (pos >= m_charList.GetSize()) { 345 if (pos >= m_charList.GetSize()) {
363 pos = NearPos; 346 pos = NearPos;
364 } 347 }
365 return pos; 348 return pos;
366 } 349 }
367 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { 350 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
368 CFX_WideString strText; 351 CFX_WideString strText;
369 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) 352 if (!m_bIsParsed)
370 return strText; 353 return strText;
371 354
372 int nCount = m_charList.GetSize(); 355 int nCount = m_charList.GetSize();
373 int pos = 0; 356 int pos = 0;
374 FX_FLOAT posy = 0; 357 FX_FLOAT posy = 0;
375 FX_BOOL IsContainPreChar = FALSE; 358 FX_BOOL IsContainPreChar = FALSE;
376 FX_BOOL ISAddLineFeed = FALSE; 359 FX_BOOL ISAddLineFeed = FALSE;
377 while (pos < nCount) { 360 while (pos < nCount) {
378 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); 361 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++);
379 if (IsRectIntersect(rect, charinfo.m_CharBox)) { 362 if (IsRectIntersect(rect, charinfo.m_CharBox)) {
(...skipping 17 matching lines...) Expand all
397 } 380 }
398 } else { 381 } else {
399 IsContainPreChar = FALSE; 382 IsContainPreChar = FALSE;
400 ISAddLineFeed = TRUE; 383 ISAddLineFeed = TRUE;
401 } 384 }
402 } 385 }
403 return strText; 386 return strText;
404 } 387 }
405 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, 388 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect,
406 CFX_RectArray& resRectArray) const { 389 CFX_RectArray& resRectArray) const {
407 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) 390 if (!m_bIsParsed)
408 return; 391 return;
409 392
410 CFX_FloatRect curRect; 393 CFX_FloatRect curRect;
411 FX_BOOL flagNewRect = TRUE; 394 FX_BOOL flagNewRect = TRUE;
412 CPDF_TextObject* pCurObj = NULL; 395 CPDF_TextObject* pCurObj = NULL;
413 int nCount = m_charList.GetSize(); 396 int nCount = m_charList.GetSize();
414 int pos = 0; 397 int pos = 0;
415 while (pos < nCount) { 398 while (pos < nCount) {
416 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); 399 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++);
417 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { 400 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) {
(...skipping 29 matching lines...) Expand all
447 } 430 }
448 } 431 }
449 } 432 }
450 resRectArray.Add(curRect); 433 resRectArray.Add(curRect);
451 return; 434 return;
452 } 435 }
453 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, 436 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x,
454 FX_FLOAT y, 437 FX_FLOAT y,
455 FX_FLOAT xTolerance, 438 FX_FLOAT xTolerance,
456 FX_FLOAT yTolerance) const { 439 FX_FLOAT yTolerance) const {
457 if (m_ParseOptions.m_bGetCharCodeOnly) {
458 return -3;
459 }
460 CPDF_Point point(x, y); 440 CPDF_Point point(x, y);
461 return GetIndexAtPos(point, xTolerance, yTolerance); 441 return GetIndexAtPos(point, xTolerance, yTolerance);
462 } 442 }
463 443
464 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { 444 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const {
465 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) 445 if (!m_bIsParsed)
466 return; 446 return;
467 447
468 if (index < 0 || index >= m_charList.GetSize()) 448 if (index < 0 || index >= m_charList.GetSize())
469 return; 449 return;
470 450
471 const PAGECHAR_INFO* charinfo = 451 const PAGECHAR_INFO* charinfo =
472 static_cast<PAGECHAR_INFO*>(m_charList.GetAt(index)); 452 static_cast<PAGECHAR_INFO*>(m_charList.GetAt(index));
473 info->m_Charcode = charinfo->m_CharCode; 453 info->m_Charcode = charinfo->m_CharCode;
474 info->m_OriginX = charinfo->m_OriginX; 454 info->m_OriginX = charinfo->m_OriginX;
475 info->m_OriginY = charinfo->m_OriginY; 455 info->m_OriginY = charinfo->m_OriginY;
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
566 charinfo = 546 charinfo =
567 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1); 547 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1);
568 } 548 }
569 nCount = start + nCount - nCountOffset - startindex; 549 nCount = start + nCount - nCountOffset - startindex;
570 if (nCount <= 0) { 550 if (nCount <= 0) {
571 return L""; 551 return L"";
572 } 552 }
573 return m_TextBuf.GetWideString().Mid(startindex, nCount); 553 return m_TextBuf.GetWideString().Mid(startindex, nCount);
574 } 554 }
575 int CPDF_TextPage::CountRects(int start, int nCount) { 555 int CPDF_TextPage::CountRects(int start, int nCount) {
576 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed || start < 0) 556 if (!m_bIsParsed || start < 0)
577 return -1; 557 return -1;
578 558
579 if (nCount == -1 || nCount + start > m_charList.GetSize()) { 559 if (nCount == -1 || nCount + start > m_charList.GetSize()) {
580 nCount = m_charList.GetSize() - start; 560 nCount = m_charList.GetSize() - start;
581 } 561 }
582 m_SelRects.RemoveAll(); 562 m_SelRects.RemoveAll();
583 GetRectArray(start, nCount, m_SelRects); 563 GetRectArray(start, nCount, m_SelRects);
584 return m_SelRects.GetSize(); 564 return m_SelRects.GetSize();
585 } 565 }
586 void CPDF_TextPage::GetRect(int rectIndex, 566 void CPDF_TextPage::GetRect(int rectIndex,
587 FX_FLOAT& left, 567 FX_FLOAT& left,
588 FX_FLOAT& top, 568 FX_FLOAT& top,
589 FX_FLOAT& right, 569 FX_FLOAT& right,
590 FX_FLOAT& bottom) const { 570 FX_FLOAT& bottom) const {
591 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) 571 if (!m_bIsParsed)
592 return; 572 return;
593 573
594 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) 574 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize())
595 return; 575 return;
596 576
597 left = m_SelRects.GetAt(rectIndex).left; 577 left = m_SelRects.GetAt(rectIndex).left;
598 top = m_SelRects.GetAt(rectIndex).top; 578 top = m_SelRects.GetAt(rectIndex).top;
599 right = m_SelRects.GetAt(rectIndex).right; 579 right = m_SelRects.GetAt(rectIndex).right;
600 bottom = m_SelRects.GetAt(rectIndex).bottom; 580 bottom = m_SelRects.GetAt(rectIndex).bottom;
601 } 581 }
602 582
603 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { 583 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) {
604 if (m_ParseOptions.m_bGetCharCodeOnly) {
605 return FALSE;
606 }
607 if (end == start) { 584 if (end == start) {
608 return FALSE; 585 return FALSE;
609 } 586 }
610 FPDF_CHAR_INFO info_start; 587 FPDF_CHAR_INFO info_start;
611 FPDF_CHAR_INFO info_end; 588 FPDF_CHAR_INFO info_end;
612 GetCharInfo(start, &info_start); 589 GetCharInfo(start, &info_start);
613 GetCharInfo(end, &info_end); 590 GetCharInfo(end, &info_end);
614 while (info_end.m_CharBox.Width() == 0 || info_end.m_CharBox.Height() == 0) { 591 while (info_end.m_CharBox.Width() == 0 || info_end.m_CharBox.Height() == 0) {
615 if (--end <= start) 592 if (--end <= start)
616 return FALSE; 593 return FALSE;
(...skipping 17 matching lines...) Expand all
634 if (Rotate < 0) { 611 if (Rotate < 0) {
635 Rotate = -Rotate; 612 Rotate = -Rotate;
636 } else if (Rotate > 0) { 613 } else if (Rotate > 0) {
637 Rotate = 360 - Rotate; 614 Rotate = 360 - Rotate;
638 } 615 }
639 return TRUE; 616 return TRUE;
640 } 617 }
641 618
642 FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, 619 FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect,
643 int& Rotate) { 620 int& Rotate) {
644 if (m_ParseOptions.m_bGetCharCodeOnly) {
645 return FALSE;
646 }
647 int start, end, count, 621 int start, end, count,
648 n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, 622 n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom,
649 TRUE); 623 TRUE);
650 if (n < 1) { 624 if (n < 1) {
651 return FALSE; 625 return FALSE;
652 } 626 }
653 if (n > 1) { 627 if (n > 1) {
654 GetBoundedSegment(n - 1, start, count); 628 GetBoundedSegment(n - 1, start, count);
655 end = start + count - 1; 629 end = start + count - 1;
656 GetBoundedSegment(0, start, count); 630 GetBoundedSegment(0, start, count);
657 } else { 631 } else {
658 GetBoundedSegment(0, start, count); 632 GetBoundedSegment(0, start, count);
659 end = start + count - 1; 633 end = start + count - 1;
660 } 634 }
661 return GetBaselineRotate(start, end, Rotate); 635 return GetBaselineRotate(start, end, Rotate);
662 } 636 }
663 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { 637 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) {
664 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) 638 if (!m_bIsParsed)
665 return FALSE; 639 return FALSE;
666 640
667 if (rectIndex < 0 || rectIndex > m_SelRects.GetSize()) 641 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize())
668 return FALSE; 642 return FALSE;
669 643
670 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); 644 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);
671 return GetBaselineRotate(rect, Rotate); 645 return GetBaselineRotate(rect, Rotate);
672 } 646 }
673 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, 647 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
674 FX_FLOAT top, 648 FX_FLOAT top,
675 FX_FLOAT right, 649 FX_FLOAT right,
676 FX_FLOAT bottom, 650 FX_FLOAT bottom,
677 FX_BOOL bContains) { 651 FX_BOOL bContains) {
678 if (m_ParseOptions.m_bGetCharCodeOnly)
679 return -1;
680
681 m_Segment.RemoveAll(); 652 m_Segment.RemoveAll();
682 if (!m_bIsParsed) 653 if (!m_bIsParsed)
683 return -1; 654 return -1;
684 655
685 CFX_FloatRect rect(left, bottom, right, top); 656 CFX_FloatRect rect(left, bottom, right, top);
686 rect.Normalize(); 657 rect.Normalize();
687 int nCount = m_charList.GetSize(); 658 int nCount = m_charList.GetSize();
688 int pos = 0; 659 int pos = 0;
689 FPDF_SEGMENT segment; 660 FPDF_SEGMENT segment;
690 segment.m_Start = 0; 661 segment.m_Start = 0;
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
744 } 715 }
745 if (segmentStatus == 1) { 716 if (segmentStatus == 1) {
746 segmentStatus = 2; 717 segmentStatus = 2;
747 m_Segment.Add(segment); 718 m_Segment.Add(segment);
748 segment.m_Start = 0; 719 segment.m_Start = 0;
749 segment.m_nCount = 0; 720 segment.m_nCount = 0;
750 } 721 }
751 return m_Segment.GetSize(); 722 return m_Segment.GetSize();
752 } 723 }
753 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { 724 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {
754 if (m_ParseOptions.m_bGetCharCodeOnly) {
755 return;
756 }
757 if (index < 0 || index >= m_Segment.GetSize()) { 725 if (index < 0 || index >= m_Segment.GetSize()) {
758 return; 726 return;
759 } 727 }
760 start = m_Segment.GetAt(index).m_Start; 728 start = m_Segment.GetAt(index).m_Start;
761 count = m_Segment.GetAt(index).m_nCount; 729 count = m_Segment.GetAt(index).m_nCount;
762 } 730 }
763 int CPDF_TextPage::GetWordBreak(int index, int direction) const { 731 int CPDF_TextPage::GetWordBreak(int index, int direction) const {
764 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) 732 if (!m_bIsParsed)
765 return -1; 733 return -1;
766 734
767 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) 735 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT)
768 return -1; 736 return -1;
769 737
770 if (index < 0 || index >= m_charList.GetSize()) 738 if (index < 0 || index >= m_charList.GetSize())
771 return -1; 739 return -1;
772 740
773 PAGECHAR_INFO charinfo; 741 PAGECHAR_INFO charinfo;
774 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); 742 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
(...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after
1000 FX_WCHAR* pDst = NULL; 968 FX_WCHAR* pDst = NULL;
1001 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); 969 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
1002 if (nCount >= 1) { 970 if (nCount >= 1) {
1003 pDst = FX_Alloc(FX_WCHAR, nCount); 971 pDst = FX_Alloc(FX_WCHAR, nCount);
1004 FX_Unicode_GetNormalization(wChar, pDst); 972 FX_Unicode_GetNormalization(wChar, pDst);
1005 for (int nIndex = 0; nIndex < nCount; nIndex++) { 973 for (int nIndex = 0; nIndex < nCount; nIndex++) {
1006 PAGECHAR_INFO Info2 = Info; 974 PAGECHAR_INFO Info2 = Info;
1007 Info2.m_Unicode = pDst[nIndex]; 975 Info2.m_Unicode = pDst[nIndex];
1008 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; 976 Info2.m_Flag = FPDFTEXT_CHAR_PIECE;
1009 m_TextBuf.AppendChar(Info2.m_Unicode); 977 m_TextBuf.AppendChar(Info2.m_Unicode);
1010 if (!m_ParseOptions.m_bGetCharCodeOnly) { 978 m_charList.Add(Info2);
1011 m_charList.Add(Info2);
1012 }
1013 } 979 }
1014 FX_Free(pDst); 980 FX_Free(pDst);
1015 return; 981 return;
1016 } 982 }
1017 } 983 }
1018 m_TextBuf.AppendChar(wChar); 984 m_TextBuf.AppendChar(wChar);
1019 } else { 985 } else {
1020 Info.m_Index = -1; 986 Info.m_Index = -1;
1021 } 987 }
1022 if (!m_ParseOptions.m_bGetCharCodeOnly) { 988 m_charList.Add(Info);
1023 m_charList.Add(Info);
1024 }
1025 } 989 }
1026 void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) { 990 void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) {
1027 PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i); 991 PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i);
1028 if (!IsControlChar(Info)) { 992 if (!IsControlChar(Info)) {
1029 Info.m_Index = m_TextBuf.GetLength(); 993 Info.m_Index = m_TextBuf.GetLength();
1030 FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE); 994 FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE);
1031 FX_WCHAR* pDst = NULL; 995 FX_WCHAR* pDst = NULL;
1032 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); 996 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
1033 if (nCount >= 1) { 997 if (nCount >= 1) {
1034 pDst = FX_Alloc(FX_WCHAR, nCount); 998 pDst = FX_Alloc(FX_WCHAR, nCount);
1035 FX_Unicode_GetNormalization(wChar, pDst); 999 FX_Unicode_GetNormalization(wChar, pDst);
1036 for (int nIndex = 0; nIndex < nCount; nIndex++) { 1000 for (int nIndex = 0; nIndex < nCount; nIndex++) {
1037 PAGECHAR_INFO Info2 = Info; 1001 PAGECHAR_INFO Info2 = Info;
1038 Info2.m_Unicode = pDst[nIndex]; 1002 Info2.m_Unicode = pDst[nIndex];
1039 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; 1003 Info2.m_Flag = FPDFTEXT_CHAR_PIECE;
1040 m_TextBuf.AppendChar(Info2.m_Unicode); 1004 m_TextBuf.AppendChar(Info2.m_Unicode);
1041 if (!m_ParseOptions.m_bGetCharCodeOnly) { 1005 m_charList.Add(Info2);
1042 m_charList.Add(Info2);
1043 }
1044 } 1006 }
1045 FX_Free(pDst); 1007 FX_Free(pDst);
1046 return; 1008 return;
1047 } 1009 }
1048 Info.m_Unicode = wChar; 1010 Info.m_Unicode = wChar;
1049 m_TextBuf.AppendChar(Info.m_Unicode); 1011 m_TextBuf.AppendChar(Info.m_Unicode);
1050 } else { 1012 } else {
1051 Info.m_Index = -1; 1013 Info.m_Index = -1;
1052 } 1014 }
1053 if (!m_ParseOptions.m_bGetCharCodeOnly) { 1015 m_charList.Add(Info);
1054 m_charList.Add(Info);
1055 }
1056 } 1016 }
1057 void CPDF_TextPage::CloseTempLine() { 1017 void CPDF_TextPage::CloseTempLine() {
1058 int count1 = m_TempCharList.GetSize(); 1018 int count1 = m_TempCharList.GetSize();
1059 if (count1 <= 0) { 1019 if (count1 <= 0) {
1060 return; 1020 return;
1061 } 1021 }
1062 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); 1022 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar);
1063 CFX_WideString str = m_TempTextBuf.GetWideString(); 1023 CFX_WideString str = m_TempTextBuf.GetWideString();
1064 CFX_WordArray order; 1024 CFX_WordArray order;
1065 FX_BOOL bR2L = FALSE; 1025 FX_BOOL bR2L = FALSE;
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
1244 m_DisplayMatrix.Transform(this_x, this_y); 1204 m_DisplayMatrix.Transform(this_x, this_y);
1245 if (FXSYS_fabs(this_y - prev_y) > threshold * 2) { 1205 if (FXSYS_fabs(this_y - prev_y) > threshold * 2) {
1246 for (int i = 0; i < count; i++) { 1206 for (int i = 0; i < count; i++) {
1247 ProcessTextObject(m_LineObj.GetAt(i)); 1207 ProcessTextObject(m_LineObj.GetAt(i));
1248 } 1208 }
1249 m_LineObj.RemoveAll(); 1209 m_LineObj.RemoveAll();
1250 m_LineObj.Add(Obj); 1210 m_LineObj.Add(Obj);
1251 return; 1211 return;
1252 } 1212 }
1253 int i = 0; 1213 int i = 0;
1254 if (m_ParseOptions.m_bNormalizeObjs) { 1214 for (i = count - 1; i >= 0; i--) {
1255 for (i = count - 1; i >= 0; i--) { 1215 PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(i);
1256 PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(i); 1216 CFX_Matrix prev_matrix;
1257 CFX_Matrix prev_matrix; 1217 prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix);
1258 prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix); 1218 FX_FLOAT Prev_x = prev_Obj.m_pTextObj->GetPosX(),
1259 FX_FLOAT Prev_x = prev_Obj.m_pTextObj->GetPosX(), 1219 Prev_y = prev_Obj.m_pTextObj->GetPosY();
1260 Prev_y = prev_Obj.m_pTextObj->GetPosY(); 1220 prev_Obj.m_formMatrix.Transform(Prev_x, Prev_y);
1261 prev_Obj.m_formMatrix.Transform(Prev_x, Prev_y); 1221 m_DisplayMatrix.Transform(Prev_x, Prev_y);
1262 m_DisplayMatrix.Transform(Prev_x, Prev_y); 1222 if (this_x >= Prev_x) {
1263 if (this_x >= Prev_x) { 1223 if (i == count - 1) {
1264 if (i == count - 1) { 1224 m_LineObj.Add(Obj);
1265 m_LineObj.Add(Obj); 1225 } else {
1266 } else { 1226 m_LineObj.InsertAt(i + 1, Obj);
1267 m_LineObj.InsertAt(i + 1, Obj);
1268 }
1269 break;
1270 } 1227 }
1228 break;
1271 } 1229 }
1272 if (i < 0) { 1230 }
1273 m_LineObj.InsertAt(0, Obj); 1231 if (i < 0) {
1274 } 1232 m_LineObj.InsertAt(0, Obj);
1275 } else {
1276 m_LineObj.Add(Obj);
1277 } 1233 }
1278 } 1234 }
1235
1279 int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { 1236 int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
1280 CPDF_TextObject* pTextObj = Obj.m_pTextObj; 1237 CPDF_TextObject* pTextObj = Obj.m_pTextObj;
1281 CPDF_ContentMarkData* pMarkData = 1238 CPDF_ContentMarkData* pMarkData =
1282 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); 1239 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject();
1283 if (!pMarkData) { 1240 if (!pMarkData) {
1284 return FPDFTEXT_MC_PASS; 1241 return FPDFTEXT_MC_PASS;
1285 } 1242 }
1286 int nContentMark = pMarkData->CountItems(); 1243 int nContentMark = pMarkData->CountItems();
1287 if (nContentMark < 1) { 1244 if (nContentMark < 1) {
1288 return FPDFTEXT_MC_PASS; 1245 return FPDFTEXT_MC_PASS;
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
1517 if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) { 1474 if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) {
1518 if (!formMatrix.IsIdentity()) { 1475 if (!formMatrix.IsIdentity()) {
1519 generateChar.m_Matrix.Copy(formMatrix); 1476 generateChar.m_Matrix.Copy(formMatrix);
1520 } 1477 }
1521 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); 1478 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR);
1522 m_TempCharList.Add(generateChar); 1479 m_TempCharList.Add(generateChar);
1523 } 1480 }
1524 } else if (result == 2) { 1481 } else if (result == 2) {
1525 CloseTempLine(); 1482 CloseTempLine();
1526 if (m_TextBuf.GetSize()) { 1483 if (m_TextBuf.GetSize()) {
1527 if (m_ParseOptions.m_bGetCharCodeOnly) { 1484 if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) {
1528 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); 1485 m_TextBuf.AppendChar(TEXT_RETURN_CHAR);
1486 if (!formMatrix.IsIdentity()) {
1487 generateChar.m_Matrix.Copy(formMatrix);
1488 }
1489 m_charList.Add(generateChar);
1490 }
1491 if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) {
1529 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); 1492 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR);
1530 } else { 1493 if (!formMatrix.IsIdentity()) {
1531 if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) { 1494 generateChar.m_Matrix.Copy(formMatrix);
1532 m_TextBuf.AppendChar(TEXT_RETURN_CHAR);
1533 if (!formMatrix.IsIdentity()) {
1534 generateChar.m_Matrix.Copy(formMatrix);
1535 }
1536 m_charList.Add(generateChar);
1537 } 1495 }
1538 if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) { 1496 m_charList.Add(generateChar);
1539 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR);
1540 if (!formMatrix.IsIdentity()) {
1541 generateChar.m_Matrix.Copy(formMatrix);
1542 }
1543 m_charList.Add(generateChar);
1544 }
1545 } 1497 }
1546 } 1498 }
1547 } else if (result == 3 && !m_ParseOptions.m_bOutputHyphen) { 1499 } else if (result == 3) {
1548 int32_t nChars = pTextObj->CountChars(); 1500 int32_t nChars = pTextObj->CountChars();
1549 if (nChars == 1) { 1501 if (nChars == 1) {
1550 CPDF_TextObjectItem item; 1502 CPDF_TextObjectItem item;
1551 pTextObj->GetCharInfo(0, &item); 1503 pTextObj->GetCharInfo(0, &item);
1552 CFX_WideString wstrItem = 1504 CFX_WideString wstrItem =
1553 pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); 1505 pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
1554 if (wstrItem.IsEmpty()) { 1506 if (wstrItem.IsEmpty()) {
1555 wstrItem += (FX_WCHAR)item.m_CharCode; 1507 wstrItem += (FX_WCHAR)item.m_CharCode;
1556 } 1508 }
1557 FX_WCHAR curChar = wstrItem.GetAt(0); 1509 FX_WCHAR curChar = wstrItem.GetAt(0);
(...skipping 420 matching lines...) Expand 10 before | Expand all | Expand 10 after
1978 } 1930 }
1979 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, 1931 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
1980 CPDF_TextObject* pTextObj2) { 1932 CPDF_TextObject* pTextObj2) {
1981 if (!pTextObj1 || !pTextObj2) { 1933 if (!pTextObj1 || !pTextObj2) {
1982 return FALSE; 1934 return FALSE;
1983 } 1935 }
1984 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, 1936 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom,
1985 pTextObj2->m_Right, pTextObj2->m_Top); 1937 pTextObj2->m_Right, pTextObj2->m_Top);
1986 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, 1938 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom,
1987 pTextObj1->m_Right, pTextObj1->m_Top); 1939 pTextObj1->m_Right, pTextObj1->m_Top);
1988 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty() && 1940 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) {
1989 !m_ParseOptions.m_bGetCharCodeOnly) {
1990 FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left); 1941 FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left);
1991 int nCount = m_charList.GetSize(); 1942 int nCount = m_charList.GetSize();
1992 if (nCount >= 2) { 1943 if (nCount >= 2) {
1993 PAGECHAR_INFO perCharTemp = (PAGECHAR_INFO)m_charList[nCount - 2]; 1944 PAGECHAR_INFO perCharTemp = (PAGECHAR_INFO)m_charList[nCount - 2];
1994 FX_FLOAT dbSpace = perCharTemp.m_CharBox.Width(); 1945 FX_FLOAT dbSpace = perCharTemp.m_CharBox.Width();
1995 if (dbXdif > dbSpace) { 1946 if (dbXdif > dbSpace) {
1996 return FALSE; 1947 return FALSE;
1997 } 1948 }
1998 } 1949 }
1999 } 1950 }
(...skipping 718 matching lines...) Expand 10 before | Expand all | Expand 10 after
2718 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { 2669 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2719 return; 2670 return;
2720 } 2671 }
2721 CPDF_LinkExt* link = NULL; 2672 CPDF_LinkExt* link = NULL;
2722 link = m_LinkList.GetAt(index); 2673 link = m_LinkList.GetAt(index);
2723 if (!link) { 2674 if (!link) {
2724 return; 2675 return;
2725 } 2676 }
2726 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); 2677 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2727 } 2678 }
OLDNEW
« no previous file with comments | « core/include/fpdftext/fpdf_text.h ('k') | core/src/fpdftext/text_int.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698