OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <cctype> | 8 #include <cctype> |
9 #include <cwctype> | 9 #include <cwctype> |
10 #include <memory> | 10 #include <memory> |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
74 baseSpace = 0.0; | 74 baseSpace = 0.0; |
75 } | 75 } |
76 } | 76 } |
77 return baseSpace; | 77 return baseSpace; |
78 } | 78 } |
79 | 79 |
80 const FX_FLOAT kDefaultFontSize = 1.0f; | 80 const FX_FLOAT kDefaultFontSize = 1.0f; |
81 | 81 |
82 } // namespace | 82 } // namespace |
83 | 83 |
84 CPDFText_ParseOptions::CPDFText_ParseOptions() | |
85 : m_bGetCharCodeOnly(FALSE), | |
86 m_bNormalizeObjs(TRUE), | |
87 m_bOutputHyphen(FALSE) {} | |
88 | |
89 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, | 84 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, |
90 int flags) { | 85 int flags) { |
91 return new CPDF_TextPage(pPage, flags); | 86 return new CPDF_TextPage(pPage, flags); |
92 } | 87 } |
93 | 88 |
94 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( | 89 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( |
95 const IPDF_TextPage* pTextPage) { | 90 const IPDF_TextPage* pTextPage) { |
96 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; | 91 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; |
97 } | 92 } |
98 | 93 |
(...skipping 17 matching lines...) Expand all Loading... |
116 m_parserflag(flags), | 111 m_parserflag(flags), |
117 m_pPreTextObj(nullptr), | 112 m_pPreTextObj(nullptr), |
118 m_bIsParsed(false), | 113 m_bIsParsed(false), |
119 m_TextlineDir(-1), | 114 m_TextlineDir(-1), |
120 m_CurlineRect(0, 0, 0, 0) { | 115 m_CurlineRect(0, 0, 0, 0) { |
121 m_TextBuf.EstimateSize(0, 10240); | 116 m_TextBuf.EstimateSize(0, 10240); |
122 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | 117 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
123 (int)pPage->GetPageHeight(), 0); | 118 (int)pPage->GetPageHeight(), 0); |
124 } | 119 } |
125 | 120 |
126 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { | |
127 m_ParseOptions.m_bNormalizeObjs = bNormalize; | |
128 } | |
129 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { | 121 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { |
130 switch (charInfo.m_Unicode) { | 122 switch (charInfo.m_Unicode) { |
131 case 0x2: | 123 case 0x2: |
132 case 0x3: | 124 case 0x3: |
133 case 0x93: | 125 case 0x93: |
134 case 0x94: | 126 case 0x94: |
135 case 0x96: | 127 case 0x96: |
136 case 0x97: | 128 case 0x97: |
137 case 0x98: | 129 case 0x98: |
138 case 0xfffe: | 130 case 0xfffe: |
139 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; | 131 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; |
140 default: | 132 default: |
141 return false; | 133 return false; |
142 } | 134 } |
143 } | 135 } |
144 FX_BOOL CPDF_TextPage::ParseTextPage() { | 136 FX_BOOL CPDF_TextPage::ParseTextPage() { |
145 m_bIsParsed = false; | 137 m_bIsParsed = false; |
146 if (!m_pPage) | 138 if (!m_pPage) |
147 return FALSE; | 139 return FALSE; |
148 | 140 |
149 m_TextBuf.Clear(); | 141 m_TextBuf.Clear(); |
150 m_charList.RemoveAll(); | 142 m_charList.RemoveAll(); |
151 m_pPreTextObj = NULL; | 143 m_pPreTextObj = NULL; |
152 ProcessObject(); | 144 ProcessObject(); |
153 m_bIsParsed = true; | 145 m_bIsParsed = true; |
154 if (!m_ParseOptions.m_bGetCharCodeOnly) { | 146 m_CharIndex.RemoveAll(); |
155 m_CharIndex.RemoveAll(); | 147 int nCount = m_charList.GetSize(); |
156 int nCount = m_charList.GetSize(); | 148 if (nCount) { |
157 if (nCount) { | 149 m_CharIndex.Add(0); |
158 m_CharIndex.Add(0); | 150 } |
| 151 for (int i = 0; i < nCount; i++) { |
| 152 int indexSize = m_CharIndex.GetSize(); |
| 153 FX_BOOL bNormal = FALSE; |
| 154 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); |
| 155 if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
| 156 bNormal = TRUE; |
| 157 } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) { |
| 158 bNormal = FALSE; |
| 159 } else { |
| 160 bNormal = TRUE; |
159 } | 161 } |
160 for (int i = 0; i < nCount; i++) { | 162 if (bNormal) { |
161 int indexSize = m_CharIndex.GetSize(); | 163 if (indexSize % 2) { |
162 FX_BOOL bNormal = FALSE; | 164 m_CharIndex.Add(1); |
163 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); | |
164 if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { | |
165 bNormal = TRUE; | |
166 } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) { | |
167 bNormal = FALSE; | |
168 } else { | 165 } else { |
169 bNormal = TRUE; | 166 if (indexSize <= 0) { |
| 167 continue; |
| 168 } |
| 169 m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1); |
170 } | 170 } |
171 if (bNormal) { | 171 } else { |
172 if (indexSize % 2) { | 172 if (indexSize % 2) { |
173 m_CharIndex.Add(1); | 173 if (indexSize <= 0) { |
174 } else { | 174 continue; |
175 if (indexSize <= 0) { | |
176 continue; | |
177 } | |
178 m_CharIndex.SetAt(indexSize - 1, | |
179 m_CharIndex.GetAt(indexSize - 1) + 1); | |
180 } | 175 } |
| 176 m_CharIndex.SetAt(indexSize - 1, i + 1); |
181 } else { | 177 } else { |
182 if (indexSize % 2) { | 178 m_CharIndex.Add(i + 1); |
183 if (indexSize <= 0) { | |
184 continue; | |
185 } | |
186 m_CharIndex.SetAt(indexSize - 1, i + 1); | |
187 } else { | |
188 m_CharIndex.Add(i + 1); | |
189 } | |
190 } | 179 } |
191 } | 180 } |
192 int indexSize = m_CharIndex.GetSize(); | 181 } |
193 if (indexSize % 2) { | 182 int indexSize = m_CharIndex.GetSize(); |
194 m_CharIndex.RemoveAt(indexSize - 1); | 183 if (indexSize % 2) { |
195 } | 184 m_CharIndex.RemoveAt(indexSize - 1); |
196 } | 185 } |
197 return TRUE; | 186 return TRUE; |
198 } | 187 } |
199 int CPDF_TextPage::CountChars() const { | 188 int CPDF_TextPage::CountChars() const { |
200 if (m_ParseOptions.m_bGetCharCodeOnly) { | |
201 return m_TextBuf.GetSize(); | |
202 } | |
203 return m_charList.GetSize(); | 189 return m_charList.GetSize(); |
204 } | 190 } |
205 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { | 191 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { |
206 int indexSize = m_CharIndex.GetSize(); | 192 int indexSize = m_CharIndex.GetSize(); |
207 int count = 0; | 193 int count = 0; |
208 for (int i = 0; i < indexSize; i += 2) { | 194 for (int i = 0; i < indexSize; i += 2) { |
209 count += m_CharIndex.GetAt(i + 1); | 195 count += m_CharIndex.GetAt(i + 1); |
210 if (count > TextIndex) { | 196 if (count > TextIndex) { |
211 return TextIndex - count + m_CharIndex.GetAt(i + 1) + | 197 return TextIndex - count + m_CharIndex.GetAt(i + 1) + |
212 m_CharIndex.GetAt(i); | 198 m_CharIndex.GetAt(i); |
(...skipping 12 matching lines...) Expand all Loading... |
225 } | 211 } |
226 return CharIndex - m_CharIndex.GetAt(i) + count - | 212 return CharIndex - m_CharIndex.GetAt(i) + count - |
227 m_CharIndex.GetAt(i + 1); | 213 m_CharIndex.GetAt(i + 1); |
228 } | 214 } |
229 } | 215 } |
230 return -1; | 216 return -1; |
231 } | 217 } |
232 void CPDF_TextPage::GetRectArray(int start, | 218 void CPDF_TextPage::GetRectArray(int start, |
233 int nCount, | 219 int nCount, |
234 CFX_RectArray& rectArray) const { | 220 CFX_RectArray& rectArray) const { |
235 if (m_ParseOptions.m_bGetCharCodeOnly) { | |
236 return; | |
237 } | |
238 if (start < 0 || nCount == 0) { | 221 if (start < 0 || nCount == 0) { |
239 return; | 222 return; |
240 } | 223 } |
241 if (!m_bIsParsed) { | 224 if (!m_bIsParsed) { |
242 return; | 225 return; |
243 } | 226 } |
244 PAGECHAR_INFO info_curchar; | 227 PAGECHAR_INFO info_curchar; |
245 CPDF_TextObject* pCurObj = NULL; | 228 CPDF_TextObject* pCurObj = NULL; |
246 CFX_FloatRect rect; | 229 CFX_FloatRect rect; |
247 int curPos = start; | 230 int curPos = start; |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
314 rect.bottom = info_curchar.m_CharBox.bottom; | 297 rect.bottom = info_curchar.m_CharBox.bottom; |
315 } | 298 } |
316 } | 299 } |
317 } | 300 } |
318 rectArray.Add(rect); | 301 rectArray.Add(rect); |
319 return; | 302 return; |
320 } | 303 } |
321 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, | 304 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, |
322 FX_FLOAT xTolerance, | 305 FX_FLOAT xTolerance, |
323 FX_FLOAT yTolerance) const { | 306 FX_FLOAT yTolerance) const { |
324 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 307 if (!m_bIsParsed) |
325 return -3; | 308 return -3; |
326 | 309 |
327 int pos = 0; | 310 int pos = 0; |
328 int NearPos = -1; | 311 int NearPos = -1; |
329 double xdif = 5000, ydif = 5000; | 312 double xdif = 5000, ydif = 5000; |
330 while (pos < m_charList.GetSize()) { | 313 while (pos < m_charList.GetSize()) { |
331 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); | 314 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); |
332 CFX_FloatRect charrect = charinfo.m_CharBox; | 315 CFX_FloatRect charrect = charinfo.m_CharBox; |
333 if (charrect.Contains(point.x, point.y)) { | 316 if (charrect.Contains(point.x, point.y)) { |
334 break; | 317 break; |
(...skipping 24 matching lines...) Expand all Loading... |
359 } | 342 } |
360 ++pos; | 343 ++pos; |
361 } | 344 } |
362 if (pos >= m_charList.GetSize()) { | 345 if (pos >= m_charList.GetSize()) { |
363 pos = NearPos; | 346 pos = NearPos; |
364 } | 347 } |
365 return pos; | 348 return pos; |
366 } | 349 } |
367 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { | 350 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { |
368 CFX_WideString strText; | 351 CFX_WideString strText; |
369 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 352 if (!m_bIsParsed) |
370 return strText; | 353 return strText; |
371 | 354 |
372 int nCount = m_charList.GetSize(); | 355 int nCount = m_charList.GetSize(); |
373 int pos = 0; | 356 int pos = 0; |
374 FX_FLOAT posy = 0; | 357 FX_FLOAT posy = 0; |
375 FX_BOOL IsContainPreChar = FALSE; | 358 FX_BOOL IsContainPreChar = FALSE; |
376 FX_BOOL ISAddLineFeed = FALSE; | 359 FX_BOOL ISAddLineFeed = FALSE; |
377 while (pos < nCount) { | 360 while (pos < nCount) { |
378 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); | 361 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); |
379 if (IsRectIntersect(rect, charinfo.m_CharBox)) { | 362 if (IsRectIntersect(rect, charinfo.m_CharBox)) { |
(...skipping 17 matching lines...) Expand all Loading... |
397 } | 380 } |
398 } else { | 381 } else { |
399 IsContainPreChar = FALSE; | 382 IsContainPreChar = FALSE; |
400 ISAddLineFeed = TRUE; | 383 ISAddLineFeed = TRUE; |
401 } | 384 } |
402 } | 385 } |
403 return strText; | 386 return strText; |
404 } | 387 } |
405 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, | 388 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, |
406 CFX_RectArray& resRectArray) const { | 389 CFX_RectArray& resRectArray) const { |
407 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 390 if (!m_bIsParsed) |
408 return; | 391 return; |
409 | 392 |
410 CFX_FloatRect curRect; | 393 CFX_FloatRect curRect; |
411 FX_BOOL flagNewRect = TRUE; | 394 FX_BOOL flagNewRect = TRUE; |
412 CPDF_TextObject* pCurObj = NULL; | 395 CPDF_TextObject* pCurObj = NULL; |
413 int nCount = m_charList.GetSize(); | 396 int nCount = m_charList.GetSize(); |
414 int pos = 0; | 397 int pos = 0; |
415 while (pos < nCount) { | 398 while (pos < nCount) { |
416 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); | 399 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); |
417 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 400 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
(...skipping 29 matching lines...) Expand all Loading... |
447 } | 430 } |
448 } | 431 } |
449 } | 432 } |
450 resRectArray.Add(curRect); | 433 resRectArray.Add(curRect); |
451 return; | 434 return; |
452 } | 435 } |
453 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, | 436 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, |
454 FX_FLOAT y, | 437 FX_FLOAT y, |
455 FX_FLOAT xTolerance, | 438 FX_FLOAT xTolerance, |
456 FX_FLOAT yTolerance) const { | 439 FX_FLOAT yTolerance) const { |
457 if (m_ParseOptions.m_bGetCharCodeOnly) { | |
458 return -3; | |
459 } | |
460 CPDF_Point point(x, y); | 440 CPDF_Point point(x, y); |
461 return GetIndexAtPos(point, xTolerance, yTolerance); | 441 return GetIndexAtPos(point, xTolerance, yTolerance); |
462 } | 442 } |
463 | 443 |
464 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { | 444 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { |
465 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 445 if (!m_bIsParsed) |
466 return; | 446 return; |
467 | 447 |
468 if (index < 0 || index >= m_charList.GetSize()) | 448 if (index < 0 || index >= m_charList.GetSize()) |
469 return; | 449 return; |
470 | 450 |
471 const PAGECHAR_INFO* charinfo = | 451 const PAGECHAR_INFO* charinfo = |
472 static_cast<PAGECHAR_INFO*>(m_charList.GetAt(index)); | 452 static_cast<PAGECHAR_INFO*>(m_charList.GetAt(index)); |
473 info->m_Charcode = charinfo->m_CharCode; | 453 info->m_Charcode = charinfo->m_CharCode; |
474 info->m_OriginX = charinfo->m_OriginX; | 454 info->m_OriginX = charinfo->m_OriginX; |
475 info->m_OriginY = charinfo->m_OriginY; | 455 info->m_OriginY = charinfo->m_OriginY; |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
566 charinfo = | 546 charinfo = |
567 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1); | 547 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1); |
568 } | 548 } |
569 nCount = start + nCount - nCountOffset - startindex; | 549 nCount = start + nCount - nCountOffset - startindex; |
570 if (nCount <= 0) { | 550 if (nCount <= 0) { |
571 return L""; | 551 return L""; |
572 } | 552 } |
573 return m_TextBuf.GetWideString().Mid(startindex, nCount); | 553 return m_TextBuf.GetWideString().Mid(startindex, nCount); |
574 } | 554 } |
575 int CPDF_TextPage::CountRects(int start, int nCount) { | 555 int CPDF_TextPage::CountRects(int start, int nCount) { |
576 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed || start < 0) | 556 if (!m_bIsParsed || start < 0) |
577 return -1; | 557 return -1; |
578 | 558 |
579 if (nCount == -1 || nCount + start > m_charList.GetSize()) { | 559 if (nCount == -1 || nCount + start > m_charList.GetSize()) { |
580 nCount = m_charList.GetSize() - start; | 560 nCount = m_charList.GetSize() - start; |
581 } | 561 } |
582 m_SelRects.RemoveAll(); | 562 m_SelRects.RemoveAll(); |
583 GetRectArray(start, nCount, m_SelRects); | 563 GetRectArray(start, nCount, m_SelRects); |
584 return m_SelRects.GetSize(); | 564 return m_SelRects.GetSize(); |
585 } | 565 } |
586 void CPDF_TextPage::GetRect(int rectIndex, | 566 void CPDF_TextPage::GetRect(int rectIndex, |
587 FX_FLOAT& left, | 567 FX_FLOAT& left, |
588 FX_FLOAT& top, | 568 FX_FLOAT& top, |
589 FX_FLOAT& right, | 569 FX_FLOAT& right, |
590 FX_FLOAT& bottom) const { | 570 FX_FLOAT& bottom) const { |
591 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 571 if (!m_bIsParsed) |
592 return; | 572 return; |
593 | 573 |
594 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) | 574 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) |
595 return; | 575 return; |
596 | 576 |
597 left = m_SelRects.GetAt(rectIndex).left; | 577 left = m_SelRects.GetAt(rectIndex).left; |
598 top = m_SelRects.GetAt(rectIndex).top; | 578 top = m_SelRects.GetAt(rectIndex).top; |
599 right = m_SelRects.GetAt(rectIndex).right; | 579 right = m_SelRects.GetAt(rectIndex).right; |
600 bottom = m_SelRects.GetAt(rectIndex).bottom; | 580 bottom = m_SelRects.GetAt(rectIndex).bottom; |
601 } | 581 } |
602 | 582 |
603 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { | 583 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { |
604 if (m_ParseOptions.m_bGetCharCodeOnly) { | |
605 return FALSE; | |
606 } | |
607 if (end == start) { | 584 if (end == start) { |
608 return FALSE; | 585 return FALSE; |
609 } | 586 } |
610 FPDF_CHAR_INFO info_start; | 587 FPDF_CHAR_INFO info_start; |
611 FPDF_CHAR_INFO info_end; | 588 FPDF_CHAR_INFO info_end; |
612 GetCharInfo(start, &info_start); | 589 GetCharInfo(start, &info_start); |
613 GetCharInfo(end, &info_end); | 590 GetCharInfo(end, &info_end); |
614 while (info_end.m_CharBox.Width() == 0 || info_end.m_CharBox.Height() == 0) { | 591 while (info_end.m_CharBox.Width() == 0 || info_end.m_CharBox.Height() == 0) { |
615 if (--end <= start) | 592 if (--end <= start) |
616 return FALSE; | 593 return FALSE; |
(...skipping 17 matching lines...) Expand all Loading... |
634 if (Rotate < 0) { | 611 if (Rotate < 0) { |
635 Rotate = -Rotate; | 612 Rotate = -Rotate; |
636 } else if (Rotate > 0) { | 613 } else if (Rotate > 0) { |
637 Rotate = 360 - Rotate; | 614 Rotate = 360 - Rotate; |
638 } | 615 } |
639 return TRUE; | 616 return TRUE; |
640 } | 617 } |
641 | 618 |
642 FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, | 619 FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, |
643 int& Rotate) { | 620 int& Rotate) { |
644 if (m_ParseOptions.m_bGetCharCodeOnly) { | |
645 return FALSE; | |
646 } | |
647 int start, end, count, | 621 int start, end, count, |
648 n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, | 622 n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, |
649 TRUE); | 623 TRUE); |
650 if (n < 1) { | 624 if (n < 1) { |
651 return FALSE; | 625 return FALSE; |
652 } | 626 } |
653 if (n > 1) { | 627 if (n > 1) { |
654 GetBoundedSegment(n - 1, start, count); | 628 GetBoundedSegment(n - 1, start, count); |
655 end = start + count - 1; | 629 end = start + count - 1; |
656 GetBoundedSegment(0, start, count); | 630 GetBoundedSegment(0, start, count); |
657 } else { | 631 } else { |
658 GetBoundedSegment(0, start, count); | 632 GetBoundedSegment(0, start, count); |
659 end = start + count - 1; | 633 end = start + count - 1; |
660 } | 634 } |
661 return GetBaselineRotate(start, end, Rotate); | 635 return GetBaselineRotate(start, end, Rotate); |
662 } | 636 } |
663 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { | 637 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { |
664 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 638 if (!m_bIsParsed) |
665 return FALSE; | 639 return FALSE; |
666 | 640 |
667 if (rectIndex < 0 || rectIndex > m_SelRects.GetSize()) | 641 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) |
668 return FALSE; | 642 return FALSE; |
669 | 643 |
670 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); | 644 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); |
671 return GetBaselineRotate(rect, Rotate); | 645 return GetBaselineRotate(rect, Rotate); |
672 } | 646 } |
673 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, | 647 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, |
674 FX_FLOAT top, | 648 FX_FLOAT top, |
675 FX_FLOAT right, | 649 FX_FLOAT right, |
676 FX_FLOAT bottom, | 650 FX_FLOAT bottom, |
677 FX_BOOL bContains) { | 651 FX_BOOL bContains) { |
678 if (m_ParseOptions.m_bGetCharCodeOnly) | |
679 return -1; | |
680 | |
681 m_Segment.RemoveAll(); | 652 m_Segment.RemoveAll(); |
682 if (!m_bIsParsed) | 653 if (!m_bIsParsed) |
683 return -1; | 654 return -1; |
684 | 655 |
685 CFX_FloatRect rect(left, bottom, right, top); | 656 CFX_FloatRect rect(left, bottom, right, top); |
686 rect.Normalize(); | 657 rect.Normalize(); |
687 int nCount = m_charList.GetSize(); | 658 int nCount = m_charList.GetSize(); |
688 int pos = 0; | 659 int pos = 0; |
689 FPDF_SEGMENT segment; | 660 FPDF_SEGMENT segment; |
690 segment.m_Start = 0; | 661 segment.m_Start = 0; |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
744 } | 715 } |
745 if (segmentStatus == 1) { | 716 if (segmentStatus == 1) { |
746 segmentStatus = 2; | 717 segmentStatus = 2; |
747 m_Segment.Add(segment); | 718 m_Segment.Add(segment); |
748 segment.m_Start = 0; | 719 segment.m_Start = 0; |
749 segment.m_nCount = 0; | 720 segment.m_nCount = 0; |
750 } | 721 } |
751 return m_Segment.GetSize(); | 722 return m_Segment.GetSize(); |
752 } | 723 } |
753 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { | 724 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { |
754 if (m_ParseOptions.m_bGetCharCodeOnly) { | |
755 return; | |
756 } | |
757 if (index < 0 || index >= m_Segment.GetSize()) { | 725 if (index < 0 || index >= m_Segment.GetSize()) { |
758 return; | 726 return; |
759 } | 727 } |
760 start = m_Segment.GetAt(index).m_Start; | 728 start = m_Segment.GetAt(index).m_Start; |
761 count = m_Segment.GetAt(index).m_nCount; | 729 count = m_Segment.GetAt(index).m_nCount; |
762 } | 730 } |
763 int CPDF_TextPage::GetWordBreak(int index, int direction) const { | 731 int CPDF_TextPage::GetWordBreak(int index, int direction) const { |
764 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 732 if (!m_bIsParsed) |
765 return -1; | 733 return -1; |
766 | 734 |
767 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) | 735 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) |
768 return -1; | 736 return -1; |
769 | 737 |
770 if (index < 0 || index >= m_charList.GetSize()) | 738 if (index < 0 || index >= m_charList.GetSize()) |
771 return -1; | 739 return -1; |
772 | 740 |
773 PAGECHAR_INFO charinfo; | 741 PAGECHAR_INFO charinfo; |
774 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 742 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); |
(...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1000 FX_WCHAR* pDst = NULL; | 968 FX_WCHAR* pDst = NULL; |
1001 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 969 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
1002 if (nCount >= 1) { | 970 if (nCount >= 1) { |
1003 pDst = FX_Alloc(FX_WCHAR, nCount); | 971 pDst = FX_Alloc(FX_WCHAR, nCount); |
1004 FX_Unicode_GetNormalization(wChar, pDst); | 972 FX_Unicode_GetNormalization(wChar, pDst); |
1005 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 973 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
1006 PAGECHAR_INFO Info2 = Info; | 974 PAGECHAR_INFO Info2 = Info; |
1007 Info2.m_Unicode = pDst[nIndex]; | 975 Info2.m_Unicode = pDst[nIndex]; |
1008 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 976 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
1009 m_TextBuf.AppendChar(Info2.m_Unicode); | 977 m_TextBuf.AppendChar(Info2.m_Unicode); |
1010 if (!m_ParseOptions.m_bGetCharCodeOnly) { | 978 m_charList.Add(Info2); |
1011 m_charList.Add(Info2); | |
1012 } | |
1013 } | 979 } |
1014 FX_Free(pDst); | 980 FX_Free(pDst); |
1015 return; | 981 return; |
1016 } | 982 } |
1017 } | 983 } |
1018 m_TextBuf.AppendChar(wChar); | 984 m_TextBuf.AppendChar(wChar); |
1019 } else { | 985 } else { |
1020 Info.m_Index = -1; | 986 Info.m_Index = -1; |
1021 } | 987 } |
1022 if (!m_ParseOptions.m_bGetCharCodeOnly) { | 988 m_charList.Add(Info); |
1023 m_charList.Add(Info); | |
1024 } | |
1025 } | 989 } |
1026 void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) { | 990 void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) { |
1027 PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i); | 991 PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i); |
1028 if (!IsControlChar(Info)) { | 992 if (!IsControlChar(Info)) { |
1029 Info.m_Index = m_TextBuf.GetLength(); | 993 Info.m_Index = m_TextBuf.GetLength(); |
1030 FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE); | 994 FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE); |
1031 FX_WCHAR* pDst = NULL; | 995 FX_WCHAR* pDst = NULL; |
1032 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 996 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
1033 if (nCount >= 1) { | 997 if (nCount >= 1) { |
1034 pDst = FX_Alloc(FX_WCHAR, nCount); | 998 pDst = FX_Alloc(FX_WCHAR, nCount); |
1035 FX_Unicode_GetNormalization(wChar, pDst); | 999 FX_Unicode_GetNormalization(wChar, pDst); |
1036 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 1000 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
1037 PAGECHAR_INFO Info2 = Info; | 1001 PAGECHAR_INFO Info2 = Info; |
1038 Info2.m_Unicode = pDst[nIndex]; | 1002 Info2.m_Unicode = pDst[nIndex]; |
1039 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 1003 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
1040 m_TextBuf.AppendChar(Info2.m_Unicode); | 1004 m_TextBuf.AppendChar(Info2.m_Unicode); |
1041 if (!m_ParseOptions.m_bGetCharCodeOnly) { | 1005 m_charList.Add(Info2); |
1042 m_charList.Add(Info2); | |
1043 } | |
1044 } | 1006 } |
1045 FX_Free(pDst); | 1007 FX_Free(pDst); |
1046 return; | 1008 return; |
1047 } | 1009 } |
1048 Info.m_Unicode = wChar; | 1010 Info.m_Unicode = wChar; |
1049 m_TextBuf.AppendChar(Info.m_Unicode); | 1011 m_TextBuf.AppendChar(Info.m_Unicode); |
1050 } else { | 1012 } else { |
1051 Info.m_Index = -1; | 1013 Info.m_Index = -1; |
1052 } | 1014 } |
1053 if (!m_ParseOptions.m_bGetCharCodeOnly) { | 1015 m_charList.Add(Info); |
1054 m_charList.Add(Info); | |
1055 } | |
1056 } | 1016 } |
1057 void CPDF_TextPage::CloseTempLine() { | 1017 void CPDF_TextPage::CloseTempLine() { |
1058 int count1 = m_TempCharList.GetSize(); | 1018 int count1 = m_TempCharList.GetSize(); |
1059 if (count1 <= 0) { | 1019 if (count1 <= 0) { |
1060 return; | 1020 return; |
1061 } | 1021 } |
1062 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); | 1022 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); |
1063 CFX_WideString str = m_TempTextBuf.GetWideString(); | 1023 CFX_WideString str = m_TempTextBuf.GetWideString(); |
1064 CFX_WordArray order; | 1024 CFX_WordArray order; |
1065 FX_BOOL bR2L = FALSE; | 1025 FX_BOOL bR2L = FALSE; |
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1244 m_DisplayMatrix.Transform(this_x, this_y); | 1204 m_DisplayMatrix.Transform(this_x, this_y); |
1245 if (FXSYS_fabs(this_y - prev_y) > threshold * 2) { | 1205 if (FXSYS_fabs(this_y - prev_y) > threshold * 2) { |
1246 for (int i = 0; i < count; i++) { | 1206 for (int i = 0; i < count; i++) { |
1247 ProcessTextObject(m_LineObj.GetAt(i)); | 1207 ProcessTextObject(m_LineObj.GetAt(i)); |
1248 } | 1208 } |
1249 m_LineObj.RemoveAll(); | 1209 m_LineObj.RemoveAll(); |
1250 m_LineObj.Add(Obj); | 1210 m_LineObj.Add(Obj); |
1251 return; | 1211 return; |
1252 } | 1212 } |
1253 int i = 0; | 1213 int i = 0; |
1254 if (m_ParseOptions.m_bNormalizeObjs) { | 1214 for (i = count - 1; i >= 0; i--) { |
1255 for (i = count - 1; i >= 0; i--) { | 1215 PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(i); |
1256 PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(i); | 1216 CFX_Matrix prev_matrix; |
1257 CFX_Matrix prev_matrix; | 1217 prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix); |
1258 prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix); | 1218 FX_FLOAT Prev_x = prev_Obj.m_pTextObj->GetPosX(), |
1259 FX_FLOAT Prev_x = prev_Obj.m_pTextObj->GetPosX(), | 1219 Prev_y = prev_Obj.m_pTextObj->GetPosY(); |
1260 Prev_y = prev_Obj.m_pTextObj->GetPosY(); | 1220 prev_Obj.m_formMatrix.Transform(Prev_x, Prev_y); |
1261 prev_Obj.m_formMatrix.Transform(Prev_x, Prev_y); | 1221 m_DisplayMatrix.Transform(Prev_x, Prev_y); |
1262 m_DisplayMatrix.Transform(Prev_x, Prev_y); | 1222 if (this_x >= Prev_x) { |
1263 if (this_x >= Prev_x) { | 1223 if (i == count - 1) { |
1264 if (i == count - 1) { | 1224 m_LineObj.Add(Obj); |
1265 m_LineObj.Add(Obj); | 1225 } else { |
1266 } else { | 1226 m_LineObj.InsertAt(i + 1, Obj); |
1267 m_LineObj.InsertAt(i + 1, Obj); | |
1268 } | |
1269 break; | |
1270 } | 1227 } |
| 1228 break; |
1271 } | 1229 } |
1272 if (i < 0) { | 1230 } |
1273 m_LineObj.InsertAt(0, Obj); | 1231 if (i < 0) { |
1274 } | 1232 m_LineObj.InsertAt(0, Obj); |
1275 } else { | |
1276 m_LineObj.Add(Obj); | |
1277 } | 1233 } |
1278 } | 1234 } |
| 1235 |
1279 int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { | 1236 int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { |
1280 CPDF_TextObject* pTextObj = Obj.m_pTextObj; | 1237 CPDF_TextObject* pTextObj = Obj.m_pTextObj; |
1281 CPDF_ContentMarkData* pMarkData = | 1238 CPDF_ContentMarkData* pMarkData = |
1282 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); | 1239 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); |
1283 if (!pMarkData) { | 1240 if (!pMarkData) { |
1284 return FPDFTEXT_MC_PASS; | 1241 return FPDFTEXT_MC_PASS; |
1285 } | 1242 } |
1286 int nContentMark = pMarkData->CountItems(); | 1243 int nContentMark = pMarkData->CountItems(); |
1287 if (nContentMark < 1) { | 1244 if (nContentMark < 1) { |
1288 return FPDFTEXT_MC_PASS; | 1245 return FPDFTEXT_MC_PASS; |
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1517 if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) { | 1474 if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) { |
1518 if (!formMatrix.IsIdentity()) { | 1475 if (!formMatrix.IsIdentity()) { |
1519 generateChar.m_Matrix.Copy(formMatrix); | 1476 generateChar.m_Matrix.Copy(formMatrix); |
1520 } | 1477 } |
1521 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); | 1478 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); |
1522 m_TempCharList.Add(generateChar); | 1479 m_TempCharList.Add(generateChar); |
1523 } | 1480 } |
1524 } else if (result == 2) { | 1481 } else if (result == 2) { |
1525 CloseTempLine(); | 1482 CloseTempLine(); |
1526 if (m_TextBuf.GetSize()) { | 1483 if (m_TextBuf.GetSize()) { |
1527 if (m_ParseOptions.m_bGetCharCodeOnly) { | 1484 if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) { |
1528 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); | 1485 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); |
| 1486 if (!formMatrix.IsIdentity()) { |
| 1487 generateChar.m_Matrix.Copy(formMatrix); |
| 1488 } |
| 1489 m_charList.Add(generateChar); |
| 1490 } |
| 1491 if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) { |
1529 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); | 1492 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); |
1530 } else { | 1493 if (!formMatrix.IsIdentity()) { |
1531 if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) { | 1494 generateChar.m_Matrix.Copy(formMatrix); |
1532 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); | |
1533 if (!formMatrix.IsIdentity()) { | |
1534 generateChar.m_Matrix.Copy(formMatrix); | |
1535 } | |
1536 m_charList.Add(generateChar); | |
1537 } | 1495 } |
1538 if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) { | 1496 m_charList.Add(generateChar); |
1539 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); | |
1540 if (!formMatrix.IsIdentity()) { | |
1541 generateChar.m_Matrix.Copy(formMatrix); | |
1542 } | |
1543 m_charList.Add(generateChar); | |
1544 } | |
1545 } | 1497 } |
1546 } | 1498 } |
1547 } else if (result == 3 && !m_ParseOptions.m_bOutputHyphen) { | 1499 } else if (result == 3) { |
1548 int32_t nChars = pTextObj->CountChars(); | 1500 int32_t nChars = pTextObj->CountChars(); |
1549 if (nChars == 1) { | 1501 if (nChars == 1) { |
1550 CPDF_TextObjectItem item; | 1502 CPDF_TextObjectItem item; |
1551 pTextObj->GetCharInfo(0, &item); | 1503 pTextObj->GetCharInfo(0, &item); |
1552 CFX_WideString wstrItem = | 1504 CFX_WideString wstrItem = |
1553 pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); | 1505 pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); |
1554 if (wstrItem.IsEmpty()) { | 1506 if (wstrItem.IsEmpty()) { |
1555 wstrItem += (FX_WCHAR)item.m_CharCode; | 1507 wstrItem += (FX_WCHAR)item.m_CharCode; |
1556 } | 1508 } |
1557 FX_WCHAR curChar = wstrItem.GetAt(0); | 1509 FX_WCHAR curChar = wstrItem.GetAt(0); |
(...skipping 420 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1978 } | 1930 } |
1979 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, | 1931 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, |
1980 CPDF_TextObject* pTextObj2) { | 1932 CPDF_TextObject* pTextObj2) { |
1981 if (!pTextObj1 || !pTextObj2) { | 1933 if (!pTextObj1 || !pTextObj2) { |
1982 return FALSE; | 1934 return FALSE; |
1983 } | 1935 } |
1984 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, | 1936 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, |
1985 pTextObj2->m_Right, pTextObj2->m_Top); | 1937 pTextObj2->m_Right, pTextObj2->m_Top); |
1986 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, | 1938 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, |
1987 pTextObj1->m_Right, pTextObj1->m_Top); | 1939 pTextObj1->m_Right, pTextObj1->m_Top); |
1988 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty() && | 1940 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { |
1989 !m_ParseOptions.m_bGetCharCodeOnly) { | |
1990 FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left); | 1941 FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left); |
1991 int nCount = m_charList.GetSize(); | 1942 int nCount = m_charList.GetSize(); |
1992 if (nCount >= 2) { | 1943 if (nCount >= 2) { |
1993 PAGECHAR_INFO perCharTemp = (PAGECHAR_INFO)m_charList[nCount - 2]; | 1944 PAGECHAR_INFO perCharTemp = (PAGECHAR_INFO)m_charList[nCount - 2]; |
1994 FX_FLOAT dbSpace = perCharTemp.m_CharBox.Width(); | 1945 FX_FLOAT dbSpace = perCharTemp.m_CharBox.Width(); |
1995 if (dbXdif > dbSpace) { | 1946 if (dbXdif > dbSpace) { |
1996 return FALSE; | 1947 return FALSE; |
1997 } | 1948 } |
1998 } | 1949 } |
1999 } | 1950 } |
(...skipping 718 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2718 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2669 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2719 return; | 2670 return; |
2720 } | 2671 } |
2721 CPDF_LinkExt* link = NULL; | 2672 CPDF_LinkExt* link = NULL; |
2722 link = m_LinkList.GetAt(index); | 2673 link = m_LinkList.GetAt(index); |
2723 if (!link) { | 2674 if (!link) { |
2724 return; | 2675 return; |
2725 } | 2676 } |
2726 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2677 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2727 } | 2678 } |
OLD | NEW |