OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <ctype.h> | 7 #include <ctype.h> |
8 #include <algorithm> | 8 #include <algorithm> |
9 | 9 |
10 #include "../../../third_party/base/nonstd_unique_ptr.h" | 10 #include "../../../third_party/base/nonstd_unique_ptr.h" |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
89 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, | 89 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, |
90 int flags) { | 90 int flags) { |
91 return new CPDF_TextPage(pPage, flags); | 91 return new CPDF_TextPage(pPage, flags); |
92 } | 92 } |
93 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, | 93 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, |
94 int flags) { | 94 int flags) { |
95 return new CPDF_TextPage(pObjs, flags); | 95 return new CPDF_TextPage(pObjs, flags); |
96 } | 96 } |
97 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( | 97 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( |
98 const IPDF_TextPage* pTextPage) { | 98 const IPDF_TextPage* pTextPage) { |
99 if (!pTextPage) { | 99 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; |
100 return NULL; | |
101 } | |
102 return new CPDF_TextPageFind(pTextPage); | |
103 } | 100 } |
104 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { | 101 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { |
105 return new CPDF_LinkExtract(); | 102 return new CPDF_LinkExtract(); |
106 } | 103 } |
// Single wide characters.
#define TEXT_BLANK_CHAR L' '
#define TEXT_LINEFEED_CHAR L'\n'
#define TEXT_RETURN_CHAR L'\r'

// Wide string literals.
#define TEXT_EMPTY L""
#define TEXT_BLANK L" "
#define TEXT_RETURN_LINEFEED L"\r\n"
#define TEXT_LINEFEED L"\n"

// Numeric constant; its use is outside this excerpt.
// NOTE(review): presumably a character-gap ratio threshold - confirm at the
// point of use.
#define TEXT_CHARRATIO_GAPDELTA 0.070
| 112 |
115 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) | 113 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) |
116 : m_charList(512), | 114 : m_pPage(pPage), |
| 115 m_charList(512), |
117 m_TempCharList(50), | 116 m_TempCharList(50), |
118 m_pPreTextObj(NULL), | 117 m_parserflag(flags), |
119 m_IsParsered(FALSE), | 118 m_pPreTextObj(nullptr), |
| 119 m_bIsParsed(false), |
120 m_TextlineDir(-1), | 120 m_TextlineDir(-1), |
121 m_CurlineRect(0, 0, 0, 0) { | 121 m_CurlineRect(0, 0, 0, 0) { |
122 m_pPage = pPage; | |
123 m_parserflag = flags; | |
124 m_TextBuf.EstimateSize(0, 10240); | 122 m_TextBuf.EstimateSize(0, 10240); |
125 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | 123 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
126 (int)pPage->GetPageHeight(), 0); | 124 (int)pPage->GetPageHeight(), 0); |
127 } | 125 } |
| 126 |
128 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, | 127 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, |
129 CPDFText_ParseOptions ParserOptions) | 128 CPDFText_ParseOptions ParserOptions) |
130 : m_ParseOptions(ParserOptions), | 129 : m_ParseOptions(ParserOptions), |
| 130 m_pPage(pPage), |
131 m_charList(512), | 131 m_charList(512), |
132 m_TempCharList(50), | 132 m_TempCharList(50), |
133 m_pPreTextObj(NULL), | 133 m_parserflag(0), |
134 m_IsParsered(FALSE), | 134 m_pPreTextObj(nullptr), |
| 135 m_bIsParsed(false), |
135 m_TextlineDir(-1), | 136 m_TextlineDir(-1), |
136 m_CurlineRect(0, 0, 0, 0) { | 137 m_CurlineRect(0, 0, 0, 0) { |
137 m_pPage = pPage; | |
138 m_parserflag = 0; | |
139 m_TextBuf.EstimateSize(0, 10240); | 138 m_TextBuf.EstimateSize(0, 10240); |
140 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | 139 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
141 (int)pPage->GetPageHeight(), 0); | 140 (int)pPage->GetPageHeight(), 0); |
142 } | 141 } |
| 142 |
143 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags) | 143 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags) |
144 : m_charList(512), | 144 : m_pPage(pPage), |
| 145 m_charList(512), |
145 m_TempCharList(50), | 146 m_TempCharList(50), |
146 m_pPreTextObj(NULL), | 147 m_parserflag(flags), |
147 m_IsParsered(FALSE), | 148 m_pPreTextObj(nullptr), |
| 149 m_bIsParsed(false), |
148 m_TextlineDir(-1), | 150 m_TextlineDir(-1), |
149 m_CurlineRect(0, 0, 0, 0) { | 151 m_CurlineRect(0, 0, 0, 0) { |
150 m_pPage = pPage; | |
151 m_parserflag = flags; | |
152 m_TextBuf.EstimateSize(0, 10240); | 152 m_TextBuf.EstimateSize(0, 10240); |
153 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); | 153 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); |
154 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); | 154 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); |
155 } | 155 } |
156 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { | 156 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { |
157 m_ParseOptions.m_bNormalizeObjs = bNormalize; | 157 m_ParseOptions.m_bNormalizeObjs = bNormalize; |
158 } | 158 } |
159 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { | 159 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { |
160 switch (charInfo.m_Unicode) { | 160 switch (charInfo.m_Unicode) { |
161 case 0x2: | 161 case 0x2: |
162 case 0x3: | 162 case 0x3: |
163 case 0x93: | 163 case 0x93: |
164 case 0x94: | 164 case 0x94: |
165 case 0x96: | 165 case 0x96: |
166 case 0x97: | 166 case 0x97: |
167 case 0x98: | 167 case 0x98: |
168 case 0xfffe: | 168 case 0xfffe: |
169 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; | 169 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; |
170 default: | 170 default: |
171 return false; | 171 return false; |
172 } | 172 } |
173 } | 173 } |
174 FX_BOOL CPDF_TextPage::ParseTextPage() { | 174 FX_BOOL CPDF_TextPage::ParseTextPage() { |
175 if (!m_pPage) { | 175 m_bIsParsed = false; |
176 m_IsParsered = FALSE; | 176 if (!m_pPage) |
177 return FALSE; | 177 return FALSE; |
178 } | 178 |
179 m_IsParsered = FALSE; | |
180 m_TextBuf.Clear(); | 179 m_TextBuf.Clear(); |
181 m_charList.RemoveAll(); | 180 m_charList.RemoveAll(); |
182 m_pPreTextObj = NULL; | 181 m_pPreTextObj = NULL; |
183 ProcessObject(); | 182 ProcessObject(); |
184 m_IsParsered = TRUE; | 183 m_bIsParsed = true; |
185 if (!m_ParseOptions.m_bGetCharCodeOnly) { | 184 if (!m_ParseOptions.m_bGetCharCodeOnly) { |
186 m_CharIndex.RemoveAll(); | 185 m_CharIndex.RemoveAll(); |
187 int nCount = m_charList.GetSize(); | 186 int nCount = m_charList.GetSize(); |
188 if (nCount) { | 187 if (nCount) { |
189 m_CharIndex.Add(0); | 188 m_CharIndex.Add(0); |
190 } | 189 } |
191 for (int i = 0; i < nCount; i++) { | 190 for (int i = 0; i < nCount; i++) { |
192 int indexSize = m_CharIndex.GetSize(); | 191 int indexSize = m_CharIndex.GetSize(); |
193 FX_BOOL bNormal = FALSE; | 192 FX_BOOL bNormal = FALSE; |
194 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); | 193 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
262 } | 261 } |
263 void CPDF_TextPage::GetRectArray(int start, | 262 void CPDF_TextPage::GetRectArray(int start, |
264 int nCount, | 263 int nCount, |
265 CFX_RectArray& rectArray) const { | 264 CFX_RectArray& rectArray) const { |
266 if (m_ParseOptions.m_bGetCharCodeOnly) { | 265 if (m_ParseOptions.m_bGetCharCodeOnly) { |
267 return; | 266 return; |
268 } | 267 } |
269 if (start < 0 || nCount == 0) { | 268 if (start < 0 || nCount == 0) { |
270 return; | 269 return; |
271 } | 270 } |
272 if (!m_IsParsered) { | 271 if (!m_bIsParsed) { |
273 return; | 272 return; |
274 } | 273 } |
275 PAGECHAR_INFO info_curchar; | 274 PAGECHAR_INFO info_curchar; |
276 CPDF_TextObject* pCurObj = NULL; | 275 CPDF_TextObject* pCurObj = NULL; |
277 CFX_FloatRect rect; | 276 CFX_FloatRect rect; |
278 int curPos = start; | 277 int curPos = start; |
279 FX_BOOL flagNewRect = TRUE; | 278 FX_BOOL flagNewRect = TRUE; |
280 if (nCount + start > m_charList.GetSize() || nCount == -1) { | 279 if (nCount + start > m_charList.GetSize() || nCount == -1) { |
281 nCount = m_charList.GetSize() - start; | 280 nCount = m_charList.GetSize() - start; |
282 } | 281 } |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
345 rect.bottom = info_curchar.m_CharBox.bottom; | 344 rect.bottom = info_curchar.m_CharBox.bottom; |
346 } | 345 } |
347 } | 346 } |
348 } | 347 } |
349 rectArray.Add(rect); | 348 rectArray.Add(rect); |
350 return; | 349 return; |
351 } | 350 } |
352 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, | 351 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, |
353 FX_FLOAT xTolerance, | 352 FX_FLOAT xTolerance, |
354 FX_FLOAT yTolerance) const { | 353 FX_FLOAT yTolerance) const { |
355 if (m_ParseOptions.m_bGetCharCodeOnly) { | 354 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
356 return -3; | 355 return -3; |
357 } | 356 |
358 if (!m_IsParsered) { | |
359 return -3; | |
360 } | |
361 int pos = 0; | 357 int pos = 0; |
362 int NearPos = -1; | 358 int NearPos = -1; |
363 double xdif = 5000, ydif = 5000; | 359 double xdif = 5000, ydif = 5000; |
364 while (pos < m_charList.GetSize()) { | 360 while (pos < m_charList.GetSize()) { |
365 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); | 361 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); |
366 CFX_FloatRect charrect = charinfo.m_CharBox; | 362 CFX_FloatRect charrect = charinfo.m_CharBox; |
367 if (charrect.Contains(point.x, point.y)) { | 363 if (charrect.Contains(point.x, point.y)) { |
368 break; | 364 break; |
369 } | 365 } |
370 if (xTolerance > 0 || yTolerance > 0) { | 366 if (xTolerance > 0 || yTolerance > 0) { |
(...skipping 22 matching lines...) Expand all Loading... |
393 } | 389 } |
394 ++pos; | 390 ++pos; |
395 } | 391 } |
396 if (pos >= m_charList.GetSize()) { | 392 if (pos >= m_charList.GetSize()) { |
397 pos = NearPos; | 393 pos = NearPos; |
398 } | 394 } |
399 return pos; | 395 return pos; |
400 } | 396 } |
401 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { | 397 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { |
402 CFX_WideString strText; | 398 CFX_WideString strText; |
403 if (m_ParseOptions.m_bGetCharCodeOnly || !m_IsParsered) { | 399 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
404 return strText; | 400 return strText; |
405 } | 401 |
406 int nCount = m_charList.GetSize(); | 402 int nCount = m_charList.GetSize(); |
407 int pos = 0; | 403 int pos = 0; |
408 FX_FLOAT posy = 0; | 404 FX_FLOAT posy = 0; |
409 FX_BOOL IsContainPreChar = FALSE; | 405 FX_BOOL IsContainPreChar = FALSE; |
410 FX_BOOL ISAddLineFeed = FALSE; | 406 FX_BOOL ISAddLineFeed = FALSE; |
411 while (pos < nCount) { | 407 while (pos < nCount) { |
412 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); | 408 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); |
413 if (IsRectIntersect(rect, charinfo.m_CharBox)) { | 409 if (IsRectIntersect(rect, charinfo.m_CharBox)) { |
414 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar && | 410 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar && |
415 ISAddLineFeed) { | 411 ISAddLineFeed) { |
(...skipping 15 matching lines...) Expand all Loading... |
431 } | 427 } |
432 } else { | 428 } else { |
433 IsContainPreChar = FALSE; | 429 IsContainPreChar = FALSE; |
434 ISAddLineFeed = TRUE; | 430 ISAddLineFeed = TRUE; |
435 } | 431 } |
436 } | 432 } |
437 return strText; | 433 return strText; |
438 } | 434 } |
439 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, | 435 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, |
440 CFX_RectArray& resRectArray) const { | 436 CFX_RectArray& resRectArray) const { |
441 if (m_ParseOptions.m_bGetCharCodeOnly) { | 437 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
442 return; | 438 return; |
443 } | 439 |
444 if (!m_IsParsered) { | |
445 return; | |
446 } | |
447 CFX_FloatRect curRect; | 440 CFX_FloatRect curRect; |
448 FX_BOOL flagNewRect = TRUE; | 441 FX_BOOL flagNewRect = TRUE; |
449 CPDF_TextObject* pCurObj = NULL; | 442 CPDF_TextObject* pCurObj = NULL; |
450 int nCount = m_charList.GetSize(); | 443 int nCount = m_charList.GetSize(); |
451 int pos = 0; | 444 int pos = 0; |
452 while (pos < nCount) { | 445 while (pos < nCount) { |
453 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); | 446 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); |
454 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 447 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
455 continue; | 448 continue; |
456 } | 449 } |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
491 FX_FLOAT y, | 484 FX_FLOAT y, |
492 FX_FLOAT xTolerance, | 485 FX_FLOAT xTolerance, |
493 FX_FLOAT yTolerance) const { | 486 FX_FLOAT yTolerance) const { |
494 if (m_ParseOptions.m_bGetCharCodeOnly) { | 487 if (m_ParseOptions.m_bGetCharCodeOnly) { |
495 return -3; | 488 return -3; |
496 } | 489 } |
497 CPDF_Point point(x, y); | 490 CPDF_Point point(x, y); |
498 return GetIndexAtPos(point, xTolerance, yTolerance); | 491 return GetIndexAtPos(point, xTolerance, yTolerance); |
499 } | 492 } |
500 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const { | 493 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const { |
501 if (m_ParseOptions.m_bGetCharCodeOnly) { | 494 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
502 return; | 495 return; |
503 } | 496 |
504 if (!m_IsParsered) { | 497 if (index < 0 || index >= m_charList.GetSize()) |
505 return; | 498 return; |
506 } | 499 |
507 if (index < 0 || index >= m_charList.GetSize()) { | |
508 return; | |
509 } | |
510 PAGECHAR_INFO charinfo; | 500 PAGECHAR_INFO charinfo; |
511 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 501 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); |
512 info.m_Charcode = charinfo.m_CharCode; | 502 info.m_Charcode = charinfo.m_CharCode; |
513 info.m_OriginX = charinfo.m_OriginX; | 503 info.m_OriginX = charinfo.m_OriginX; |
514 info.m_OriginY = charinfo.m_OriginY; | 504 info.m_OriginY = charinfo.m_OriginY; |
515 info.m_Unicode = charinfo.m_Unicode; | 505 info.m_Unicode = charinfo.m_Unicode; |
516 info.m_Flag = charinfo.m_Flag; | 506 info.m_Flag = charinfo.m_Flag; |
517 info.m_CharBox = charinfo.m_CharBox; | 507 info.m_CharBox = charinfo.m_CharBox; |
518 info.m_pTextObj = charinfo.m_pTextObj; | 508 info.m_pTextObj = charinfo.m_pTextObj; |
519 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) { | 509 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) { |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
554 if (endIndex >= m_charList.GetSize()) { | 544 if (endIndex >= m_charList.GetSize()) { |
555 break; | 545 break; |
556 } | 546 } |
557 charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex); | 547 charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex); |
558 } | 548 } |
559 endIndex--; | 549 endIndex--; |
560 nCount = endIndex - start + 1; | 550 nCount = endIndex - start + 1; |
561 } | 551 } |
562 } | 552 } |
563 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { | 553 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { |
564 if (!m_IsParsered || nCount == 0) { | 554 if (!m_bIsParsed || nCount == 0) |
565 return L""; | 555 return L""; |
566 } | 556 |
567 if (start < 0) { | 557 if (start < 0) |
568 start = 0; | 558 start = 0; |
569 } | 559 |
570 if (nCount == -1) { | 560 if (nCount == -1) { |
571 nCount = m_charList.GetSize() - start; | 561 nCount = m_charList.GetSize() - start; |
572 return m_TextBuf.GetWideString().Mid(start, | 562 return m_TextBuf.GetWideString().Mid(start, |
573 m_TextBuf.GetWideString().GetLength()); | 563 m_TextBuf.GetWideString().GetLength()); |
574 } | 564 } |
575 if (nCount <= 0 || m_charList.GetSize() <= 0) { | 565 if (nCount <= 0 || m_charList.GetSize() <= 0) { |
576 return L""; | 566 return L""; |
577 } | 567 } |
578 if (nCount + start > m_charList.GetSize() - 1) { | 568 if (nCount + start > m_charList.GetSize() - 1) { |
579 nCount = m_charList.GetSize() - start; | 569 nCount = m_charList.GetSize() - start; |
(...skipping 23 matching lines...) Expand all Loading... |
603 charinfo = | 593 charinfo = |
604 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1); | 594 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1); |
605 } | 595 } |
606 nCount = start + nCount - nCountOffset - startindex; | 596 nCount = start + nCount - nCountOffset - startindex; |
607 if (nCount <= 0) { | 597 if (nCount <= 0) { |
608 return L""; | 598 return L""; |
609 } | 599 } |
610 return m_TextBuf.GetWideString().Mid(startindex, nCount); | 600 return m_TextBuf.GetWideString().Mid(startindex, nCount); |
611 } | 601 } |
612 int CPDF_TextPage::CountRects(int start, int nCount) { | 602 int CPDF_TextPage::CountRects(int start, int nCount) { |
613 if (m_ParseOptions.m_bGetCharCodeOnly) { | 603 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed || start < 0) |
614 return -1; | 604 return -1; |
615 } | 605 |
616 if (!m_IsParsered) { | |
617 return -1; | |
618 } | |
619 if (start < 0) { | |
620 return -1; | |
621 } | |
622 if (nCount == -1 || nCount + start > m_charList.GetSize()) { | 606 if (nCount == -1 || nCount + start > m_charList.GetSize()) { |
623 nCount = m_charList.GetSize() - start; | 607 nCount = m_charList.GetSize() - start; |
624 } | 608 } |
625 m_SelRects.RemoveAll(); | 609 m_SelRects.RemoveAll(); |
626 GetRectArray(start, nCount, m_SelRects); | 610 GetRectArray(start, nCount, m_SelRects); |
627 return m_SelRects.GetSize(); | 611 return m_SelRects.GetSize(); |
628 } | 612 } |
629 void CPDF_TextPage::GetRect(int rectIndex, | 613 void CPDF_TextPage::GetRect(int rectIndex, |
630 FX_FLOAT& left, | 614 FX_FLOAT& left, |
631 FX_FLOAT& top, | 615 FX_FLOAT& top, |
632 FX_FLOAT& right, | 616 FX_FLOAT& right, |
633 FX_FLOAT& bottom) const { | 617 FX_FLOAT& bottom) const { |
634 if (m_ParseOptions.m_bGetCharCodeOnly) { | 618 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
635 return; | 619 return; |
636 } | 620 |
637 if (!m_IsParsered || rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) { | 621 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) |
638 return; | 622 return; |
639 } | 623 |
640 left = m_SelRects.GetAt(rectIndex).left; | 624 left = m_SelRects.GetAt(rectIndex).left; |
641 top = m_SelRects.GetAt(rectIndex).top; | 625 top = m_SelRects.GetAt(rectIndex).top; |
642 right = m_SelRects.GetAt(rectIndex).right; | 626 right = m_SelRects.GetAt(rectIndex).right; |
643 bottom = m_SelRects.GetAt(rectIndex).bottom; | 627 bottom = m_SelRects.GetAt(rectIndex).bottom; |
644 } | 628 } |
645 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { | 629 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { |
646 if (m_ParseOptions.m_bGetCharCodeOnly) { | 630 if (m_ParseOptions.m_bGetCharCodeOnly) { |
647 return FALSE; | 631 return FALSE; |
648 } | 632 } |
649 if (end == start) { | 633 if (end == start) { |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
696 GetBoundedSegment(n - 1, start, count); | 680 GetBoundedSegment(n - 1, start, count); |
697 end = start + count - 1; | 681 end = start + count - 1; |
698 GetBoundedSegment(0, start, count); | 682 GetBoundedSegment(0, start, count); |
699 } else { | 683 } else { |
700 GetBoundedSegment(0, start, count); | 684 GetBoundedSegment(0, start, count); |
701 end = start + count - 1; | 685 end = start + count - 1; |
702 } | 686 } |
703 return GetBaselineRotate(start, end, Rotate); | 687 return GetBaselineRotate(start, end, Rotate); |
704 } | 688 } |
705 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { | 689 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { |
706 if (m_ParseOptions.m_bGetCharCodeOnly) { | 690 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
707 return FALSE; | 691 return FALSE; |
708 } | 692 |
709 if (!m_IsParsered || rectIndex < 0 || rectIndex > m_SelRects.GetSize()) { | 693 if (rectIndex < 0 || rectIndex > m_SelRects.GetSize()) |
710 return FALSE; | 694 return FALSE; |
711 } | 695 |
712 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); | 696 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); |
713 return GetBaselineRotate(rect, Rotate); | 697 return GetBaselineRotate(rect, Rotate); |
714 } | 698 } |
715 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, | 699 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, |
716 FX_FLOAT top, | 700 FX_FLOAT top, |
717 FX_FLOAT right, | 701 FX_FLOAT right, |
718 FX_FLOAT bottom, | 702 FX_FLOAT bottom, |
719 FX_BOOL bContains) { | 703 FX_BOOL bContains) { |
720 if (m_ParseOptions.m_bGetCharCodeOnly) { | 704 if (m_ParseOptions.m_bGetCharCodeOnly) |
721 return -1; | 705 return -1; |
722 } | 706 |
723 m_Segment.RemoveAll(); | 707 m_Segment.RemoveAll(); |
724 if (!m_IsParsered) { | 708 if (!m_bIsParsed) |
725 return -1; | 709 return -1; |
726 } | 710 |
727 CFX_FloatRect rect(left, bottom, right, top); | 711 CFX_FloatRect rect(left, bottom, right, top); |
728 rect.Normalize(); | 712 rect.Normalize(); |
729 int nCount = m_charList.GetSize(); | 713 int nCount = m_charList.GetSize(); |
730 int pos = 0; | 714 int pos = 0; |
731 FPDF_SEGMENT segment; | 715 FPDF_SEGMENT segment; |
732 segment.m_Start = 0; | 716 segment.m_Start = 0; |
733 segment.m_nCount = 0; | 717 segment.m_nCount = 0; |
734 int segmentStatus = 0; | 718 int segmentStatus = 0; |
735 FX_BOOL IsContainPreChar = FALSE; | 719 FX_BOOL IsContainPreChar = FALSE; |
736 while (pos < nCount) { | 720 while (pos < nCount) { |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
796 if (m_ParseOptions.m_bGetCharCodeOnly) { | 780 if (m_ParseOptions.m_bGetCharCodeOnly) { |
797 return; | 781 return; |
798 } | 782 } |
799 if (index < 0 || index >= m_Segment.GetSize()) { | 783 if (index < 0 || index >= m_Segment.GetSize()) { |
800 return; | 784 return; |
801 } | 785 } |
802 start = m_Segment.GetAt(index).m_Start; | 786 start = m_Segment.GetAt(index).m_Start; |
803 count = m_Segment.GetAt(index).m_nCount; | 787 count = m_Segment.GetAt(index).m_nCount; |
804 } | 788 } |
805 int CPDF_TextPage::GetWordBreak(int index, int direction) const { | 789 int CPDF_TextPage::GetWordBreak(int index, int direction) const { |
806 if (m_ParseOptions.m_bGetCharCodeOnly) { | 790 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
807 return -1; | 791 return -1; |
808 } | 792 |
809 if (!m_IsParsered) { | 793 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) |
810 return -1; | 794 return -1; |
811 } | 795 |
812 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) { | 796 if (index < 0 || index >= m_charList.GetSize()) |
813 return -1; | 797 return -1; |
814 } | 798 |
815 if (index < 0 || index >= m_charList.GetSize()) { | |
816 return -1; | |
817 } | |
818 PAGECHAR_INFO charinfo; | 799 PAGECHAR_INFO charinfo; |
819 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 800 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); |
820 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 801 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
821 return index; | 802 return index; |
822 } | 803 } |
823 if (!IsLetter(charinfo.m_Unicode)) { | 804 if (!IsLetter(charinfo.m_Unicode)) { |
824 return index; | 805 return index; |
825 } | 806 } |
826 int breakPos = index; | 807 int breakPos = index; |
827 if (direction == FPDFTEXT_LEFT) { | 808 if (direction == FPDFTEXT_LEFT) { |
(...skipping 1721 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2549 rects.Copy(m_resArray); | 2530 rects.Copy(m_resArray); |
2550 } | 2531 } |
2551 int CPDF_TextPageFind::GetCurOrder() const { | 2532 int CPDF_TextPageFind::GetCurOrder() const { |
2552 return GetCharIndex(m_resStart); | 2533 return GetCharIndex(m_resStart); |
2553 } | 2534 } |
2554 int CPDF_TextPageFind::GetMatchedCount() const { | 2535 int CPDF_TextPageFind::GetMatchedCount() const { |
2555 int resStart = GetCharIndex(m_resStart); | 2536 int resStart = GetCharIndex(m_resStart); |
2556 int resEnd = GetCharIndex(m_resEnd); | 2537 int resEnd = GetCharIndex(m_resEnd); |
2557 return resEnd - resStart + 1; | 2538 return resEnd - resStart + 1; |
2558 } | 2539 } |
2559 CPDF_LinkExtract::CPDF_LinkExtract() : m_pTextPage(NULL), m_IsParserd(FALSE) {} | 2540 |
| 2541 CPDF_LinkExtract::CPDF_LinkExtract() |
| 2542 : m_pTextPage(nullptr), m_bIsParsed(false) { |
| 2543 } |
| 2544 |
2560 CPDF_LinkExtract::~CPDF_LinkExtract() { | 2545 CPDF_LinkExtract::~CPDF_LinkExtract() { |
2561 DeleteLinkList(); | 2546 DeleteLinkList(); |
2562 } | 2547 } |
| 2548 |
2563 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { | 2549 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { |
2564 if (!pTextPage || !pTextPage->IsParsed()) { | 2550 if (!pTextPage || !pTextPage->IsParsed()) |
2565 return FALSE; | 2551 return FALSE; |
2566 } | 2552 |
2567 m_pTextPage = (const CPDF_TextPage*)pTextPage; | 2553 m_pTextPage = (const CPDF_TextPage*)pTextPage; |
2568 m_strPageText = m_pTextPage->GetPageText(0, -1); | 2554 m_strPageText = m_pTextPage->GetPageText(0, -1); |
2569 DeleteLinkList(); | 2555 DeleteLinkList(); |
2570 if (m_strPageText.IsEmpty()) { | 2556 if (m_strPageText.IsEmpty()) { |
2571 return FALSE; | 2557 return FALSE; |
2572 } | 2558 } |
2573 parserLink(); | 2559 ParseLink(); |
2574 m_IsParserd = TRUE; | 2560 m_bIsParsed = true; |
2575 return TRUE; | 2561 return TRUE; |
2576 } | 2562 } |
| 2563 |
2577 void CPDF_LinkExtract::DeleteLinkList() { | 2564 void CPDF_LinkExtract::DeleteLinkList() { |
2578 while (m_LinkList.GetSize()) { | 2565 while (m_LinkList.GetSize()) { |
2579 CPDF_LinkExt* linkinfo = NULL; | 2566 CPDF_LinkExt* linkinfo = NULL; |
2580 linkinfo = m_LinkList.GetAt(0); | 2567 linkinfo = m_LinkList.GetAt(0); |
2581 m_LinkList.RemoveAt(0); | 2568 m_LinkList.RemoveAt(0); |
2582 delete linkinfo; | 2569 delete linkinfo; |
2583 } | 2570 } |
2584 m_LinkList.RemoveAll(); | 2571 m_LinkList.RemoveAll(); |
2585 } | 2572 } |
2586 int CPDF_LinkExtract::CountLinks() const { | 2573 int CPDF_LinkExtract::CountLinks() const { |
2587 if (!m_IsParserd) { | 2574 if (!m_bIsParsed) { |
2588 return -1; | 2575 return -1; |
2589 } | 2576 } |
2590 return m_LinkList.GetSize(); | 2577 return m_LinkList.GetSize(); |
2591 } | 2578 } |
2592 void CPDF_LinkExtract::parserLink() { | 2579 void CPDF_LinkExtract::ParseLink() { |
2593 int start = 0, pos = 0; | 2580 int start = 0, pos = 0; |
2594 int TotalChar = m_pTextPage->CountChars(); | 2581 int TotalChar = m_pTextPage->CountChars(); |
2595 while (pos < TotalChar) { | 2582 while (pos < TotalChar) { |
2596 FPDF_CHAR_INFO pageChar; | 2583 FPDF_CHAR_INFO pageChar; |
2597 m_pTextPage->GetCharInfo(pos, pageChar); | 2584 m_pTextPage->GetCharInfo(pos, pageChar); |
2598 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || | 2585 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || |
2599 pos == TotalChar - 1) { | 2586 pos == TotalChar - 1) { |
2600 int nCount = pos - start; | 2587 int nCount = pos - start; |
2601 if (pos == TotalChar - 1) { | 2588 if (pos == TotalChar - 1) { |
2602 nCount++; | 2589 nCount++; |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2734 int count, | 2721 int count, |
2735 const CFX_WideString& strUrl) { | 2722 const CFX_WideString& strUrl) { |
2736 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; | 2723 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; |
2737 linkInfo->m_strUrl = strUrl; | 2724 linkInfo->m_strUrl = strUrl; |
2738 linkInfo->m_Start = start; | 2725 linkInfo->m_Start = start; |
2739 linkInfo->m_Count = count; | 2726 linkInfo->m_Count = count; |
2740 m_LinkList.Add(linkInfo); | 2727 m_LinkList.Add(linkInfo); |
2741 } | 2728 } |
2742 | 2729 |
2743 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { | 2730 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { |
2744 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2731 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2745 return L""; | 2732 return L""; |
2746 } | 2733 } |
2747 CPDF_LinkExt* link = NULL; | 2734 CPDF_LinkExt* link = NULL; |
2748 link = m_LinkList.GetAt(index); | 2735 link = m_LinkList.GetAt(index); |
2749 if (!link) { | 2736 if (!link) { |
2750 return L""; | 2737 return L""; |
2751 } | 2738 } |
2752 return link->m_strUrl; | 2739 return link->m_strUrl; |
2753 } | 2740 } |
2754 void CPDF_LinkExtract::GetBoundedSegment(int index, | 2741 void CPDF_LinkExtract::GetBoundedSegment(int index, |
2755 int& start, | 2742 int& start, |
2756 int& count) const { | 2743 int& count) const { |
2757 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2744 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2758 return; | 2745 return; |
2759 } | 2746 } |
2760 CPDF_LinkExt* link = NULL; | 2747 CPDF_LinkExt* link = NULL; |
2761 link = m_LinkList.GetAt(index); | 2748 link = m_LinkList.GetAt(index); |
2762 if (!link) { | 2749 if (!link) { |
2763 return; | 2750 return; |
2764 } | 2751 } |
2765 start = link->m_Start; | 2752 start = link->m_Start; |
2766 count = link->m_Count; | 2753 count = link->m_Count; |
2767 } | 2754 } |
2768 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { | 2755 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { |
2769 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2756 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2770 return; | 2757 return; |
2771 } | 2758 } |
2772 CPDF_LinkExt* link = NULL; | 2759 CPDF_LinkExt* link = NULL; |
2773 link = m_LinkList.GetAt(index); | 2760 link = m_LinkList.GetAt(index); |
2774 if (!link) { | 2761 if (!link) { |
2775 return; | 2762 return; |
2776 } | 2763 } |
2777 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2764 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2778 } | 2765 } |
OLD | NEW |