| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include <ctype.h> | 7 #include <ctype.h> |
| 8 #include <algorithm> | 8 #include <algorithm> |
| 9 | 9 |
| 10 #include "../../../third_party/base/nonstd_unique_ptr.h" | 10 #include "../../../third_party/base/nonstd_unique_ptr.h" |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 89 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, | 89 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, |
| 90 int flags) { | 90 int flags) { |
| 91 return new CPDF_TextPage(pPage, flags); | 91 return new CPDF_TextPage(pPage, flags); |
| 92 } | 92 } |
| 93 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, | 93 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, |
| 94 int flags) { | 94 int flags) { |
| 95 return new CPDF_TextPage(pObjs, flags); | 95 return new CPDF_TextPage(pObjs, flags); |
| 96 } | 96 } |
| 97 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( | 97 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( |
| 98 const IPDF_TextPage* pTextPage) { | 98 const IPDF_TextPage* pTextPage) { |
| 99 if (!pTextPage) { | 99 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; |
| 100 return NULL; | |
| 101 } | |
| 102 return new CPDF_TextPageFind(pTextPage); | |
| 103 } | 100 } |
| 104 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { | 101 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { |
| 105 return new CPDF_LinkExtract(); | 102 return new CPDF_LinkExtract(); |
| 106 } | 103 } |
| 107 #define TEXT_BLANK_CHAR L' ' | 104 #define TEXT_BLANK_CHAR L' ' |
| 108 #define TEXT_LINEFEED_CHAR L'\n' | 105 #define TEXT_LINEFEED_CHAR L'\n' |
| 109 #define TEXT_RETURN_CHAR L'\r' | 106 #define TEXT_RETURN_CHAR L'\r' |
| 110 #define TEXT_EMPTY L"" | 107 #define TEXT_EMPTY L"" |
| 111 #define TEXT_BLANK L" " | 108 #define TEXT_BLANK L" " |
| 112 #define TEXT_RETURN_LINEFEED L"\r\n" | 109 #define TEXT_RETURN_LINEFEED L"\r\n" |
| 113 #define TEXT_LINEFEED L"\n" | 110 #define TEXT_LINEFEED L"\n" |
| 114 #define TEXT_CHARRATIO_GAPDELTA 0.070 | 111 #define TEXT_CHARRATIO_GAPDELTA 0.070 |
| 112 |
| 115 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) | 113 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) |
| 116 : m_charList(512), | 114 : m_pPage(pPage), |
| 115 m_charList(512), |
| 117 m_TempCharList(50), | 116 m_TempCharList(50), |
| 118 m_pPreTextObj(NULL), | 117 m_parserflag(flags), |
| 119 m_IsParsered(FALSE), | 118 m_pPreTextObj(nullptr), |
| 119 m_bIsParsed(false), |
| 120 m_TextlineDir(-1), | 120 m_TextlineDir(-1), |
| 121 m_CurlineRect(0, 0, 0, 0) { | 121 m_CurlineRect(0, 0, 0, 0) { |
| 122 m_pPage = pPage; | |
| 123 m_parserflag = flags; | |
| 124 m_TextBuf.EstimateSize(0, 10240); | 122 m_TextBuf.EstimateSize(0, 10240); |
| 125 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | 123 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
| 126 (int)pPage->GetPageHeight(), 0); | 124 (int)pPage->GetPageHeight(), 0); |
| 127 } | 125 } |
| 126 |
| 128 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, | 127 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, |
| 129 CPDFText_ParseOptions ParserOptions) | 128 CPDFText_ParseOptions ParserOptions) |
| 130 : m_ParseOptions(ParserOptions), | 129 : m_ParseOptions(ParserOptions), |
| 130 m_pPage(pPage), |
| 131 m_charList(512), | 131 m_charList(512), |
| 132 m_TempCharList(50), | 132 m_TempCharList(50), |
| 133 m_pPreTextObj(NULL), | 133 m_parserflag(0), |
| 134 m_IsParsered(FALSE), | 134 m_pPreTextObj(nullptr), |
| 135 m_bIsParsed(false), |
| 135 m_TextlineDir(-1), | 136 m_TextlineDir(-1), |
| 136 m_CurlineRect(0, 0, 0, 0) { | 137 m_CurlineRect(0, 0, 0, 0) { |
| 137 m_pPage = pPage; | |
| 138 m_parserflag = 0; | |
| 139 m_TextBuf.EstimateSize(0, 10240); | 138 m_TextBuf.EstimateSize(0, 10240); |
| 140 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | 139 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
| 141 (int)pPage->GetPageHeight(), 0); | 140 (int)pPage->GetPageHeight(), 0); |
| 142 } | 141 } |
| 142 |
| 143 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags) | 143 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags) |
| 144 : m_charList(512), | 144 : m_pPage(pPage), |
| 145 m_charList(512), |
| 145 m_TempCharList(50), | 146 m_TempCharList(50), |
| 146 m_pPreTextObj(NULL), | 147 m_parserflag(flags), |
| 147 m_IsParsered(FALSE), | 148 m_pPreTextObj(nullptr), |
| 149 m_bIsParsed(false), |
| 148 m_TextlineDir(-1), | 150 m_TextlineDir(-1), |
| 149 m_CurlineRect(0, 0, 0, 0) { | 151 m_CurlineRect(0, 0, 0, 0) { |
| 150 m_pPage = pPage; | |
| 151 m_parserflag = flags; | |
| 152 m_TextBuf.EstimateSize(0, 10240); | 152 m_TextBuf.EstimateSize(0, 10240); |
| 153 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); | 153 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); |
| 154 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); | 154 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); |
| 155 } | 155 } |
| 156 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { | 156 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { |
| 157 m_ParseOptions.m_bNormalizeObjs = bNormalize; | 157 m_ParseOptions.m_bNormalizeObjs = bNormalize; |
| 158 } | 158 } |
| 159 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { | 159 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { |
| 160 switch (charInfo.m_Unicode) { | 160 switch (charInfo.m_Unicode) { |
| 161 case 0x2: | 161 case 0x2: |
| 162 case 0x3: | 162 case 0x3: |
| 163 case 0x93: | 163 case 0x93: |
| 164 case 0x94: | 164 case 0x94: |
| 165 case 0x96: | 165 case 0x96: |
| 166 case 0x97: | 166 case 0x97: |
| 167 case 0x98: | 167 case 0x98: |
| 168 case 0xfffe: | 168 case 0xfffe: |
| 169 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; | 169 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; |
| 170 default: | 170 default: |
| 171 return false; | 171 return false; |
| 172 } | 172 } |
| 173 } | 173 } |
| 174 FX_BOOL CPDF_TextPage::ParseTextPage() { | 174 FX_BOOL CPDF_TextPage::ParseTextPage() { |
| 175 if (!m_pPage) { | 175 m_bIsParsed = false; |
| 176 m_IsParsered = FALSE; | 176 if (!m_pPage) |
| 177 return FALSE; | 177 return FALSE; |
| 178 } | 178 |
| 179 m_IsParsered = FALSE; | |
| 180 m_TextBuf.Clear(); | 179 m_TextBuf.Clear(); |
| 181 m_charList.RemoveAll(); | 180 m_charList.RemoveAll(); |
| 182 m_pPreTextObj = NULL; | 181 m_pPreTextObj = NULL; |
| 183 ProcessObject(); | 182 ProcessObject(); |
| 184 m_IsParsered = TRUE; | 183 m_bIsParsed = true; |
| 185 if (!m_ParseOptions.m_bGetCharCodeOnly) { | 184 if (!m_ParseOptions.m_bGetCharCodeOnly) { |
| 186 m_CharIndex.RemoveAll(); | 185 m_CharIndex.RemoveAll(); |
| 187 int nCount = m_charList.GetSize(); | 186 int nCount = m_charList.GetSize(); |
| 188 if (nCount) { | 187 if (nCount) { |
| 189 m_CharIndex.Add(0); | 188 m_CharIndex.Add(0); |
| 190 } | 189 } |
| 191 for (int i = 0; i < nCount; i++) { | 190 for (int i = 0; i < nCount; i++) { |
| 192 int indexSize = m_CharIndex.GetSize(); | 191 int indexSize = m_CharIndex.GetSize(); |
| 193 FX_BOOL bNormal = FALSE; | 192 FX_BOOL bNormal = FALSE; |
| 194 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); | 193 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 262 } | 261 } |
| 263 void CPDF_TextPage::GetRectArray(int start, | 262 void CPDF_TextPage::GetRectArray(int start, |
| 264 int nCount, | 263 int nCount, |
| 265 CFX_RectArray& rectArray) const { | 264 CFX_RectArray& rectArray) const { |
| 266 if (m_ParseOptions.m_bGetCharCodeOnly) { | 265 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 267 return; | 266 return; |
| 268 } | 267 } |
| 269 if (start < 0 || nCount == 0) { | 268 if (start < 0 || nCount == 0) { |
| 270 return; | 269 return; |
| 271 } | 270 } |
| 272 if (!m_IsParsered) { | 271 if (!m_bIsParsed) { |
| 273 return; | 272 return; |
| 274 } | 273 } |
| 275 PAGECHAR_INFO info_curchar; | 274 PAGECHAR_INFO info_curchar; |
| 276 CPDF_TextObject* pCurObj = NULL; | 275 CPDF_TextObject* pCurObj = NULL; |
| 277 CFX_FloatRect rect; | 276 CFX_FloatRect rect; |
| 278 int curPos = start; | 277 int curPos = start; |
| 279 FX_BOOL flagNewRect = TRUE; | 278 FX_BOOL flagNewRect = TRUE; |
| 280 if (nCount + start > m_charList.GetSize() || nCount == -1) { | 279 if (nCount + start > m_charList.GetSize() || nCount == -1) { |
| 281 nCount = m_charList.GetSize() - start; | 280 nCount = m_charList.GetSize() - start; |
| 282 } | 281 } |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 345 rect.bottom = info_curchar.m_CharBox.bottom; | 344 rect.bottom = info_curchar.m_CharBox.bottom; |
| 346 } | 345 } |
| 347 } | 346 } |
| 348 } | 347 } |
| 349 rectArray.Add(rect); | 348 rectArray.Add(rect); |
| 350 return; | 349 return; |
| 351 } | 350 } |
| 352 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, | 351 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, |
| 353 FX_FLOAT xTolerance, | 352 FX_FLOAT xTolerance, |
| 354 FX_FLOAT yTolerance) const { | 353 FX_FLOAT yTolerance) const { |
| 355 if (m_ParseOptions.m_bGetCharCodeOnly) { | 354 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 356 return -3; | 355 return -3; |
| 357 } | 356 |
| 358 if (!m_IsParsered) { | |
| 359 return -3; | |
| 360 } | |
| 361 int pos = 0; | 357 int pos = 0; |
| 362 int NearPos = -1; | 358 int NearPos = -1; |
| 363 double xdif = 5000, ydif = 5000; | 359 double xdif = 5000, ydif = 5000; |
| 364 while (pos < m_charList.GetSize()) { | 360 while (pos < m_charList.GetSize()) { |
| 365 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); | 361 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); |
| 366 CFX_FloatRect charrect = charinfo.m_CharBox; | 362 CFX_FloatRect charrect = charinfo.m_CharBox; |
| 367 if (charrect.Contains(point.x, point.y)) { | 363 if (charrect.Contains(point.x, point.y)) { |
| 368 break; | 364 break; |
| 369 } | 365 } |
| 370 if (xTolerance > 0 || yTolerance > 0) { | 366 if (xTolerance > 0 || yTolerance > 0) { |
| (...skipping 22 matching lines...) Expand all Loading... |
| 393 } | 389 } |
| 394 ++pos; | 390 ++pos; |
| 395 } | 391 } |
| 396 if (pos >= m_charList.GetSize()) { | 392 if (pos >= m_charList.GetSize()) { |
| 397 pos = NearPos; | 393 pos = NearPos; |
| 398 } | 394 } |
| 399 return pos; | 395 return pos; |
| 400 } | 396 } |
| 401 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { | 397 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { |
| 402 CFX_WideString strText; | 398 CFX_WideString strText; |
| 403 if (m_ParseOptions.m_bGetCharCodeOnly || !m_IsParsered) { | 399 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 404 return strText; | 400 return strText; |
| 405 } | 401 |
| 406 int nCount = m_charList.GetSize(); | 402 int nCount = m_charList.GetSize(); |
| 407 int pos = 0; | 403 int pos = 0; |
| 408 FX_FLOAT posy = 0; | 404 FX_FLOAT posy = 0; |
| 409 FX_BOOL IsContainPreChar = FALSE; | 405 FX_BOOL IsContainPreChar = FALSE; |
| 410 FX_BOOL ISAddLineFeed = FALSE; | 406 FX_BOOL ISAddLineFeed = FALSE; |
| 411 while (pos < nCount) { | 407 while (pos < nCount) { |
| 412 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); | 408 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); |
| 413 if (IsRectIntersect(rect, charinfo.m_CharBox)) { | 409 if (IsRectIntersect(rect, charinfo.m_CharBox)) { |
| 414 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar && | 410 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar && |
| 415 ISAddLineFeed) { | 411 ISAddLineFeed) { |
| (...skipping 15 matching lines...) Expand all Loading... |
| 431 } | 427 } |
| 432 } else { | 428 } else { |
| 433 IsContainPreChar = FALSE; | 429 IsContainPreChar = FALSE; |
| 434 ISAddLineFeed = TRUE; | 430 ISAddLineFeed = TRUE; |
| 435 } | 431 } |
| 436 } | 432 } |
| 437 return strText; | 433 return strText; |
| 438 } | 434 } |
| 439 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, | 435 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, |
| 440 CFX_RectArray& resRectArray) const { | 436 CFX_RectArray& resRectArray) const { |
| 441 if (m_ParseOptions.m_bGetCharCodeOnly) { | 437 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 442 return; | 438 return; |
| 443 } | 439 |
| 444 if (!m_IsParsered) { | |
| 445 return; | |
| 446 } | |
| 447 CFX_FloatRect curRect; | 440 CFX_FloatRect curRect; |
| 448 FX_BOOL flagNewRect = TRUE; | 441 FX_BOOL flagNewRect = TRUE; |
| 449 CPDF_TextObject* pCurObj = NULL; | 442 CPDF_TextObject* pCurObj = NULL; |
| 450 int nCount = m_charList.GetSize(); | 443 int nCount = m_charList.GetSize(); |
| 451 int pos = 0; | 444 int pos = 0; |
| 452 while (pos < nCount) { | 445 while (pos < nCount) { |
| 453 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); | 446 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); |
| 454 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 447 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
| 455 continue; | 448 continue; |
| 456 } | 449 } |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 491 FX_FLOAT y, | 484 FX_FLOAT y, |
| 492 FX_FLOAT xTolerance, | 485 FX_FLOAT xTolerance, |
| 493 FX_FLOAT yTolerance) const { | 486 FX_FLOAT yTolerance) const { |
| 494 if (m_ParseOptions.m_bGetCharCodeOnly) { | 487 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 495 return -3; | 488 return -3; |
| 496 } | 489 } |
| 497 CPDF_Point point(x, y); | 490 CPDF_Point point(x, y); |
| 498 return GetIndexAtPos(point, xTolerance, yTolerance); | 491 return GetIndexAtPos(point, xTolerance, yTolerance); |
| 499 } | 492 } |
| 500 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const { | 493 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const { |
| 501 if (m_ParseOptions.m_bGetCharCodeOnly) { | 494 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 502 return; | 495 return; |
| 503 } | 496 |
| 504 if (!m_IsParsered) { | 497 if (index < 0 || index >= m_charList.GetSize()) |
| 505 return; | 498 return; |
| 506 } | 499 |
| 507 if (index < 0 || index >= m_charList.GetSize()) { | |
| 508 return; | |
| 509 } | |
| 510 PAGECHAR_INFO charinfo; | 500 PAGECHAR_INFO charinfo; |
| 511 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 501 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); |
| 512 info.m_Charcode = charinfo.m_CharCode; | 502 info.m_Charcode = charinfo.m_CharCode; |
| 513 info.m_OriginX = charinfo.m_OriginX; | 503 info.m_OriginX = charinfo.m_OriginX; |
| 514 info.m_OriginY = charinfo.m_OriginY; | 504 info.m_OriginY = charinfo.m_OriginY; |
| 515 info.m_Unicode = charinfo.m_Unicode; | 505 info.m_Unicode = charinfo.m_Unicode; |
| 516 info.m_Flag = charinfo.m_Flag; | 506 info.m_Flag = charinfo.m_Flag; |
| 517 info.m_CharBox = charinfo.m_CharBox; | 507 info.m_CharBox = charinfo.m_CharBox; |
| 518 info.m_pTextObj = charinfo.m_pTextObj; | 508 info.m_pTextObj = charinfo.m_pTextObj; |
| 519 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) { | 509 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) { |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 554 if (endIndex >= m_charList.GetSize()) { | 544 if (endIndex >= m_charList.GetSize()) { |
| 555 break; | 545 break; |
| 556 } | 546 } |
| 557 charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex); | 547 charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex); |
| 558 } | 548 } |
| 559 endIndex--; | 549 endIndex--; |
| 560 nCount = endIndex - start + 1; | 550 nCount = endIndex - start + 1; |
| 561 } | 551 } |
| 562 } | 552 } |
| 563 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { | 553 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { |
| 564 if (!m_IsParsered || nCount == 0) { | 554 if (!m_bIsParsed || nCount == 0) |
| 565 return L""; | 555 return L""; |
| 566 } | 556 |
| 567 if (start < 0) { | 557 if (start < 0) |
| 568 start = 0; | 558 start = 0; |
| 569 } | 559 |
| 570 if (nCount == -1) { | 560 if (nCount == -1) { |
| 571 nCount = m_charList.GetSize() - start; | 561 nCount = m_charList.GetSize() - start; |
| 572 return m_TextBuf.GetWideString().Mid(start, | 562 return m_TextBuf.GetWideString().Mid(start, |
| 573 m_TextBuf.GetWideString().GetLength()); | 563 m_TextBuf.GetWideString().GetLength()); |
| 574 } | 564 } |
| 575 if (nCount <= 0 || m_charList.GetSize() <= 0) { | 565 if (nCount <= 0 || m_charList.GetSize() <= 0) { |
| 576 return L""; | 566 return L""; |
| 577 } | 567 } |
| 578 if (nCount + start > m_charList.GetSize() - 1) { | 568 if (nCount + start > m_charList.GetSize() - 1) { |
| 579 nCount = m_charList.GetSize() - start; | 569 nCount = m_charList.GetSize() - start; |
| (...skipping 23 matching lines...) Expand all Loading... |
| 603 charinfo = | 593 charinfo = |
| 604 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1); | 594 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1); |
| 605 } | 595 } |
| 606 nCount = start + nCount - nCountOffset - startindex; | 596 nCount = start + nCount - nCountOffset - startindex; |
| 607 if (nCount <= 0) { | 597 if (nCount <= 0) { |
| 608 return L""; | 598 return L""; |
| 609 } | 599 } |
| 610 return m_TextBuf.GetWideString().Mid(startindex, nCount); | 600 return m_TextBuf.GetWideString().Mid(startindex, nCount); |
| 611 } | 601 } |
| 612 int CPDF_TextPage::CountRects(int start, int nCount) { | 602 int CPDF_TextPage::CountRects(int start, int nCount) { |
| 613 if (m_ParseOptions.m_bGetCharCodeOnly) { | 603 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed || start < 0) |
| 614 return -1; | 604 return -1; |
| 615 } | 605 |
| 616 if (!m_IsParsered) { | |
| 617 return -1; | |
| 618 } | |
| 619 if (start < 0) { | |
| 620 return -1; | |
| 621 } | |
| 622 if (nCount == -1 || nCount + start > m_charList.GetSize()) { | 606 if (nCount == -1 || nCount + start > m_charList.GetSize()) { |
| 623 nCount = m_charList.GetSize() - start; | 607 nCount = m_charList.GetSize() - start; |
| 624 } | 608 } |
| 625 m_SelRects.RemoveAll(); | 609 m_SelRects.RemoveAll(); |
| 626 GetRectArray(start, nCount, m_SelRects); | 610 GetRectArray(start, nCount, m_SelRects); |
| 627 return m_SelRects.GetSize(); | 611 return m_SelRects.GetSize(); |
| 628 } | 612 } |
| 629 void CPDF_TextPage::GetRect(int rectIndex, | 613 void CPDF_TextPage::GetRect(int rectIndex, |
| 630 FX_FLOAT& left, | 614 FX_FLOAT& left, |
| 631 FX_FLOAT& top, | 615 FX_FLOAT& top, |
| 632 FX_FLOAT& right, | 616 FX_FLOAT& right, |
| 633 FX_FLOAT& bottom) const { | 617 FX_FLOAT& bottom) const { |
| 634 if (m_ParseOptions.m_bGetCharCodeOnly) { | 618 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 635 return; | 619 return; |
| 636 } | 620 |
| 637 if (!m_IsParsered || rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) { | 621 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) |
| 638 return; | 622 return; |
| 639 } | 623 |
| 640 left = m_SelRects.GetAt(rectIndex).left; | 624 left = m_SelRects.GetAt(rectIndex).left; |
| 641 top = m_SelRects.GetAt(rectIndex).top; | 625 top = m_SelRects.GetAt(rectIndex).top; |
| 642 right = m_SelRects.GetAt(rectIndex).right; | 626 right = m_SelRects.GetAt(rectIndex).right; |
| 643 bottom = m_SelRects.GetAt(rectIndex).bottom; | 627 bottom = m_SelRects.GetAt(rectIndex).bottom; |
| 644 } | 628 } |
| 645 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { | 629 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { |
| 646 if (m_ParseOptions.m_bGetCharCodeOnly) { | 630 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 647 return FALSE; | 631 return FALSE; |
| 648 } | 632 } |
| 649 if (end == start) { | 633 if (end == start) { |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 696 GetBoundedSegment(n - 1, start, count); | 680 GetBoundedSegment(n - 1, start, count); |
| 697 end = start + count - 1; | 681 end = start + count - 1; |
| 698 GetBoundedSegment(0, start, count); | 682 GetBoundedSegment(0, start, count); |
| 699 } else { | 683 } else { |
| 700 GetBoundedSegment(0, start, count); | 684 GetBoundedSegment(0, start, count); |
| 701 end = start + count - 1; | 685 end = start + count - 1; |
| 702 } | 686 } |
| 703 return GetBaselineRotate(start, end, Rotate); | 687 return GetBaselineRotate(start, end, Rotate); |
| 704 } | 688 } |
| 705 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { | 689 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { |
| 706 if (m_ParseOptions.m_bGetCharCodeOnly) { | 690 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 707 return FALSE; | 691 return FALSE; |
| 708 } | 692 |
| 709 if (!m_IsParsered || rectIndex < 0 || rectIndex > m_SelRects.GetSize()) { | 693 if (rectIndex < 0 || rectIndex > m_SelRects.GetSize()) |
| 710 return FALSE; | 694 return FALSE; |
| 711 } | 695 |
| 712 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); | 696 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); |
| 713 return GetBaselineRotate(rect, Rotate); | 697 return GetBaselineRotate(rect, Rotate); |
| 714 } | 698 } |
| 715 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, | 699 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, |
| 716 FX_FLOAT top, | 700 FX_FLOAT top, |
| 717 FX_FLOAT right, | 701 FX_FLOAT right, |
| 718 FX_FLOAT bottom, | 702 FX_FLOAT bottom, |
| 719 FX_BOOL bContains) { | 703 FX_BOOL bContains) { |
| 720 if (m_ParseOptions.m_bGetCharCodeOnly) { | 704 if (m_ParseOptions.m_bGetCharCodeOnly) |
| 721 return -1; | 705 return -1; |
| 722 } | 706 |
| 723 m_Segment.RemoveAll(); | 707 m_Segment.RemoveAll(); |
| 724 if (!m_IsParsered) { | 708 if (!m_bIsParsed) |
| 725 return -1; | 709 return -1; |
| 726 } | 710 |
| 727 CFX_FloatRect rect(left, bottom, right, top); | 711 CFX_FloatRect rect(left, bottom, right, top); |
| 728 rect.Normalize(); | 712 rect.Normalize(); |
| 729 int nCount = m_charList.GetSize(); | 713 int nCount = m_charList.GetSize(); |
| 730 int pos = 0; | 714 int pos = 0; |
| 731 FPDF_SEGMENT segment; | 715 FPDF_SEGMENT segment; |
| 732 segment.m_Start = 0; | 716 segment.m_Start = 0; |
| 733 segment.m_nCount = 0; | 717 segment.m_nCount = 0; |
| 734 int segmentStatus = 0; | 718 int segmentStatus = 0; |
| 735 FX_BOOL IsContainPreChar = FALSE; | 719 FX_BOOL IsContainPreChar = FALSE; |
| 736 while (pos < nCount) { | 720 while (pos < nCount) { |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 796 if (m_ParseOptions.m_bGetCharCodeOnly) { | 780 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 797 return; | 781 return; |
| 798 } | 782 } |
| 799 if (index < 0 || index >= m_Segment.GetSize()) { | 783 if (index < 0 || index >= m_Segment.GetSize()) { |
| 800 return; | 784 return; |
| 801 } | 785 } |
| 802 start = m_Segment.GetAt(index).m_Start; | 786 start = m_Segment.GetAt(index).m_Start; |
| 803 count = m_Segment.GetAt(index).m_nCount; | 787 count = m_Segment.GetAt(index).m_nCount; |
| 804 } | 788 } |
| 805 int CPDF_TextPage::GetWordBreak(int index, int direction) const { | 789 int CPDF_TextPage::GetWordBreak(int index, int direction) const { |
| 806 if (m_ParseOptions.m_bGetCharCodeOnly) { | 790 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 807 return -1; | 791 return -1; |
| 808 } | 792 |
| 809 if (!m_IsParsered) { | 793 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) |
| 810 return -1; | 794 return -1; |
| 811 } | 795 |
| 812 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) { | 796 if (index < 0 || index >= m_charList.GetSize()) |
| 813 return -1; | 797 return -1; |
| 814 } | 798 |
| 815 if (index < 0 || index >= m_charList.GetSize()) { | |
| 816 return -1; | |
| 817 } | |
| 818 PAGECHAR_INFO charinfo; | 799 PAGECHAR_INFO charinfo; |
| 819 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 800 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); |
| 820 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 801 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
| 821 return index; | 802 return index; |
| 822 } | 803 } |
| 823 if (!IsLetter(charinfo.m_Unicode)) { | 804 if (!IsLetter(charinfo.m_Unicode)) { |
| 824 return index; | 805 return index; |
| 825 } | 806 } |
| 826 int breakPos = index; | 807 int breakPos = index; |
| 827 if (direction == FPDFTEXT_LEFT) { | 808 if (direction == FPDFTEXT_LEFT) { |
| (...skipping 1721 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2549 rects.Copy(m_resArray); | 2530 rects.Copy(m_resArray); |
| 2550 } | 2531 } |
| 2551 int CPDF_TextPageFind::GetCurOrder() const { | 2532 int CPDF_TextPageFind::GetCurOrder() const { |
| 2552 return GetCharIndex(m_resStart); | 2533 return GetCharIndex(m_resStart); |
| 2553 } | 2534 } |
| 2554 int CPDF_TextPageFind::GetMatchedCount() const { | 2535 int CPDF_TextPageFind::GetMatchedCount() const { |
| 2555 int resStart = GetCharIndex(m_resStart); | 2536 int resStart = GetCharIndex(m_resStart); |
| 2556 int resEnd = GetCharIndex(m_resEnd); | 2537 int resEnd = GetCharIndex(m_resEnd); |
| 2557 return resEnd - resStart + 1; | 2538 return resEnd - resStart + 1; |
| 2558 } | 2539 } |
| 2559 CPDF_LinkExtract::CPDF_LinkExtract() : m_pTextPage(NULL), m_IsParserd(FALSE) {} | 2540 |
| 2541 CPDF_LinkExtract::CPDF_LinkExtract() |
| 2542 : m_pTextPage(nullptr), m_bIsParsed(false) { |
| 2543 } |
| 2544 |
| 2560 CPDF_LinkExtract::~CPDF_LinkExtract() { | 2545 CPDF_LinkExtract::~CPDF_LinkExtract() { |
| 2561 DeleteLinkList(); | 2546 DeleteLinkList(); |
| 2562 } | 2547 } |
| 2548 |
| 2563 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { | 2549 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { |
| 2564 if (!pTextPage || !pTextPage->IsParsed()) { | 2550 if (!pTextPage || !pTextPage->IsParsed()) |
| 2565 return FALSE; | 2551 return FALSE; |
| 2566 } | 2552 |
| 2567 m_pTextPage = (const CPDF_TextPage*)pTextPage; | 2553 m_pTextPage = (const CPDF_TextPage*)pTextPage; |
| 2568 m_strPageText = m_pTextPage->GetPageText(0, -1); | 2554 m_strPageText = m_pTextPage->GetPageText(0, -1); |
| 2569 DeleteLinkList(); | 2555 DeleteLinkList(); |
| 2570 if (m_strPageText.IsEmpty()) { | 2556 if (m_strPageText.IsEmpty()) { |
| 2571 return FALSE; | 2557 return FALSE; |
| 2572 } | 2558 } |
| 2573 parserLink(); | 2559 ParseLink(); |
| 2574 m_IsParserd = TRUE; | 2560 m_bIsParsed = true; |
| 2575 return TRUE; | 2561 return TRUE; |
| 2576 } | 2562 } |
| 2563 |
| 2577 void CPDF_LinkExtract::DeleteLinkList() { | 2564 void CPDF_LinkExtract::DeleteLinkList() { |
| 2578 while (m_LinkList.GetSize()) { | 2565 while (m_LinkList.GetSize()) { |
| 2579 CPDF_LinkExt* linkinfo = NULL; | 2566 CPDF_LinkExt* linkinfo = NULL; |
| 2580 linkinfo = m_LinkList.GetAt(0); | 2567 linkinfo = m_LinkList.GetAt(0); |
| 2581 m_LinkList.RemoveAt(0); | 2568 m_LinkList.RemoveAt(0); |
| 2582 delete linkinfo; | 2569 delete linkinfo; |
| 2583 } | 2570 } |
| 2584 m_LinkList.RemoveAll(); | 2571 m_LinkList.RemoveAll(); |
| 2585 } | 2572 } |
| 2586 int CPDF_LinkExtract::CountLinks() const { | 2573 int CPDF_LinkExtract::CountLinks() const { |
| 2587 if (!m_IsParserd) { | 2574 if (!m_bIsParsed) { |
| 2588 return -1; | 2575 return -1; |
| 2589 } | 2576 } |
| 2590 return m_LinkList.GetSize(); | 2577 return m_LinkList.GetSize(); |
| 2591 } | 2578 } |
| 2592 void CPDF_LinkExtract::parserLink() { | 2579 void CPDF_LinkExtract::ParseLink() { |
| 2593 int start = 0, pos = 0; | 2580 int start = 0, pos = 0; |
| 2594 int TotalChar = m_pTextPage->CountChars(); | 2581 int TotalChar = m_pTextPage->CountChars(); |
| 2595 while (pos < TotalChar) { | 2582 while (pos < TotalChar) { |
| 2596 FPDF_CHAR_INFO pageChar; | 2583 FPDF_CHAR_INFO pageChar; |
| 2597 m_pTextPage->GetCharInfo(pos, pageChar); | 2584 m_pTextPage->GetCharInfo(pos, pageChar); |
| 2598 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || | 2585 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || |
| 2599 pos == TotalChar - 1) { | 2586 pos == TotalChar - 1) { |
| 2600 int nCount = pos - start; | 2587 int nCount = pos - start; |
| 2601 if (pos == TotalChar - 1) { | 2588 if (pos == TotalChar - 1) { |
| 2602 nCount++; | 2589 nCount++; |
| (...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2734 int count, | 2721 int count, |
| 2735 const CFX_WideString& strUrl) { | 2722 const CFX_WideString& strUrl) { |
| 2736 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; | 2723 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; |
| 2737 linkInfo->m_strUrl = strUrl; | 2724 linkInfo->m_strUrl = strUrl; |
| 2738 linkInfo->m_Start = start; | 2725 linkInfo->m_Start = start; |
| 2739 linkInfo->m_Count = count; | 2726 linkInfo->m_Count = count; |
| 2740 m_LinkList.Add(linkInfo); | 2727 m_LinkList.Add(linkInfo); |
| 2741 } | 2728 } |
| 2742 | 2729 |
| 2743 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { | 2730 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { |
| 2744 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2731 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
| 2745 return L""; | 2732 return L""; |
| 2746 } | 2733 } |
| 2747 CPDF_LinkExt* link = NULL; | 2734 CPDF_LinkExt* link = NULL; |
| 2748 link = m_LinkList.GetAt(index); | 2735 link = m_LinkList.GetAt(index); |
| 2749 if (!link) { | 2736 if (!link) { |
| 2750 return L""; | 2737 return L""; |
| 2751 } | 2738 } |
| 2752 return link->m_strUrl; | 2739 return link->m_strUrl; |
| 2753 } | 2740 } |
| 2754 void CPDF_LinkExtract::GetBoundedSegment(int index, | 2741 void CPDF_LinkExtract::GetBoundedSegment(int index, |
| 2755 int& start, | 2742 int& start, |
| 2756 int& count) const { | 2743 int& count) const { |
| 2757 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2744 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
| 2758 return; | 2745 return; |
| 2759 } | 2746 } |
| 2760 CPDF_LinkExt* link = NULL; | 2747 CPDF_LinkExt* link = NULL; |
| 2761 link = m_LinkList.GetAt(index); | 2748 link = m_LinkList.GetAt(index); |
| 2762 if (!link) { | 2749 if (!link) { |
| 2763 return; | 2750 return; |
| 2764 } | 2751 } |
| 2765 start = link->m_Start; | 2752 start = link->m_Start; |
| 2766 count = link->m_Count; | 2753 count = link->m_Count; |
| 2767 } | 2754 } |
| 2768 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { | 2755 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { |
| 2769 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2756 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
| 2770 return; | 2757 return; |
| 2771 } | 2758 } |
| 2772 CPDF_LinkExt* link = NULL; | 2759 CPDF_LinkExt* link = NULL; |
| 2773 link = m_LinkList.GetAt(index); | 2760 link = m_LinkList.GetAt(index); |
| 2774 if (!link) { | 2761 if (!link) { |
| 2775 return; | 2762 return; |
| 2776 } | 2763 } |
| 2777 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2764 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
| 2778 } | 2765 } |
| OLD | NEW |