Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include <ctype.h> | 7 #include <ctype.h> |
| 8 #include <algorithm> | 8 #include <algorithm> |
| 9 | 9 |
| 10 #include "../../../third_party/base/nonstd_unique_ptr.h" | 10 #include "../../../third_party/base/nonstd_unique_ptr.h" |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 74 } | 74 } |
| 75 return baseSpace; | 75 return baseSpace; |
| 76 } | 76 } |
| 77 | 77 |
| 78 } // namespace | 78 } // namespace |
| 79 | 79 |
| 80 CPDFText_ParseOptions::CPDFText_ParseOptions() | 80 CPDFText_ParseOptions::CPDFText_ParseOptions() |
| 81 : m_bGetCharCodeOnly(FALSE), | 81 : m_bGetCharCodeOnly(FALSE), |
| 82 m_bNormalizeObjs(TRUE), | 82 m_bNormalizeObjs(TRUE), |
| 83 m_bOutputHyphen(FALSE) {} | 83 m_bOutputHyphen(FALSE) {} |
| 84 IPDF_TextPage* IPDF_TextPage::CreateTextPage( | 84 |
| 85 const CPDF_Page* pPage, | |
| 86 CPDFText_ParseOptions ParserOptions) { | |
| 87 return new CPDF_TextPage(pPage, ParserOptions); | |
| 88 } | |
| 89 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, | 85 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, |
| 90 int flags) { | 86 int flags) { |
| 91 return new CPDF_TextPage(pPage, flags); | 87 return new CPDF_TextPage(pPage, flags); |
| 92 } | 88 } |
| 93 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, | 89 |
| 94 int flags) { | |
| 95 return new CPDF_TextPage(pObjs, flags); | |
| 96 } | |
| 97 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( | 90 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( |
| 98 const IPDF_TextPage* pTextPage) { | 91 const IPDF_TextPage* pTextPage) { |
| 99 if (!pTextPage) { | 92 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; |
| 100 return NULL; | |
| 101 } | |
| 102 return new CPDF_TextPageFind(pTextPage); | |
| 103 } | 93 } |
| 94 | |
| 104 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { | 95 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { |
| 105 return new CPDF_LinkExtract(); | 96 return new CPDF_LinkExtract(); |
| 106 } | 97 } |
| 98 | |
| 107 #define TEXT_BLANK_CHAR L' ' | 99 #define TEXT_BLANK_CHAR L' ' |
| 108 #define TEXT_LINEFEED_CHAR L'\n' | 100 #define TEXT_LINEFEED_CHAR L'\n' |
| 109 #define TEXT_RETURN_CHAR L'\r' | 101 #define TEXT_RETURN_CHAR L'\r' |
| 110 #define TEXT_EMPTY L"" | 102 #define TEXT_EMPTY L"" |
| 111 #define TEXT_BLANK L" " | 103 #define TEXT_BLANK L" " |
| 112 #define TEXT_RETURN_LINEFEED L"\r\n" | 104 #define TEXT_RETURN_LINEFEED L"\r\n" |
| 113 #define TEXT_LINEFEED L"\n" | 105 #define TEXT_LINEFEED L"\n" |
| 114 #define TEXT_CHARRATIO_GAPDELTA 0.070 | 106 #define TEXT_CHARRATIO_GAPDELTA 0.070 |
| 107 | |
| 115 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) | 108 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) |
| 116 : m_charList(512), | 109 : m_pPage(pPage), |
| 110 m_charList(512), | |
| 117 m_TempCharList(50), | 111 m_TempCharList(50), |
| 118 m_pPreTextObj(NULL), | 112 m_parserflag(flags), |
| 119 m_IsParsered(FALSE), | 113 m_pPreTextObj(nullptr), |
| 114 m_bIsParsed(false), | |
| 120 m_TextlineDir(-1), | 115 m_TextlineDir(-1), |
| 121 m_CurlineRect(0, 0, 0, 0) { | 116 m_CurlineRect(0, 0, 0, 0) { |
| 122 m_pPage = pPage; | |
| 123 m_parserflag = flags; | |
| 124 m_TextBuf.EstimateSize(0, 10240); | 117 m_TextBuf.EstimateSize(0, 10240); |
| 125 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | 118 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
| 126 (int)pPage->GetPageHeight(), 0); | 119 (int)pPage->GetPageHeight(), 0); |
| 127 } | 120 } |
| 128 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, | 121 |
| 129 CPDFText_ParseOptions ParserOptions) | |
| 130 : m_ParseOptions(ParserOptions), | |
| 131 m_charList(512), | |
| 132 m_TempCharList(50), | |
| 133 m_pPreTextObj(NULL), | |
| 134 m_IsParsered(FALSE), | |
| 135 m_TextlineDir(-1), | |
| 136 m_CurlineRect(0, 0, 0, 0) { | |
| 137 m_pPage = pPage; | |
| 138 m_parserflag = 0; | |
| 139 m_TextBuf.EstimateSize(0, 10240); | |
| 140 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | |
| 141 (int)pPage->GetPageHeight(), 0); | |
| 142 } | |
| 143 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags) | |
| 144 : m_charList(512), | |
| 145 m_TempCharList(50), | |
| 146 m_pPreTextObj(NULL), | |
| 147 m_IsParsered(FALSE), | |
| 148 m_TextlineDir(-1), | |
| 149 m_CurlineRect(0, 0, 0, 0) { | |
| 150 m_pPage = pPage; | |
| 151 m_parserflag = flags; | |
| 152 m_TextBuf.EstimateSize(0, 10240); | |
| 153 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); | |
| 154 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); | |
| 155 } | |
| 156 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { | 122 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { |
| 157 m_ParseOptions.m_bNormalizeObjs = bNormalize; | 123 m_ParseOptions.m_bNormalizeObjs = bNormalize; |
| 158 } | 124 } |
| 159 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { | 125 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { |
| 160 switch (charInfo.m_Unicode) { | 126 switch (charInfo.m_Unicode) { |
| 161 case 0x2: | 127 case 0x2: |
| 162 case 0x3: | 128 case 0x3: |
| 163 case 0x93: | 129 case 0x93: |
| 164 case 0x94: | 130 case 0x94: |
| 165 case 0x96: | 131 case 0x96: |
| 166 case 0x97: | 132 case 0x97: |
| 167 case 0x98: | 133 case 0x98: |
| 168 case 0xfffe: | 134 case 0xfffe: |
| 169 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; | 135 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; |
| 170 default: | 136 default: |
| 171 return false; | 137 return false; |
| 172 } | 138 } |
| 173 } | 139 } |
| 174 FX_BOOL CPDF_TextPage::ParseTextPage() { | 140 FX_BOOL CPDF_TextPage::ParseTextPage() { |
| 175 if (!m_pPage) { | 141 m_bIsParsed = false; |
| 176 m_IsParsered = FALSE; | 142 if (!m_pPage) |
| 177 return FALSE; | 143 return FALSE; |
| 178 } | 144 |
| 179 m_IsParsered = FALSE; | |
| 180 m_TextBuf.Clear(); | 145 m_TextBuf.Clear(); |
| 181 m_charList.RemoveAll(); | 146 m_charList.RemoveAll(); |
| 182 m_pPreTextObj = NULL; | 147 m_pPreTextObj = NULL; |
| 183 ProcessObject(); | 148 ProcessObject(); |
| 184 m_IsParsered = TRUE; | 149 m_bIsParsed = true; |
| 185 if (!m_ParseOptions.m_bGetCharCodeOnly) { | 150 if (!m_ParseOptions.m_bGetCharCodeOnly) { |
| 186 m_CharIndex.RemoveAll(); | 151 m_CharIndex.RemoveAll(); |
| 187 int nCount = m_charList.GetSize(); | 152 int nCount = m_charList.GetSize(); |
| 188 if (nCount) { | 153 if (nCount) { |
| 189 m_CharIndex.Add(0); | 154 m_CharIndex.Add(0); |
| 190 } | 155 } |
| 191 for (int i = 0; i < nCount; i++) { | 156 for (int i = 0; i < nCount; i++) { |
| 192 int indexSize = m_CharIndex.GetSize(); | 157 int indexSize = m_CharIndex.GetSize(); |
| 193 FX_BOOL bNormal = FALSE; | 158 FX_BOOL bNormal = FALSE; |
| 194 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); | 159 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 262 } | 227 } |
| 263 void CPDF_TextPage::GetRectArray(int start, | 228 void CPDF_TextPage::GetRectArray(int start, |
| 264 int nCount, | 229 int nCount, |
| 265 CFX_RectArray& rectArray) const { | 230 CFX_RectArray& rectArray) const { |
| 266 if (m_ParseOptions.m_bGetCharCodeOnly) { | 231 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 267 return; | 232 return; |
| 268 } | 233 } |
| 269 if (start < 0 || nCount == 0) { | 234 if (start < 0 || nCount == 0) { |
| 270 return; | 235 return; |
| 271 } | 236 } |
| 272 if (!m_IsParsered) { | 237 if (!m_bIsParsed) { |
| 273 return; | 238 return; |
| 274 } | 239 } |
| 275 PAGECHAR_INFO info_curchar; | 240 PAGECHAR_INFO info_curchar; |
| 276 CPDF_TextObject* pCurObj = NULL; | 241 CPDF_TextObject* pCurObj = NULL; |
| 277 CFX_FloatRect rect; | 242 CFX_FloatRect rect; |
| 278 int curPos = start; | 243 int curPos = start; |
| 279 FX_BOOL flagNewRect = TRUE; | 244 FX_BOOL flagNewRect = TRUE; |
| 280 if (nCount + start > m_charList.GetSize() || nCount == -1) { | 245 if (nCount + start > m_charList.GetSize() || nCount == -1) { |
| 281 nCount = m_charList.GetSize() - start; | 246 nCount = m_charList.GetSize() - start; |
| 282 } | 247 } |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 345 rect.bottom = info_curchar.m_CharBox.bottom; | 310 rect.bottom = info_curchar.m_CharBox.bottom; |
| 346 } | 311 } |
| 347 } | 312 } |
| 348 } | 313 } |
| 349 rectArray.Add(rect); | 314 rectArray.Add(rect); |
| 350 return; | 315 return; |
| 351 } | 316 } |
| 352 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, | 317 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, |
| 353 FX_FLOAT xTolerance, | 318 FX_FLOAT xTolerance, |
| 354 FX_FLOAT yTolerance) const { | 319 FX_FLOAT yTolerance) const { |
| 355 if (m_ParseOptions.m_bGetCharCodeOnly) { | 320 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 356 return -3; | 321 return -3; |
|
> Tom Sepez (2015/09/04 16:34:56): -3. How intuitive.
| |
| 357 } | 322 |
| 358 if (!m_IsParsered) { | |
| 359 return -3; | |
| 360 } | |
| 361 int pos = 0; | 323 int pos = 0; |
| 362 int NearPos = -1; | 324 int NearPos = -1; |
| 363 double xdif = 5000, ydif = 5000; | 325 double xdif = 5000, ydif = 5000; |
| 364 while (pos < m_charList.GetSize()) { | 326 while (pos < m_charList.GetSize()) { |
| 365 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); | 327 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); |
| 366 CFX_FloatRect charrect = charinfo.m_CharBox; | 328 CFX_FloatRect charrect = charinfo.m_CharBox; |
| 367 if (charrect.Contains(point.x, point.y)) { | 329 if (charrect.Contains(point.x, point.y)) { |
| 368 break; | 330 break; |
| 369 } | 331 } |
| 370 if (xTolerance > 0 || yTolerance > 0) { | 332 if (xTolerance > 0 || yTolerance > 0) { |
| (...skipping 22 matching lines...) Expand all Loading... | |
| 393 } | 355 } |
| 394 ++pos; | 356 ++pos; |
| 395 } | 357 } |
| 396 if (pos >= m_charList.GetSize()) { | 358 if (pos >= m_charList.GetSize()) { |
| 397 pos = NearPos; | 359 pos = NearPos; |
| 398 } | 360 } |
| 399 return pos; | 361 return pos; |
| 400 } | 362 } |
| 401 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { | 363 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { |
| 402 CFX_WideString strText; | 364 CFX_WideString strText; |
| 403 if (m_ParseOptions.m_bGetCharCodeOnly || !m_IsParsered) { | 365 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 404 return strText; | 366 return strText; |
| 405 } | 367 |
| 406 int nCount = m_charList.GetSize(); | 368 int nCount = m_charList.GetSize(); |
| 407 int pos = 0; | 369 int pos = 0; |
| 408 FX_FLOAT posy = 0; | 370 FX_FLOAT posy = 0; |
| 409 FX_BOOL IsContainPreChar = FALSE; | 371 FX_BOOL IsContainPreChar = FALSE; |
| 410 FX_BOOL ISAddLineFeed = FALSE; | 372 FX_BOOL ISAddLineFeed = FALSE; |
| 411 while (pos < nCount) { | 373 while (pos < nCount) { |
| 412 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); | 374 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); |
| 413 if (IsRectIntersect(rect, charinfo.m_CharBox)) { | 375 if (IsRectIntersect(rect, charinfo.m_CharBox)) { |
| 414 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar && | 376 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar && |
| 415 ISAddLineFeed) { | 377 ISAddLineFeed) { |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 431 } | 393 } |
| 432 } else { | 394 } else { |
| 433 IsContainPreChar = FALSE; | 395 IsContainPreChar = FALSE; |
| 434 ISAddLineFeed = TRUE; | 396 ISAddLineFeed = TRUE; |
| 435 } | 397 } |
| 436 } | 398 } |
| 437 return strText; | 399 return strText; |
| 438 } | 400 } |
| 439 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, | 401 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, |
| 440 CFX_RectArray& resRectArray) const { | 402 CFX_RectArray& resRectArray) const { |
| 441 if (m_ParseOptions.m_bGetCharCodeOnly) { | 403 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 442 return; | 404 return; |
| 443 } | 405 |
| 444 if (!m_IsParsered) { | |
| 445 return; | |
| 446 } | |
| 447 CFX_FloatRect curRect; | 406 CFX_FloatRect curRect; |
| 448 FX_BOOL flagNewRect = TRUE; | 407 FX_BOOL flagNewRect = TRUE; |
| 449 CPDF_TextObject* pCurObj = NULL; | 408 CPDF_TextObject* pCurObj = NULL; |
| 450 int nCount = m_charList.GetSize(); | 409 int nCount = m_charList.GetSize(); |
| 451 int pos = 0; | 410 int pos = 0; |
| 452 while (pos < nCount) { | 411 while (pos < nCount) { |
| 453 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); | 412 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); |
| 454 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 413 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
| 455 continue; | 414 continue; |
| 456 } | 415 } |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 491 FX_FLOAT y, | 450 FX_FLOAT y, |
| 492 FX_FLOAT xTolerance, | 451 FX_FLOAT xTolerance, |
| 493 FX_FLOAT yTolerance) const { | 452 FX_FLOAT yTolerance) const { |
| 494 if (m_ParseOptions.m_bGetCharCodeOnly) { | 453 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 495 return -3; | 454 return -3; |
| 496 } | 455 } |
| 497 CPDF_Point point(x, y); | 456 CPDF_Point point(x, y); |
| 498 return GetIndexAtPos(point, xTolerance, yTolerance); | 457 return GetIndexAtPos(point, xTolerance, yTolerance); |
| 499 } | 458 } |
| 500 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const { | 459 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const { |
| 501 if (m_ParseOptions.m_bGetCharCodeOnly) { | 460 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 502 return; | 461 return; |
| 503 } | 462 |
| 504 if (!m_IsParsered) { | 463 if (index < 0 || index >= m_charList.GetSize()) |
| 505 return; | 464 return; |
| 506 } | 465 |
| 507 if (index < 0 || index >= m_charList.GetSize()) { | |
| 508 return; | |
| 509 } | |
| 510 PAGECHAR_INFO charinfo; | 466 PAGECHAR_INFO charinfo; |
| 511 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 467 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); |
| 512 info.m_Charcode = charinfo.m_CharCode; | 468 info.m_Charcode = charinfo.m_CharCode; |
| 513 info.m_OriginX = charinfo.m_OriginX; | 469 info.m_OriginX = charinfo.m_OriginX; |
| 514 info.m_OriginY = charinfo.m_OriginY; | 470 info.m_OriginY = charinfo.m_OriginY; |
| 515 info.m_Unicode = charinfo.m_Unicode; | 471 info.m_Unicode = charinfo.m_Unicode; |
| 516 info.m_Flag = charinfo.m_Flag; | 472 info.m_Flag = charinfo.m_Flag; |
| 517 info.m_CharBox = charinfo.m_CharBox; | 473 info.m_CharBox = charinfo.m_CharBox; |
| 518 info.m_pTextObj = charinfo.m_pTextObj; | 474 info.m_pTextObj = charinfo.m_pTextObj; |
| 519 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) { | 475 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) { |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 554 if (endIndex >= m_charList.GetSize()) { | 510 if (endIndex >= m_charList.GetSize()) { |
| 555 break; | 511 break; |
| 556 } | 512 } |
| 557 charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex); | 513 charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex); |
| 558 } | 514 } |
| 559 endIndex--; | 515 endIndex--; |
| 560 nCount = endIndex - start + 1; | 516 nCount = endIndex - start + 1; |
| 561 } | 517 } |
| 562 } | 518 } |
| 563 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { | 519 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { |
| 564 if (!m_IsParsered || nCount == 0) { | 520 if (!m_bIsParsed || nCount == 0) |
| 565 return L""; | 521 return L""; |
| 566 } | 522 |
| 567 if (start < 0) { | 523 if (start < 0) |
| 568 start = 0; | 524 start = 0; |
| 569 } | 525 |
| 570 if (nCount == -1) { | 526 if (nCount == -1) { |
| 571 nCount = m_charList.GetSize() - start; | 527 nCount = m_charList.GetSize() - start; |
| 572 return m_TextBuf.GetWideString().Mid(start, | 528 return m_TextBuf.GetWideString().Mid(start, |
| 573 m_TextBuf.GetWideString().GetLength()); | 529 m_TextBuf.GetWideString().GetLength()); |
| 574 } | 530 } |
| 575 if (nCount <= 0 || m_charList.GetSize() <= 0) { | 531 if (nCount <= 0 || m_charList.GetSize() <= 0) { |
| 576 return L""; | 532 return L""; |
| 577 } | 533 } |
| 578 if (nCount + start > m_charList.GetSize() - 1) { | 534 if (nCount + start > m_charList.GetSize() - 1) { |
| 579 nCount = m_charList.GetSize() - start; | 535 nCount = m_charList.GetSize() - start; |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 603 charinfo = | 559 charinfo = |
| 604 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1); | 560 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1); |
| 605 } | 561 } |
| 606 nCount = start + nCount - nCountOffset - startindex; | 562 nCount = start + nCount - nCountOffset - startindex; |
| 607 if (nCount <= 0) { | 563 if (nCount <= 0) { |
| 608 return L""; | 564 return L""; |
| 609 } | 565 } |
| 610 return m_TextBuf.GetWideString().Mid(startindex, nCount); | 566 return m_TextBuf.GetWideString().Mid(startindex, nCount); |
| 611 } | 567 } |
| 612 int CPDF_TextPage::CountRects(int start, int nCount) { | 568 int CPDF_TextPage::CountRects(int start, int nCount) { |
| 613 if (m_ParseOptions.m_bGetCharCodeOnly) { | 569 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed || start < 0) |
| 614 return -1; | 570 return -1; |
| 615 } | 571 |
| 616 if (!m_IsParsered) { | |
| 617 return -1; | |
| 618 } | |
| 619 if (start < 0) { | |
| 620 return -1; | |
| 621 } | |
| 622 if (nCount == -1 || nCount + start > m_charList.GetSize()) { | 572 if (nCount == -1 || nCount + start > m_charList.GetSize()) { |
| 623 nCount = m_charList.GetSize() - start; | 573 nCount = m_charList.GetSize() - start; |
| 624 } | 574 } |
| 625 m_SelRects.RemoveAll(); | 575 m_SelRects.RemoveAll(); |
| 626 GetRectArray(start, nCount, m_SelRects); | 576 GetRectArray(start, nCount, m_SelRects); |
| 627 return m_SelRects.GetSize(); | 577 return m_SelRects.GetSize(); |
| 628 } | 578 } |
| 629 void CPDF_TextPage::GetRect(int rectIndex, | 579 void CPDF_TextPage::GetRect(int rectIndex, |
| 630 FX_FLOAT& left, | 580 FX_FLOAT& left, |
| 631 FX_FLOAT& top, | 581 FX_FLOAT& top, |
| 632 FX_FLOAT& right, | 582 FX_FLOAT& right, |
| 633 FX_FLOAT& bottom) const { | 583 FX_FLOAT& bottom) const { |
| 634 if (m_ParseOptions.m_bGetCharCodeOnly) { | 584 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 635 return; | 585 return; |
| 636 } | 586 |
| 637 if (!m_IsParsered || rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) { | 587 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) |
| 638 return; | 588 return; |
| 639 } | 589 |
| 640 left = m_SelRects.GetAt(rectIndex).left; | 590 left = m_SelRects.GetAt(rectIndex).left; |
| 641 top = m_SelRects.GetAt(rectIndex).top; | 591 top = m_SelRects.GetAt(rectIndex).top; |
| 642 right = m_SelRects.GetAt(rectIndex).right; | 592 right = m_SelRects.GetAt(rectIndex).right; |
| 643 bottom = m_SelRects.GetAt(rectIndex).bottom; | 593 bottom = m_SelRects.GetAt(rectIndex).bottom; |
| 644 } | 594 } |
| 645 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { | 595 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { |
| 646 if (m_ParseOptions.m_bGetCharCodeOnly) { | 596 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 647 return FALSE; | 597 return FALSE; |
| 648 } | 598 } |
| 649 if (end == start) { | 599 if (end == start) { |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 696 GetBoundedSegment(n - 1, start, count); | 646 GetBoundedSegment(n - 1, start, count); |
| 697 end = start + count - 1; | 647 end = start + count - 1; |
| 698 GetBoundedSegment(0, start, count); | 648 GetBoundedSegment(0, start, count); |
| 699 } else { | 649 } else { |
| 700 GetBoundedSegment(0, start, count); | 650 GetBoundedSegment(0, start, count); |
| 701 end = start + count - 1; | 651 end = start + count - 1; |
| 702 } | 652 } |
| 703 return GetBaselineRotate(start, end, Rotate); | 653 return GetBaselineRotate(start, end, Rotate); |
| 704 } | 654 } |
| 705 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { | 655 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { |
| 706 if (m_ParseOptions.m_bGetCharCodeOnly) { | 656 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 707 return FALSE; | 657 return FALSE; |
| 708 } | 658 |
| 709 if (!m_IsParsered || rectIndex < 0 || rectIndex > m_SelRects.GetSize()) { | 659 if (rectIndex < 0 || rectIndex > m_SelRects.GetSize()) |
| 710 return FALSE; | 660 return FALSE; |
| 711 } | 661 |
| 712 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); | 662 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); |
| 713 return GetBaselineRotate(rect, Rotate); | 663 return GetBaselineRotate(rect, Rotate); |
| 714 } | 664 } |
| 715 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, | 665 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, |
| 716 FX_FLOAT top, | 666 FX_FLOAT top, |
| 717 FX_FLOAT right, | 667 FX_FLOAT right, |
| 718 FX_FLOAT bottom, | 668 FX_FLOAT bottom, |
| 719 FX_BOOL bContains) { | 669 FX_BOOL bContains) { |
| 720 if (m_ParseOptions.m_bGetCharCodeOnly) { | 670 if (m_ParseOptions.m_bGetCharCodeOnly) |
| 721 return -1; | 671 return -1; |
| 722 } | 672 |
| 723 m_Segment.RemoveAll(); | 673 m_Segment.RemoveAll(); |
| 724 if (!m_IsParsered) { | 674 if (!m_bIsParsed) |
| 725 return -1; | 675 return -1; |
| 726 } | 676 |
| 727 CFX_FloatRect rect(left, bottom, right, top); | 677 CFX_FloatRect rect(left, bottom, right, top); |
| 728 rect.Normalize(); | 678 rect.Normalize(); |
| 729 int nCount = m_charList.GetSize(); | 679 int nCount = m_charList.GetSize(); |
| 730 int pos = 0; | 680 int pos = 0; |
| 731 FPDF_SEGMENT segment; | 681 FPDF_SEGMENT segment; |
| 732 segment.m_Start = 0; | 682 segment.m_Start = 0; |
| 733 segment.m_nCount = 0; | 683 segment.m_nCount = 0; |
| 734 int segmentStatus = 0; | 684 int segmentStatus = 0; |
| 735 FX_BOOL IsContainPreChar = FALSE; | 685 FX_BOOL IsContainPreChar = FALSE; |
| 736 while (pos < nCount) { | 686 while (pos < nCount) { |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 796 if (m_ParseOptions.m_bGetCharCodeOnly) { | 746 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 797 return; | 747 return; |
| 798 } | 748 } |
| 799 if (index < 0 || index >= m_Segment.GetSize()) { | 749 if (index < 0 || index >= m_Segment.GetSize()) { |
| 800 return; | 750 return; |
| 801 } | 751 } |
| 802 start = m_Segment.GetAt(index).m_Start; | 752 start = m_Segment.GetAt(index).m_Start; |
| 803 count = m_Segment.GetAt(index).m_nCount; | 753 count = m_Segment.GetAt(index).m_nCount; |
| 804 } | 754 } |
| 805 int CPDF_TextPage::GetWordBreak(int index, int direction) const { | 755 int CPDF_TextPage::GetWordBreak(int index, int direction) const { |
| 806 if (m_ParseOptions.m_bGetCharCodeOnly) { | 756 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 807 return -1; | 757 return -1; |
| 808 } | 758 |
| 809 if (!m_IsParsered) { | 759 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) |
| 810 return -1; | 760 return -1; |
| 811 } | 761 |
| 812 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) { | 762 if (index < 0 || index >= m_charList.GetSize()) |
| 813 return -1; | 763 return -1; |
| 814 } | 764 |
| 815 if (index < 0 || index >= m_charList.GetSize()) { | |
| 816 return -1; | |
| 817 } | |
| 818 PAGECHAR_INFO charinfo; | 765 PAGECHAR_INFO charinfo; |
| 819 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 766 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); |
| 820 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 767 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
| 821 return index; | 768 return index; |
| 822 } | 769 } |
| 823 if (!IsLetter(charinfo.m_Unicode)) { | 770 if (!IsLetter(charinfo.m_Unicode)) { |
| 824 return index; | 771 return index; |
| 825 } | 772 } |
| 826 int breakPos = index; | 773 int breakPos = index; |
| 827 if (direction == FPDFTEXT_LEFT) { | 774 if (direction == FPDFTEXT_LEFT) { |
| (...skipping 1727 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2555 rects.Copy(m_resArray); | 2502 rects.Copy(m_resArray); |
| 2556 } | 2503 } |
| 2557 int CPDF_TextPageFind::GetCurOrder() const { | 2504 int CPDF_TextPageFind::GetCurOrder() const { |
| 2558 return GetCharIndex(m_resStart); | 2505 return GetCharIndex(m_resStart); |
| 2559 } | 2506 } |
| 2560 int CPDF_TextPageFind::GetMatchedCount() const { | 2507 int CPDF_TextPageFind::GetMatchedCount() const { |
| 2561 int resStart = GetCharIndex(m_resStart); | 2508 int resStart = GetCharIndex(m_resStart); |
| 2562 int resEnd = GetCharIndex(m_resEnd); | 2509 int resEnd = GetCharIndex(m_resEnd); |
| 2563 return resEnd - resStart + 1; | 2510 return resEnd - resStart + 1; |
| 2564 } | 2511 } |
| 2565 CPDF_LinkExtract::CPDF_LinkExtract() : m_pTextPage(NULL), m_IsParserd(FALSE) {} | 2512 |
| 2513 CPDF_LinkExtract::CPDF_LinkExtract() | |
| 2514 : m_pTextPage(nullptr), m_bIsParsed(false) { | |
| 2515 } | |
| 2516 | |
| 2566 CPDF_LinkExtract::~CPDF_LinkExtract() { | 2517 CPDF_LinkExtract::~CPDF_LinkExtract() { |
| 2567 DeleteLinkList(); | 2518 DeleteLinkList(); |
| 2568 } | 2519 } |
| 2520 | |
| 2569 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { | 2521 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { |
| 2570 if (!pTextPage || !pTextPage->IsParsered()) { | 2522 if (!pTextPage || !pTextPage->IsParsed()) |
| 2571 return FALSE; | 2523 return FALSE; |
| 2572 } | 2524 |
| 2573 m_pTextPage = (const CPDF_TextPage*)pTextPage; | 2525 m_pTextPage = (const CPDF_TextPage*)pTextPage; |
| 2574 m_strPageText = m_pTextPage->GetPageText(0, -1); | 2526 m_strPageText = m_pTextPage->GetPageText(0, -1); |
| 2575 DeleteLinkList(); | 2527 DeleteLinkList(); |
| 2576 if (m_strPageText.IsEmpty()) { | 2528 if (m_strPageText.IsEmpty()) { |
| 2577 return FALSE; | 2529 return FALSE; |
| 2578 } | 2530 } |
| 2579 parserLink(); | 2531 ParseLink(); |
| 2580 m_IsParserd = TRUE; | 2532 m_bIsParsed = true; |
| 2581 return TRUE; | 2533 return TRUE; |
| 2582 } | 2534 } |
| 2535 | |
| 2583 void CPDF_LinkExtract::DeleteLinkList() { | 2536 void CPDF_LinkExtract::DeleteLinkList() { |
| 2584 while (m_LinkList.GetSize()) { | 2537 while (m_LinkList.GetSize()) { |
| 2585 CPDF_LinkExt* linkinfo = NULL; | 2538 CPDF_LinkExt* linkinfo = NULL; |
| 2586 linkinfo = m_LinkList.GetAt(0); | 2539 linkinfo = m_LinkList.GetAt(0); |
| 2587 m_LinkList.RemoveAt(0); | 2540 m_LinkList.RemoveAt(0); |
| 2588 delete linkinfo; | 2541 delete linkinfo; |
| 2589 } | 2542 } |
| 2590 m_LinkList.RemoveAll(); | 2543 m_LinkList.RemoveAll(); |
| 2591 } | 2544 } |
| 2592 int CPDF_LinkExtract::CountLinks() const { | 2545 int CPDF_LinkExtract::CountLinks() const { |
| 2593 if (!m_IsParserd) { | 2546 if (!m_bIsParsed) { |
| 2594 return -1; | 2547 return -1; |
| 2595 } | 2548 } |
| 2596 return m_LinkList.GetSize(); | 2549 return m_LinkList.GetSize(); |
| 2597 } | 2550 } |
| 2598 void CPDF_LinkExtract::parserLink() { | 2551 void CPDF_LinkExtract::ParseLink() { |
| 2599 int start = 0, pos = 0; | 2552 int start = 0, pos = 0; |
| 2600 int TotalChar = m_pTextPage->CountChars(); | 2553 int TotalChar = m_pTextPage->CountChars(); |
| 2601 while (pos < TotalChar) { | 2554 while (pos < TotalChar) { |
| 2602 FPDF_CHAR_INFO pageChar; | 2555 FPDF_CHAR_INFO pageChar; |
| 2603 m_pTextPage->GetCharInfo(pos, pageChar); | 2556 m_pTextPage->GetCharInfo(pos, pageChar); |
| 2604 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || | 2557 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || |
| 2605 pos == TotalChar - 1) { | 2558 pos == TotalChar - 1) { |
| 2606 int nCount = pos - start; | 2559 int nCount = pos - start; |
| 2607 if (pos == TotalChar - 1) { | 2560 if (pos == TotalChar - 1) { |
| 2608 nCount++; | 2561 nCount++; |
| (...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2741 int count, | 2694 int count, |
| 2742 const CFX_WideString& strUrl) { | 2695 const CFX_WideString& strUrl) { |
| 2743 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; | 2696 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; |
| 2744 linkInfo->m_strUrl = strUrl; | 2697 linkInfo->m_strUrl = strUrl; |
| 2745 linkInfo->m_Start = start; | 2698 linkInfo->m_Start = start; |
| 2746 linkInfo->m_Count = count; | 2699 linkInfo->m_Count = count; |
| 2747 m_LinkList.Add(linkInfo); | 2700 m_LinkList.Add(linkInfo); |
| 2748 return TRUE; | 2701 return TRUE; |
| 2749 } | 2702 } |
| 2750 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { | 2703 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { |
| 2751 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2704 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
| 2752 return L""; | 2705 return L""; |
| 2753 } | 2706 } |
| 2754 CPDF_LinkExt* link = NULL; | 2707 CPDF_LinkExt* link = NULL; |
| 2755 link = m_LinkList.GetAt(index); | 2708 link = m_LinkList.GetAt(index); |
| 2756 if (!link) { | 2709 if (!link) { |
| 2757 return L""; | 2710 return L""; |
| 2758 } | 2711 } |
| 2759 return link->m_strUrl; | 2712 return link->m_strUrl; |
| 2760 } | 2713 } |
| 2761 void CPDF_LinkExtract::GetBoundedSegment(int index, | 2714 void CPDF_LinkExtract::GetBoundedSegment(int index, |
| 2762 int& start, | 2715 int& start, |
| 2763 int& count) const { | 2716 int& count) const { |
| 2764 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2717 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
| 2765 return; | 2718 return; |
| 2766 } | 2719 } |
| 2767 CPDF_LinkExt* link = NULL; | 2720 CPDF_LinkExt* link = NULL; |
| 2768 link = m_LinkList.GetAt(index); | 2721 link = m_LinkList.GetAt(index); |
| 2769 if (!link) { | 2722 if (!link) { |
| 2770 return; | 2723 return; |
| 2771 } | 2724 } |
| 2772 start = link->m_Start; | 2725 start = link->m_Start; |
| 2773 count = link->m_Count; | 2726 count = link->m_Count; |
| 2774 } | 2727 } |
| 2775 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { | 2728 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { |
| 2776 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2729 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
| 2777 return; | 2730 return; |
| 2778 } | 2731 } |
| 2779 CPDF_LinkExt* link = NULL; | 2732 CPDF_LinkExt* link = NULL; |
| 2780 link = m_LinkList.GetAt(index); | 2733 link = m_LinkList.GetAt(index); |
| 2781 if (!link) { | 2734 if (!link) { |
| 2782 return; | 2735 return; |
| 2783 } | 2736 } |
| 2784 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2737 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
| 2785 } | 2738 } |
| OLD | NEW |