OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "../../../third_party/base/nonstd_unique_ptr.h" | 7 #include "../../../third_party/base/nonstd_unique_ptr.h" |
8 #include "../../include/fpdfapi/fpdf_page.h" | 8 #include "../../include/fpdfapi/fpdf_page.h" |
9 #include "../../include/fpdfapi/fpdf_pageobj.h" | 9 #include "../../include/fpdfapi/fpdf_pageobj.h" |
10 #include "../../include/fpdfapi/fpdf_resource.h" | 10 #include "../../include/fpdfapi/fpdf_resource.h" |
11 #include "../../include/fpdftext/fpdf_text.h" | 11 #include "../../include/fpdftext/fpdf_text.h" |
12 #include "../../include/fxcrt/fx_arb.h" | 12 #include "../../include/fxcrt/fx_arb.h" |
13 #include "../../include/fxcrt/fx_ucd.h" | 13 #include "../../include/fxcrt/fx_ucd.h" |
14 #include "text_int.h" | 14 #include "text_int.h" |
15 #include "txtproc.h" | 15 #include "txtproc.h" |
16 | 16 |
17 CFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode, int destcp, const FX_CHAR* d
efchar) | 17 CFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode, int destcp, const FX_CHAR* d
efchar) |
18 { | 18 { |
19 if (destcp == 0) { | 19 if (destcp == 0) { |
20 if (unicode < 0x80) { | 20 if (unicode < 0x80) { |
21 return CFX_ByteString((char)unicode); | 21 return CFX_ByteString((char)unicode); |
22 } | 22 } |
23 const FX_CHAR* altstr = FCS_GetAltStr(unicode); | 23 const FX_CHAR* altstr = FCS_GetAltStr(unicode); |
24 if (altstr) { | 24 if (altstr) { |
25 return CFX_ByteString(altstr, -1); | 25 return CFX_ByteString(altstr, -1); |
26 } | 26 } |
27 return CFX_ByteString(defchar, -1); | 27 return CFX_ByteString(defchar, -1); |
28 } | 28 } |
29 FX_BOOL bDef = FALSE; | |
30 char buf[10]; | 29 char buf[10]; |
31 int ret = FXSYS_WideCharToMultiByte(destcp, 0, (wchar_t*)&unicode, 1, buf, 1
0, NULL, &bDef); | 30 int iDef = 0; |
32 if (ret && !bDef) { | 31 int ret = FXSYS_WideCharToMultiByte(destcp, 0, (wchar_t*)&unicode, 1, buf, 1
0, NULL, &iDef); |
| 32 if (ret && !iDef) { |
33 return CFX_ByteString(buf, ret); | 33 return CFX_ByteString(buf, ret); |
34 } | 34 } |
35 const FX_CHAR* altstr = FCS_GetAltStr(unicode); | 35 const FX_CHAR* altstr = FCS_GetAltStr(unicode); |
36 if (altstr) { | 36 if (altstr) { |
37 return CFX_ByteString(altstr, -1); | 37 return CFX_ByteString(altstr, -1); |
38 } | 38 } |
39 return CFX_ByteString(defchar, -1); | 39 return CFX_ByteString(defchar, -1); |
40 } | 40 } |
41 CTextPage::CTextPage() | 41 CTextPage::CTextPage() |
42 { | 42 { |
(...skipping 239 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
282 } | 282 } |
283 lastheight = pBaseLine->m_MaxFontSizeV; | 283 lastheight = pBaseLine->m_MaxFontSizeV; |
284 lastbaseline = pBaseLine->m_BaseLine; | 284 lastbaseline = pBaseLine->m_BaseLine; |
285 CFX_WideString str; | 285 CFX_WideString str; |
286 pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth); | 286 pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth); |
287 lines.Add(str); | 287 lines.Add(str); |
288 } | 288 } |
289 } | 289 } |
290 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest) | 290 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest) |
291 { | 291 { |
292 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); | 292 wChar = FX_GetMirrorChar(wChar, true, false); |
293 FX_WCHAR* pDst = NULL; | 293 FX_WCHAR* pDst = NULL; |
294 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 294 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
295 if (nCount < 1 ) { | 295 if (nCount < 1 ) { |
296 sDest += wChar; | 296 sDest += wChar; |
297 return; | 297 return; |
298 } | 298 } |
299 pDst = new FX_WCHAR[nCount]; | 299 pDst = new FX_WCHAR[nCount]; |
300 FX_Unicode_GetNormalization(wChar, pDst); | 300 FX_Unicode_GetNormalization(wChar, pDst); |
301 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 301 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
302 sDest += pDst[nIndex]; | 302 sDest += pDst[nIndex]; |
303 } | 303 } |
304 delete[] pDst; | 304 delete[] pDst; |
305 } | 305 } |
306 void NormalizeString(CFX_WideString& str) | 306 void NormalizeString(CFX_WideString& str) |
307 { | 307 { |
308 if (str.GetLength() <= 0) { | 308 if (str.GetLength() <= 0) { |
309 return; | 309 return; |
310 } | 310 } |
311 CFX_WideString sBuffer; | 311 CFX_WideString sBuffer; |
312 nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create()); | 312 nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create()); |
313 CFX_WordArray order; | 313 CFX_WordArray order; |
314 FX_BOOL bR2L = FALSE; | 314 bool bR2L = false; |
315 int32_t start = 0, count = 0, i = 0; | 315 int32_t start = 0, count = 0, i = 0; |
316 int nR2L = 0, nL2R = 0; | 316 int nR2L = 0, nL2R = 0; |
317 for (i = 0; i < str.GetLength(); i++) { | 317 for (i = 0; i < str.GetLength(); i++) { |
318 if(pBidiChar->AppendChar(str.GetAt(i))) { | 318 if(pBidiChar->AppendChar(str.GetAt(i))) { |
319 int32_t ret = pBidiChar->GetBidiInfo(start, count); | 319 int32_t ret = pBidiChar->GetBidiInfo(start, count); |
320 order.Add(start); | 320 order.Add(start); |
321 order.Add(count); | 321 order.Add(count); |
322 order.Add(ret); | 322 order.Add(ret); |
323 if(!bR2L) { | 323 if(!bR2L) { |
324 if(ret == 2) { | 324 if(ret == 2) { |
(...skipping 11 matching lines...) Expand all Loading... |
336 order.Add(ret); | 336 order.Add(ret); |
337 if(!bR2L) { | 337 if(!bR2L) { |
338 if(ret == 2) { | 338 if(ret == 2) { |
339 nR2L++; | 339 nR2L++; |
340 } else if(ret == 1) { | 340 } else if(ret == 1) { |
341 nL2R++; | 341 nL2R++; |
342 } | 342 } |
343 } | 343 } |
344 } | 344 } |
345 if(nR2L > 0 && nR2L >= nL2R) { | 345 if(nR2L > 0 && nR2L >= nL2R) { |
346 bR2L = TRUE; | 346 bR2L = true; |
347 } | 347 } |
348 if(bR2L) { | 348 if(bR2L) { |
349 int count = order.GetSize(); | 349 int count = order.GetSize(); |
350 for(int j = count - 1; j > 0; j -= 3) { | 350 for(int j = count - 1; j > 0; j -= 3) { |
351 int ret = order.GetAt(j); | 351 int ret = order.GetAt(j); |
352 int start = order.GetAt(j - 2); | 352 int start = order.GetAt(j - 2); |
353 int count1 = order.GetAt(j - 1); | 353 int count1 = order.GetAt(j - 1); |
354 if(ret == 2 || ret == 0) { | 354 if(ret == 2 || ret == 0) { |
355 for(int i = start + count1 - 1; i >= start; i--) { | 355 for(int i = start + count1 - 1; i >= start; i--) { |
356 NormalizeCompositeChar(str[i], sBuffer); | 356 NormalizeCompositeChar(str[i], sBuffer); |
357 } | 357 } |
358 } else { | 358 } else { |
359 i = j; | 359 i = j; |
360 FX_BOOL bSymbol = FALSE; | 360 bool bSymbol = false; |
361 while(i > 0 && order.GetAt(i) != 2) { | 361 while(i > 0 && order.GetAt(i) != 2) { |
362 bSymbol = !order.GetAt(i); | 362 bSymbol = !order.GetAt(i); |
363 i -= 3; | 363 i -= 3; |
364 } | 364 } |
365 int end = start + count1 ; | 365 int end = start + count1 ; |
366 int n = 0; | 366 int n = 0; |
367 if(bSymbol) { | 367 if(bSymbol) { |
368 n = i + 6; | 368 n = i + 6; |
369 } else { | 369 } else { |
370 n = i + 3; | 370 n = i + 3; |
(...skipping 11 matching lines...) Expand all Loading... |
382 int end = start + count1 ; | 382 int end = start + count1 ; |
383 for(int m = start; m < end; m++) { | 383 for(int m = start; m < end; m++) { |
384 sBuffer += str[m]; | 384 sBuffer += str[m]; |
385 } | 385 } |
386 } | 386 } |
387 } | 387 } |
388 } | 388 } |
389 } | 389 } |
390 } else { | 390 } else { |
391 int count = order.GetSize(); | 391 int count = order.GetSize(); |
392 FX_BOOL bL2R = FALSE; | 392 bool bL2R = false; |
393 for(int j = 0; j < count; j += 3) { | 393 for(int j = 0; j < count; j += 3) { |
394 int ret = order.GetAt(j + 2); | 394 int ret = order.GetAt(j + 2); |
395 int start = order.GetAt(j); | 395 int start = order.GetAt(j); |
396 int count1 = order.GetAt(j + 1); | 396 int count1 = order.GetAt(j + 1); |
397 if(ret == 2 || (j == 0 && ret == 0 && !bL2R)) { | 397 if(ret == 2 || (j == 0 && ret == 0 && !bL2R)) { |
398 int i = j + 3; | 398 int i = j + 3; |
399 while(bR2L && i < count) { | 399 while(bR2L && i < count) { |
400 if(order.GetAt(i + 2) == 1) { | 400 if(order.GetAt(i + 2) == 1) { |
401 break; | 401 break; |
402 } else { | 402 } else { |
403 i += 3; | 403 i += 3; |
404 } | 404 } |
405 } | 405 } |
406 if(i == 3) { | 406 if(i == 3) { |
407 j = -3; | 407 j = -3; |
408 bL2R = TRUE; | 408 bL2R = true; |
409 continue; | 409 continue; |
410 } | 410 } |
411 int end = str.GetLength() - 1; | 411 int end = str.GetLength() - 1; |
412 if(i < count) { | 412 if(i < count) { |
413 end = order.GetAt(i) - 1; | 413 end = order.GetAt(i) - 1; |
414 } | 414 } |
415 j = i - 3; | 415 j = i - 3; |
416 for(int n = end; n >= start; n--) { | 416 for(int n = end; n >= start; n--) { |
417 NormalizeCompositeChar(str[i], sBuffer); | 417 NormalizeCompositeChar(str[i], sBuffer); |
418 } | 418 } |
419 } else { | 419 } else { |
420 int end = start + count1 ; | 420 int end = start + count1 ; |
421 for(int i = start; i < end; i++) { | 421 for(int i = start; i < end; i++) { |
422 sBuffer += str[i]; | 422 sBuffer += str[i]; |
423 } | 423 } |
424 } | 424 } |
425 } | 425 } |
426 } | 426 } |
427 str.Empty(); | 427 str.Empty(); |
428 str += sBuffer; | 428 str += sBuffer; |
429 } | 429 } |
430 static FX_BOOL IsNumber(CFX_WideString& str) | 430 static bool IsNumber(CFX_WideString& str) |
431 { | 431 { |
432 for (int i = 0; i < str.GetLength(); i ++) { | 432 for (int i = 0; i < str.GetLength(); i ++) { |
433 FX_WCHAR ch = str[i]; | 433 FX_WCHAR ch = str[i]; |
434 if ((ch < '0' || ch > '9') && ch != '-' && ch != '+' && ch != '.' && ch
!= ' ') { | 434 if ((ch < '0' || ch > '9') && ch != '-' && ch != '+' && ch != '.' && ch
!= ' ') { |
435 return FALSE; | 435 return false; |
436 } | 436 } |
437 } | 437 } |
438 return TRUE; | 438 return true; |
439 } | 439 } |
440 void CTextPage::FindColumns() | 440 void CTextPage::FindColumns() |
441 { | 441 { |
442 int i; | 442 int i; |
443 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | 443 for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
444 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 444 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
445 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) { | 445 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) { |
446 CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); | 446 CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); |
447 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); | 447 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); |
448 if (pColumn == NULL) { | 448 if (pColumn == NULL) { |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
527 pText->m_Text = text; | 527 pText->m_Text = text; |
528 pText->m_Left = leftx; | 528 pText->m_Left = leftx; |
529 pText->m_Right = rightx; | 529 pText->m_Right = rightx; |
530 pText->m_Top = topy; | 530 pText->m_Top = topy; |
531 pText->m_Bottom = bottomy; | 531 pText->m_Bottom = bottomy; |
532 pText->m_SpaceWidth = spacew; | 532 pText->m_SpaceWidth = spacew; |
533 pText->m_FontSizeV = fontsize_v; | 533 pText->m_FontSizeV = fontsize_v; |
534 pText->m_pColumn = NULL; | 534 pText->m_pColumn = NULL; |
535 m_TextList.InsertAt(i, pText); | 535 m_TextList.InsertAt(i, pText); |
536 } | 536 } |
537 FX_BOOL GetIntersection(FX_FLOAT low1, FX_FLOAT high1, FX_FLOAT low2, FX_FLOAT h
igh2, | 537 bool GetIntersection(FX_FLOAT low1, FX_FLOAT high1, FX_FLOAT low2, FX_FLOAT high
2, |
538 FX_FLOAT& interlow, FX_FLOAT& interhigh); | 538 FX_FLOAT& interlow, FX_FLOAT& interhigh); |
539 FX_BOOL CTextBaseLine::CanMerge(CTextBaseLine* pOther) | 539 bool CTextBaseLine::CanMerge(CTextBaseLine* pOther) |
540 { | 540 { |
541 FX_FLOAT inter_top, inter_bottom; | 541 FX_FLOAT inter_top, inter_bottom; |
542 if (!GetIntersection(m_Bottom, m_Top, pOther->m_Bottom, pOther->m_Top, | 542 if (!GetIntersection(m_Bottom, m_Top, pOther->m_Bottom, pOther->m_Top, |
543 inter_bottom, inter_top)) { | 543 inter_bottom, inter_top)) { |
544 return FALSE; | 544 return false; |
545 } | 545 } |
546 FX_FLOAT inter_h = inter_top - inter_bottom; | 546 FX_FLOAT inter_h = inter_top - inter_bottom; |
547 if (inter_h < (m_Top - m_Bottom) / 2 && inter_h < (pOther->m_Top - pOther->m
_Bottom) / 2) { | 547 if (inter_h < (m_Top - m_Bottom) / 2 && inter_h < (pOther->m_Top - pOther->m
_Bottom) / 2) { |
548 return FALSE; | 548 return false; |
549 } | 549 } |
550 FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine); | 550 FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine); |
551 for (int i = 0; i < m_TextList.GetSize(); i ++) { | 551 for (int i = 0; i < m_TextList.GetSize(); i ++) { |
552 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 552 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
553 for (int j = 0; j < pOther->m_TextList.GetSize(); j ++) { | 553 for (int j = 0; j < pOther->m_TextList.GetSize(); j ++) { |
554 CTextBox* pOtherText = (CTextBox*)pOther->m_TextList.GetAt(j); | 554 CTextBox* pOtherText = (CTextBox*)pOther->m_TextList.GetAt(j); |
555 FX_FLOAT inter_left, inter_right; | 555 FX_FLOAT inter_left, inter_right; |
556 if (!GetIntersection(pText->m_Left, pText->m_Right, | 556 if (!GetIntersection(pText->m_Left, pText->m_Right, |
557 pOtherText->m_Left, pOtherText->m_Right, inter_
left, inter_right)) { | 557 pOtherText->m_Left, pOtherText->m_Right, inter_
left, inter_right)) { |
558 continue; | 558 continue; |
559 } | 559 } |
560 FX_FLOAT inter_w = inter_right - inter_left; | 560 FX_FLOAT inter_w = inter_right - inter_left; |
561 if (inter_w < pText->m_SpaceWidth / 2 && inter_w < pOtherText->m_Spa
ceWidth / 2) { | 561 if (inter_w < pText->m_SpaceWidth / 2 && inter_w < pOtherText->m_Spa
ceWidth / 2) { |
562 continue; | 562 continue; |
563 } | 563 } |
564 if (dy >= (pText->m_Bottom - pText->m_Top) / 2 || | 564 if (dy >= (pText->m_Bottom - pText->m_Top) / 2 || |
565 dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) { | 565 dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) { |
566 return FALSE; | 566 return false; |
567 } | 567 } |
568 } | 568 } |
569 } | 569 } |
570 return TRUE; | 570 return true; |
571 } | 571 } |
572 void CTextBaseLine::Merge(CTextBaseLine* pOther) | 572 void CTextBaseLine::Merge(CTextBaseLine* pOther) |
573 { | 573 { |
574 for (int i = 0; i < pOther->m_TextList.GetSize(); i ++) { | 574 for (int i = 0; i < pOther->m_TextList.GetSize(); i ++) { |
575 CTextBox* pText = (CTextBox*)pOther->m_TextList.GetAt(i); | 575 CTextBox* pText = (CTextBox*)pOther->m_TextList.GetAt(i); |
576 InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bott
om, | 576 InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bott
om, |
577 pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text); | 577 pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text); |
578 } | 578 } |
579 } | 579 } |
580 FX_BOOL CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx) | 580 bool CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx) |
581 { | 581 { |
582 int i; | 582 int i; |
583 for (i = 0; i < m_TextList.GetSize(); i ++) { | 583 for (i = 0; i < m_TextList.GetSize(); i ++) { |
584 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 584 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
585 if (pText->m_Text != L" ") { | 585 if (pText->m_Text != L" ") { |
586 break; | 586 break; |
587 } | 587 } |
588 } | 588 } |
589 if (i == m_TextList.GetSize()) { | 589 if (i == m_TextList.GetSize()) { |
590 return FALSE; | 590 return false; |
591 } | 591 } |
592 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 592 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
593 leftx = pText->m_Left; | 593 leftx = pText->m_Left; |
594 for (i = m_TextList.GetSize() - 1; i >= 0; i --) { | 594 for (i = m_TextList.GetSize() - 1; i >= 0; i --) { |
595 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 595 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
596 if (pText->m_Text != L" ") { | 596 if (pText->m_Text != L" ") { |
597 break; | 597 break; |
598 } | 598 } |
599 } | 599 } |
600 pText = (CTextBox*)m_TextList.GetAt(i); | 600 pText = (CTextBox*)m_TextList.GetAt(i); |
601 rightx = pText->m_Right; | 601 rightx = pText->m_Right; |
602 return TRUE; | 602 return true; |
603 } | 603 } |
604 void CTextBaseLine::MergeBoxes() | 604 void CTextBaseLine::MergeBoxes() |
605 { | 605 { |
606 int i = 0; | 606 int i = 0; |
607 while (1) { | 607 while (1) { |
608 if (i >= m_TextList.GetSize() - 1) { | 608 if (i >= m_TextList.GetSize() - 1) { |
609 break; | 609 break; |
610 } | 610 } |
611 CTextBox* pThisText = (CTextBox*)m_TextList.GetAt(i); | 611 CTextBox* pThisText = (CTextBox*)m_TextList.GetAt(i); |
612 CTextBox* pNextText = (CTextBox*)m_TextList.GetAt(i + 1); | 612 CTextBox* pNextText = (CTextBox*)m_TextList.GetAt(i + 1); |
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
717 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CP
DF_Dictionary* pPage, | 717 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CP
DF_Dictionary* pPage, |
718 int iMinWidth, FX_DWORD flags) | 718 int iMinWidth, FX_DWORD flags) |
719 { | 719 { |
720 lines.RemoveAll(); | 720 lines.RemoveAll(); |
721 if (pPage == NULL) { | 721 if (pPage == NULL) { |
722 return; | 722 return; |
723 } | 723 } |
724 CPDF_Page page; | 724 CPDF_Page page; |
725 page.Load(pDoc, pPage); | 725 page.Load(pDoc, pPage); |
726 CPDF_ParseOptions options; | 726 CPDF_ParseOptions options; |
727 options.m_bTextOnly = TRUE; | 727 options.m_bTextOnly = true; |
728 options.m_bSeparateForm = FALSE; | 728 options.m_bSeparateForm = false; |
729 page.ParseContent(&options); | 729 page.ParseContent(&options); |
730 CFX_FloatRect page_bbox = page.GetPageBBox(); | 730 CFX_FloatRect page_bbox = page.GetPageBBox(); |
731 if (flags & PDF2TXT_AUTO_ROTATE) { | 731 if (flags & PDF2TXT_AUTO_ROTATE) { |
732 CheckRotate(page, page_bbox); | 732 CheckRotate(page, page_bbox); |
733 } | 733 } |
734 CTextPage texts; | 734 CTextPage texts; |
735 texts.m_bAutoWidth = flags & PDF2TXT_AUTO_WIDTH; | 735 texts.m_bAutoWidth = flags & PDF2TXT_AUTO_WIDTH; |
736 texts.m_bKeepColumn = flags & PDF2TXT_KEEP_COLUMN; | 736 texts.m_bKeepColumn = flags & PDF2TXT_KEEP_COLUMN; |
737 texts.m_bBreakSpace = TRUE; | 737 texts.m_bBreakSpace = true; |
738 FX_POSITION pos = page.GetFirstObjectPosition(); | 738 FX_POSITION pos = page.GetFirstObjectPosition(); |
739 while (pos) { | 739 while (pos) { |
740 CPDF_PageObject* pObject = page.GetNextObject(pos); | 740 CPDF_PageObject* pObject = page.GetNextObject(pos); |
741 if (!(flags & PDF2TXT_INCLUDE_INVISIBLE)) { | 741 if (!(flags & PDF2TXT_INCLUDE_INVISIBLE)) { |
742 CFX_FloatRect rect(pObject->m_Left, pObject->m_Bottom, pObject->m_Ri
ght, pObject->m_Top); | 742 CFX_FloatRect rect(pObject->m_Left, pObject->m_Bottom, pObject->m_Ri
ght, pObject->m_Top); |
743 if (!page_bbox.Contains(rect)) { | 743 if (!page_bbox.Contains(rect)) { |
744 continue; | 744 continue; |
745 } | 745 } |
746 } | 746 } |
747 texts.ProcessObject(pObject); | 747 texts.ProcessObject(pObject); |
748 } | 748 } |
749 texts.WriteOutput(lines, iMinWidth); | 749 texts.WriteOutput(lines, iMinWidth); |
750 } | 750 } |
751 void PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dicti
onary* pPage, | 751 void PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dicti
onary* pPage, |
752 int iMinWidth, FX_DWORD flags) | 752 int iMinWidth, FX_DWORD flags) |
753 { | 753 { |
754 lines.RemoveAll(); | 754 lines.RemoveAll(); |
755 CFX_WideStringArray wlines; | 755 CFX_WideStringArray wlines; |
756 PDF_GetPageText_Unicode(wlines, pDoc, pPage, iMinWidth, flags); | 756 PDF_GetPageText_Unicode(wlines, pDoc, pPage, iMinWidth, flags); |
757 for (int i = 0; i < wlines.GetSize(); i ++) { | 757 for (int i = 0; i < wlines.GetSize(); i ++) { |
758 CFX_WideString wstr = wlines[i]; | 758 CFX_WideString wstr = wlines[i]; |
759 CFX_ByteString str; | 759 CFX_ByteString str; |
760 for (int c = 0; c < wstr.GetLength(); c ++) { | 760 for (int c = 0; c < wstr.GetLength(); c ++) { |
761 str += CharFromUnicodeAlt(wstr[c], FXSYS_GetACP(), "?"); | 761 str += CharFromUnicodeAlt(wstr[c], FXSYS_GetACP(), "?"); |
762 } | 762 } |
763 lines.Add(str); | 763 lines.Add(str); |
764 } | 764 } |
765 } | 765 } |
766 extern void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects
* pPage, FX_BOOL bUseLF, | 766 extern void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects
* pPage, bool bUseLF, |
767 CFX_PtrArray* pObjArray); | 767 CFX_PtrArray* pObjArray); |
768 void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPD
F_Dictionary* pPage, FX_DWORD flags) | 768 void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPD
F_Dictionary* pPage, FX_DWORD flags) |
769 { | 769 { |
770 buffer.EstimateSize(0, 10240); | 770 buffer.EstimateSize(0, 10240); |
771 CPDF_Page page; | 771 CPDF_Page page; |
772 page.Load(pDoc, pPage); | 772 page.Load(pDoc, pPage); |
773 CPDF_ParseOptions options; | 773 CPDF_ParseOptions options; |
774 options.m_bTextOnly = TRUE; | 774 options.m_bTextOnly = true; |
775 options.m_bSeparateForm = FALSE; | 775 options.m_bSeparateForm = false; |
776 page.ParseContent(&options); | 776 page.ParseContent(&options); |
777 _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL); | 777 _PDF_GetTextStream_Unicode(buffer, &page, true, NULL); |
778 } | 778 } |
OLD | NEW |