| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "core/fpdfdoc/pdf_vt.h" | 9 #include "core/fpdfdoc/pdf_vt.h" |
| 10 #include "core/include/fpdfdoc/fpdf_doc.h" | 10 #include "core/include/fpdfdoc/fpdf_doc.h" |
| (...skipping 377 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 388 return CPVT_Size(m_rcRet.Width(), m_rcRet.Height()); | 388 return CPVT_Size(m_rcRet.Width(), m_rcRet.Height()); |
| 389 } | 389 } |
| 390 CPVT_FloatRect CTypeset::Typeset() { | 390 CPVT_FloatRect CTypeset::Typeset() { |
| 391 ASSERT(m_pVT); | 391 ASSERT(m_pVT); |
| 392 m_pSection->m_LineArray.Empty(); | 392 m_pSection->m_LineArray.Empty(); |
| 393 SplitLines(TRUE, 0.0f); | 393 SplitLines(TRUE, 0.0f); |
| 394 m_pSection->m_LineArray.Clear(); | 394 m_pSection->m_LineArray.Clear(); |
| 395 OutputLines(); | 395 OutputLines(); |
| 396 return m_rcRet; | 396 return m_rcRet; |
| 397 } | 397 } |
| 398 static int special_chars[128] = { | 398 |
| 399 0x0000, 0x000C, 0x0008, 0x000C, 0x0008, 0x0000, 0x0020, 0x0000, 0x0000, | 399 static const uint8_t special_chars[128] = { |
| 400 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | 400 0x00, 0x0C, 0x08, 0x0C, 0x08, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 401 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | 401 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 402 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0008, 0x0008, 0x0000, | 402 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x00, |
| 403 0x0010, 0x0000, 0x0000, 0x0028, 0x000C, 0x0008, 0x0000, 0x0000, 0x0028, | 403 0x10, 0x00, 0x00, 0x28, 0x0C, 0x08, 0x00, 0x00, 0x28, 0x28, 0x28, 0x28, |
| 404 0x0028, 0x0028, 0x0028, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, | 404 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x08, 0x08, |
| 405 0x0002, 0x0002, 0x0002, 0x0002, 0x0008, 0x0008, 0x0000, 0x0000, 0x0000, | 405 0x00, 0x00, 0x00, 0x08, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, |
| 406 0x0008, 0x0000, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, | 406 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, |
| 407 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, | 407 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00, |
| 408 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, | 408 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, |
| 409 0x0001, 0x000C, 0x0000, 0x0008, 0x0000, 0x0000, 0x0000, 0x0001, 0x0001, | 409 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, |
| 410 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, | 410 0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00, |
| 411 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, | |
| 412 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x000C, 0x0000, 0x0008, | |
| 413 0x0000, 0x0000, | |
| 414 }; | 411 }; |
| 415 static FX_BOOL IsLatin(FX_WORD word) { | 412 |
| 416 if (word <= 0x007F) { | 413 static bool IsLatin(FX_WORD word) { |
| 417 if (special_chars[word] & 0x0001) { | 414 if (word <= 0x007F) |
| 418 return TRUE; | 415 return !!(special_chars[word] & 0x01); |
| 419 } | 416 |
| 420 } | 417 return ((word >= 0x00C0 && word <= 0x00FF) || |
| 421 if ((word >= 0x00C0 && word <= 0x00FF) || | 418 (word >= 0x0100 && word <= 0x024F) || |
| 422 (word >= 0x0100 && word <= 0x024F) || | 419 (word >= 0x1E00 && word <= 0x1EFF) || |
| 423 (word >= 0x1E00 && word <= 0x1EFF) || | 420 (word >= 0x2C60 && word <= 0x2C7F) || |
| 424 (word >= 0x2C60 && word <= 0x2C7F) || | 421 (word >= 0xA720 && word <= 0xA7FF) || |
| 425 (word >= 0xA720 && word <= 0xA7FF) || | 422 (word >= 0xFF21 && word <= 0xFF3A) || |
| 426 (word >= 0xFF21 && word <= 0xFF3A) || | 423 (word >= 0xFF41 && word <= 0xFF5A)); |
| 427 (word >= 0xFF41 && word <= 0xFF5A)) { | |
| 428 return TRUE; | |
| 429 } | |
| 430 return FALSE; | |
| 431 } | 424 } |
| 432 static FX_BOOL IsDigit(FX_DWORD word) { | 425 |
| 426 static bool IsDigit(FX_DWORD word) { |
| 433 return word >= 0x0030 && word <= 0x0039; | 427 return word >= 0x0030 && word <= 0x0039; |
| 434 } | 428 } |
| 435 static FX_BOOL IsCJK(FX_DWORD word) { | 429 |
| 430 static bool IsCJK(FX_DWORD word) { |
| 436 if ((word >= 0x1100 && word <= 0x11FF) || | 431 if ((word >= 0x1100 && word <= 0x11FF) || |
| 437 (word >= 0x2E80 && word <= 0x2FFF) || | 432 (word >= 0x2E80 && word <= 0x2FFF) || |
| 438 (word >= 0x3040 && word <= 0x9FBF) || | 433 (word >= 0x3040 && word <= 0x9FBF) || |
| 439 (word >= 0xAC00 && word <= 0xD7AF) || | 434 (word >= 0xAC00 && word <= 0xD7AF) || |
| 440 (word >= 0xF900 && word <= 0xFAFF) || | 435 (word >= 0xF900 && word <= 0xFAFF) || |
| 441 (word >= 0xFE30 && word <= 0xFE4F) || | 436 (word >= 0xFE30 && word <= 0xFE4F) || |
| 442 (word >= 0x20000 && word <= 0x2A6DF) || | 437 (word >= 0x20000 && word <= 0x2A6DF) || |
| 443 (word >= 0x2F800 && word <= 0x2FA1F)) { | 438 (word >= 0x2F800 && word <= 0x2FA1F)) { |
| 444 return TRUE; | 439 return true; |
| 445 } | 440 } |
| 446 if (word >= 0x3000 && word <= 0x303F) { | 441 if (word >= 0x3000 && word <= 0x303F) { |
| 447 if (word == 0x3005 || word == 0x3006 || word == 0x3021 || word == 0x3022 || | 442 return ( |
| 443 word == 0x3005 || word == 0x3006 || word == 0x3021 || word == 0x3022 || |
| 448 word == 0x3023 || word == 0x3024 || word == 0x3025 || word == 0x3026 || | 444 word == 0x3023 || word == 0x3024 || word == 0x3025 || word == 0x3026 || |
| 449 word == 0x3027 || word == 0x3028 || word == 0x3029 || word == 0x3031 || | 445 word == 0x3027 || word == 0x3028 || word == 0x3029 || word == 0x3031 || |
| 450 word == 0x3032 || word == 0x3033 || word == 0x3034 || word == 0x3035) { | 446 word == 0x3032 || word == 0x3033 || word == 0x3034 || word == 0x3035); |
| 451 return TRUE; | |
| 452 } | |
| 453 return FALSE; | |
| 454 } | 447 } |
| 455 if (word >= 0xFF66 && word <= 0xFF9D) { | 448 return word >= 0xFF66 && word <= 0xFF9D; |
| 456 return TRUE; | 449 } |
| 450 |
| 451 static bool IsPunctuation(FX_DWORD word) { |
| 452 if (word <= 0x007F) |
| 453 return !!(special_chars[word] & 0x08); |
| 454 |
| 455 if (word >= 0x0080 && word <= 0x00FF) { |
| 456 return (word == 0x0082 || word == 0x0084 || word == 0x0085 || |
| 457 word == 0x0091 || word == 0x0092 || word == 0x0093 || |
| 458 word <= 0x0094 || word == 0x0096 || word == 0x00B4 || |
| 459 word == 0x00B8); |
| 457 } | 460 } |
| 458 return FALSE; | 461 |
| 459 } | 462 if (word >= 0x2000 && word <= 0x206F) { |
| 460 static FX_BOOL IsPunctuation(FX_DWORD word) { | 463 return ( |
| 461 if (word <= 0x007F) { | 464 word == 0x2010 || word == 0x2011 || word == 0x2012 || word == 0x2013 || |
| 462 if ((special_chars[word] >> 3) & 1) { | |
| 463 return TRUE; | |
| 464 } | |
| 465 } else if (word >= 0x0080 && word <= 0x00FF) { | |
| 466 if (word == 0x0082 || word == 0x0084 || word == 0x0085 || word == 0x0091 || | |
| 467 word == 0x0092 || word == 0x0093 || word <= 0x0094 || word == 0x0096 || | |
| 468 word == 0x00B4 || word == 0x00B8) { | |
| 469 return TRUE; | |
| 470 } | |
| 471 } else if (word >= 0x2000 && word <= 0x206F) { | |
| 472 if (word == 0x2010 || word == 0x2011 || word == 0x2012 || word == 0x2013 || | |
| 473 word == 0x2018 || word == 0x2019 || word == 0x201A || word == 0x201B || | 465 word == 0x2018 || word == 0x2019 || word == 0x201A || word == 0x201B || |
| 474 word == 0x201C || word == 0x201D || word == 0x201E || word == 0x201F || | 466 word == 0x201C || word == 0x201D || word == 0x201E || word == 0x201F || |
| 475 word == 0x2032 || word == 0x2033 || word == 0x2034 || word == 0x2035 || | 467 word == 0x2032 || word == 0x2033 || word == 0x2034 || word == 0x2035 || |
| 476 word == 0x2036 || word == 0x2037 || word == 0x203C || word == 0x203D || | 468 word == 0x2036 || word == 0x2037 || word == 0x203C || word == 0x203D || |
| 477 word == 0x203E || word == 0x2044) { | 469 word == 0x203E || word == 0x2044); |
| 478 return TRUE; | 470 } |
| 479 } | 471 |
| 480 } else if (word >= 0x3000 && word <= 0x303F) { | 472 if (word >= 0x3000 && word <= 0x303F) { |
| 481 if (word == 0x3001 || word == 0x3002 || word == 0x3003 || word == 0x3005 || | 473 return ( |
| 474 word == 0x3001 || word == 0x3002 || word == 0x3003 || word == 0x3005 || |
| 482 word == 0x3009 || word == 0x300A || word == 0x300B || word == 0x300C || | 475 word == 0x3009 || word == 0x300A || word == 0x300B || word == 0x300C || |
| 483 word == 0x300D || word == 0x300F || word == 0x300E || word == 0x3010 || | 476 word == 0x300D || word == 0x300F || word == 0x300E || word == 0x3010 || |
| 484 word == 0x3011 || word == 0x3014 || word == 0x3015 || word == 0x3016 || | 477 word == 0x3011 || word == 0x3014 || word == 0x3015 || word == 0x3016 || |
| 485 word == 0x3017 || word == 0x3018 || word == 0x3019 || word == 0x301A || | 478 word == 0x3017 || word == 0x3018 || word == 0x3019 || word == 0x301A || |
| 486 word == 0x301B || word == 0x301D || word == 0x301E || word == 0x301F) { | 479 word == 0x301B || word == 0x301D || word == 0x301E || word == 0x301F); |
| 487 return TRUE; | 480 } |
| 488 } | 481 |
| 489 } else if (word >= 0xFE50 && word <= 0xFE6F) { | 482 if (word >= 0xFE50 && word <= 0xFE6F) |
| 490 if ((word >= 0xFE50 && word <= 0xFE5E) || word == 0xFE63) { | 483 return (word >= 0xFE50 && word <= 0xFE5E) || word == 0xFE63; |
| 491 return TRUE; | 484 |
| 492 } | 485 if (word >= 0xFF00 && word <= 0xFFEF) { |
| 493 } else if (word >= 0xFF00 && word <= 0xFFEF) { | 486 return ( |
| 494 if (word == 0xFF01 || word == 0xFF02 || word == 0xFF07 || word == 0xFF08 || | 487 word == 0xFF01 || word == 0xFF02 || word == 0xFF07 || word == 0xFF08 || |
| 495 word == 0xFF09 || word == 0xFF0C || word == 0xFF0E || word == 0xFF0F || | 488 word == 0xFF09 || word == 0xFF0C || word == 0xFF0E || word == 0xFF0F || |
| 496 word == 0xFF1A || word == 0xFF1B || word == 0xFF1F || word == 0xFF3B || | 489 word == 0xFF1A || word == 0xFF1B || word == 0xFF1F || word == 0xFF3B || |
| 497 word == 0xFF3D || word == 0xFF40 || word == 0xFF5B || word == 0xFF5C || | 490 word == 0xFF3D || word == 0xFF40 || word == 0xFF5B || word == 0xFF5C || |
| 498 word == 0xFF5D || word == 0xFF61 || word == 0xFF62 || word == 0xFF63 || | 491 word == 0xFF5D || word == 0xFF61 || word == 0xFF62 || word == 0xFF63 || |
| 499 word == 0xFF64 || word == 0xFF65 || word == 0xFF9E || word == 0xFF9F) { | 492 word == 0xFF64 || word == 0xFF65 || word == 0xFF9E || word == 0xFF9F); |
| 500 return TRUE; | |
| 501 } | |
| 502 } | 493 } |
| 503 return FALSE; | 494 |
| 495 return false; |
| 504 } | 496 } |
| 505 static FX_BOOL IsConnectiveSymbol(FX_DWORD word) { | 497 |
| 506 if (word <= 0x007F) { | 498 static bool IsConnectiveSymbol(FX_DWORD word) { |
| 507 if ((special_chars[word] >> 5) & 1) { | 499 return word <= 0x007F && (special_chars[word] & 0x20); |
| 508 return TRUE; | |
| 509 } | |
| 510 } | |
| 511 return FALSE; | |
| 512 } | 500 } |
| 513 static FX_BOOL IsOpenStylePunctuation(FX_DWORD word) { | 501 |
| 514 if (word <= 0x007F) { | 502 static bool IsOpenStylePunctuation(FX_DWORD word) { |
| 515 if ((special_chars[word] >> 2) & 1) { | 503 if (word <= 0x007F) |
| 516 return TRUE; | 504 return !!(special_chars[word] & 0x04); |
| 517 } | 505 |
| 518 } else if (word == 0x300A || word == 0x300C || word == 0x300E || | 506 return (word == 0x300A || word == 0x300C || word == 0x300E || |
| 519 word == 0x3010 || word == 0x3014 || word == 0x3016 || | 507 word == 0x3010 || word == 0x3014 || word == 0x3016 || |
| 520 word == 0x3018 || word == 0x301A || word == 0xFF08 || | 508 word == 0x3018 || word == 0x301A || word == 0xFF08 || |
| 521 word == 0xFF3B || word == 0xFF5B || word == 0xFF62) { | 509 word == 0xFF3B || word == 0xFF5B || word == 0xFF62); |
| 522 return TRUE; | |
| 523 } | |
| 524 return FALSE; | |
| 525 } | 510 } |
| 526 static FX_BOOL IsCurrencySymbol(FX_WORD word) { | 511 |
| 527 if (word == 0x0024 || word == 0x0080 || word == 0x00A2 || word == 0x00A3 || | 512 static bool IsCurrencySymbol(FX_WORD word) { |
| 528 word == 0x00A4 || word == 0x00A5 || (word >= 0x20A0 && word <= 0x20CF) || | 513 return (word == 0x0024 || word == 0x0080 || word == 0x00A2 || |
| 529 word == 0xFE69 || word == 0xFF04 || word == 0xFFE0 || word == 0xFFE1 || | 514 word == 0x00A3 || word == 0x00A4 || word == 0x00A5 || |
| 530 word == 0xFFE5 || word == 0xFFE6) { | 515 (word >= 0x20A0 && word <= 0x20CF) || word == 0xFE69 || |
| 531 return TRUE; | 516 word == 0xFF04 || word == 0xFFE0 || word == 0xFFE1 || |
| 532 } | 517 word == 0xFFE5 || word == 0xFFE6); |
| 533 return FALSE; | |
| 534 } | 518 } |
| 535 static FX_BOOL IsPrefixSymbol(FX_WORD word) { | 519 |
| 536 if (IsCurrencySymbol(word)) { | 520 static bool IsPrefixSymbol(FX_WORD word) { |
| 537 return TRUE; | 521 return IsCurrencySymbol(word) || word == 0x2116; |
| 538 } | |
| 539 if (word == 0x2116) { | |
| 540 return TRUE; | |
| 541 } | |
| 542 return FALSE; | |
| 543 } | 522 } |
| 544 static FX_BOOL IsSpace(FX_WORD word) { | 523 |
| 524 static bool IsSpace(FX_WORD word) { |
| 545 return word == 0x0020 || word == 0x3000; | 525 return word == 0x0020 || word == 0x3000; |
| 546 } | 526 } |
| 547 static FX_BOOL NeedDivision(FX_WORD prevWord, FX_WORD curWord) { | 527 |
| 528 static bool NeedDivision(FX_WORD prevWord, FX_WORD curWord) { |
| 548 if ((IsLatin(prevWord) || IsDigit(prevWord)) && | 529 if ((IsLatin(prevWord) || IsDigit(prevWord)) && |
| 549 (IsLatin(curWord) || IsDigit(curWord))) { | 530 (IsLatin(curWord) || IsDigit(curWord))) { |
| 550 return FALSE; | 531 return false; |
| 551 } | 532 } |
| 552 if (IsSpace(curWord) || IsPunctuation(curWord)) { | 533 if (IsSpace(curWord) || IsPunctuation(curWord)) { |
| 553 return FALSE; | 534 return false; |
| 554 } | 535 } |
| 555 if (IsConnectiveSymbol(prevWord) || IsConnectiveSymbol(curWord)) { | 536 if (IsConnectiveSymbol(prevWord) || IsConnectiveSymbol(curWord)) { |
| 556 return FALSE; | 537 return false; |
| 557 } | 538 } |
| 558 if (IsSpace(prevWord) || IsPunctuation(prevWord)) { | 539 if (IsSpace(prevWord) || IsPunctuation(prevWord)) { |
| 559 return TRUE; | 540 return true; |
| 560 } | 541 } |
| 561 if (IsPrefixSymbol(prevWord)) { | 542 if (IsPrefixSymbol(prevWord)) { |
| 562 return FALSE; | 543 return false; |
| 563 } | 544 } |
| 564 if (IsPrefixSymbol(curWord) || IsCJK(curWord)) { | 545 if (IsPrefixSymbol(curWord) || IsCJK(curWord)) { |
| 565 return TRUE; | 546 return true; |
| 566 } | 547 } |
| 567 if (IsCJK(prevWord)) { | 548 if (IsCJK(prevWord)) { |
| 568 return TRUE; | 549 return true; |
| 569 } | 550 } |
| 570 return FALSE; | 551 return false; |
| 571 } | 552 } |
| 553 |
| 572 void CTypeset::SplitLines(FX_BOOL bTypeset, FX_FLOAT fFontSize) { | 554 void CTypeset::SplitLines(FX_BOOL bTypeset, FX_FLOAT fFontSize) { |
| 573 ASSERT(m_pVT); | 555 ASSERT(m_pVT); |
| 574 ASSERT(m_pSection); | 556 ASSERT(m_pSection); |
| 575 int32_t nLineHead = 0; | 557 int32_t nLineHead = 0; |
| 576 int32_t nLineTail = 0; | 558 int32_t nLineTail = 0; |
| 577 FX_FLOAT fMaxX = 0.0f, fMaxY = 0.0f; | 559 FX_FLOAT fMaxX = 0.0f, fMaxY = 0.0f; |
| 578 FX_FLOAT fLineWidth = 0.0f, fBackupLineWidth = 0.0f; | 560 FX_FLOAT fLineWidth = 0.0f, fBackupLineWidth = 0.0f; |
| 579 FX_FLOAT fLineAscent = 0.0f, fBackupLineAscent = 0.0f; | 561 FX_FLOAT fLineAscent = 0.0f, fBackupLineAscent = 0.0f; |
| 580 FX_FLOAT fLineDescent = 0.0f, fBackupLineDescent = 0.0f; | 562 FX_FLOAT fLineDescent = 0.0f, fBackupLineDescent = 0.0f; |
| 581 int32_t nWordStartPos = 0; | 563 int32_t nWordStartPos = 0; |
| (...skipping 1234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1816 if (pSection->m_SecInfo.pSecProps) { | 1798 if (pSection->m_SecInfo.pSecProps) { |
| 1817 *pSection->m_SecInfo.pSecProps = section.SecProps; | 1799 *pSection->m_SecInfo.pSecProps = section.SecProps; |
| 1818 } | 1800 } |
| 1819 if (pSection->m_SecInfo.pWordProps) { | 1801 if (pSection->m_SecInfo.pWordProps) { |
| 1820 *pSection->m_SecInfo.pWordProps = section.WordProps; | 1802 *pSection->m_SecInfo.pWordProps = section.WordProps; |
| 1821 } | 1803 } |
| 1822 return TRUE; | 1804 return TRUE; |
| 1823 } | 1805 } |
| 1824 return FALSE; | 1806 return FALSE; |
| 1825 } | 1807 } |
| OLD | NEW |