OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "core/fpdfdoc/pdf_vt.h" | 9 #include "core/fpdfdoc/pdf_vt.h" |
10 #include "core/include/fpdfdoc/fpdf_doc.h" | 10 #include "core/include/fpdfdoc/fpdf_doc.h" |
(...skipping 377 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
388 return CPVT_Size(m_rcRet.Width(), m_rcRet.Height()); | 388 return CPVT_Size(m_rcRet.Width(), m_rcRet.Height()); |
389 } | 389 } |
390 CPVT_FloatRect CTypeset::Typeset() { | 390 CPVT_FloatRect CTypeset::Typeset() { |
391 ASSERT(m_pVT); | 391 ASSERT(m_pVT); |
392 m_pSection->m_LineArray.Empty(); | 392 m_pSection->m_LineArray.Empty(); |
393 SplitLines(TRUE, 0.0f); | 393 SplitLines(TRUE, 0.0f); |
394 m_pSection->m_LineArray.Clear(); | 394 m_pSection->m_LineArray.Clear(); |
395 OutputLines(); | 395 OutputLines(); |
396 return m_rcRet; | 396 return m_rcRet; |
397 } | 397 } |
398 static int special_chars[128] = { | 398 |
399 0x0000, 0x000C, 0x0008, 0x000C, 0x0008, 0x0000, 0x0020, 0x0000, 0x0000, | 399 static const uint8_t special_chars[128] = { |
400 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | 400 0x00, 0x0C, 0x08, 0x0C, 0x08, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, |
401 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | 401 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
402 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0008, 0x0008, 0x0000, | 402 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x00, |
403 0x0010, 0x0000, 0x0000, 0x0028, 0x000C, 0x0008, 0x0000, 0x0000, 0x0028, | 403 0x10, 0x00, 0x00, 0x28, 0x0C, 0x08, 0x00, 0x00, 0x28, 0x28, 0x28, 0x28, |
404 0x0028, 0x0028, 0x0028, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, | 404 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x08, 0x08, |
405 0x0002, 0x0002, 0x0002, 0x0002, 0x0008, 0x0008, 0x0000, 0x0000, 0x0000, | 405 0x00, 0x00, 0x00, 0x08, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, |
406 0x0008, 0x0000, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, | 406 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, |
407 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, | 407 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00, |
408 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, | 408 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, |
409 0x0001, 0x000C, 0x0000, 0x0008, 0x0000, 0x0000, 0x0000, 0x0001, 0x0001, | 409 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, |
410 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, | 410 0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00, |
411 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, | |
412 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x000C, 0x0000, 0x0008, | |
413 0x0000, 0x0000, | |
414 }; | 411 }; |
415 static FX_BOOL IsLatin(FX_WORD word) { | 412 |
416 if (word <= 0x007F) { | 413 static bool IsLatin(FX_WORD word) { |
417 if (special_chars[word] & 0x0001) { | 414 if (word <= 0x007F) |
418 return TRUE; | 415 return !!(special_chars[word] & 0x01); |
419 } | 416 |
420 } | 417 return ((word >= 0x00C0 && word <= 0x00FF) || |
421 if ((word >= 0x00C0 && word <= 0x00FF) || | 418 (word >= 0x0100 && word <= 0x024F) || |
422 (word >= 0x0100 && word <= 0x024F) || | 419 (word >= 0x1E00 && word <= 0x1EFF) || |
423 (word >= 0x1E00 && word <= 0x1EFF) || | 420 (word >= 0x2C60 && word <= 0x2C7F) || |
424 (word >= 0x2C60 && word <= 0x2C7F) || | 421 (word >= 0xA720 && word <= 0xA7FF) || |
425 (word >= 0xA720 && word <= 0xA7FF) || | 422 (word >= 0xFF21 && word <= 0xFF3A) || |
426 (word >= 0xFF21 && word <= 0xFF3A) || | 423 (word >= 0xFF41 && word <= 0xFF5A)); |
427 (word >= 0xFF41 && word <= 0xFF5A)) { | |
428 return TRUE; | |
429 } | |
430 return FALSE; | |
431 } | 424 } |
432 static FX_BOOL IsDigit(FX_DWORD word) { | 425 |
| 426 static bool IsDigit(FX_DWORD word) { |
433 return word >= 0x0030 && word <= 0x0039; | 427 return word >= 0x0030 && word <= 0x0039; |
434 } | 428 } |
435 static FX_BOOL IsCJK(FX_DWORD word) { | 429 |
| 430 static bool IsCJK(FX_DWORD word) { |
436 if ((word >= 0x1100 && word <= 0x11FF) || | 431 if ((word >= 0x1100 && word <= 0x11FF) || |
437 (word >= 0x2E80 && word <= 0x2FFF) || | 432 (word >= 0x2E80 && word <= 0x2FFF) || |
438 (word >= 0x3040 && word <= 0x9FBF) || | 433 (word >= 0x3040 && word <= 0x9FBF) || |
439 (word >= 0xAC00 && word <= 0xD7AF) || | 434 (word >= 0xAC00 && word <= 0xD7AF) || |
440 (word >= 0xF900 && word <= 0xFAFF) || | 435 (word >= 0xF900 && word <= 0xFAFF) || |
441 (word >= 0xFE30 && word <= 0xFE4F) || | 436 (word >= 0xFE30 && word <= 0xFE4F) || |
442 (word >= 0x20000 && word <= 0x2A6DF) || | 437 (word >= 0x20000 && word <= 0x2A6DF) || |
443 (word >= 0x2F800 && word <= 0x2FA1F)) { | 438 (word >= 0x2F800 && word <= 0x2FA1F)) { |
444 return TRUE; | 439 return true; |
445 } | 440 } |
446 if (word >= 0x3000 && word <= 0x303F) { | 441 if (word >= 0x3000 && word <= 0x303F) { |
447 if (word == 0x3005 || word == 0x3006 || word == 0x3021 || word == 0x3022 || | 442 return ( |
| 443 word == 0x3005 || word == 0x3006 || word == 0x3021 || word == 0x3022 || |
448 word == 0x3023 || word == 0x3024 || word == 0x3025 || word == 0x3026 || | 444 word == 0x3023 || word == 0x3024 || word == 0x3025 || word == 0x3026 || |
449 word == 0x3027 || word == 0x3028 || word == 0x3029 || word == 0x3031 || | 445 word == 0x3027 || word == 0x3028 || word == 0x3029 || word == 0x3031 || |
450 word == 0x3032 || word == 0x3033 || word == 0x3034 || word == 0x3035) { | 446 word == 0x3032 || word == 0x3033 || word == 0x3034 || word == 0x3035); |
451 return TRUE; | |
452 } | |
453 return FALSE; | |
454 } | 447 } |
455 if (word >= 0xFF66 && word <= 0xFF9D) { | 448 return word >= 0xFF66 && word <= 0xFF9D; |
456 return TRUE; | 449 } |
| 450 |
| 451 static bool IsPunctuation(FX_DWORD word) { |
| 452 if (word <= 0x007F) |
| 453 return !!(special_chars[word] & 0x08); |
| 454 |
| 455 if (word >= 0x0080 && word <= 0x00FF) { |
| 456 return (word == 0x0082 || word == 0x0084 || word == 0x0085 || |
| 457 word == 0x0091 || word == 0x0092 || word == 0x0093 || |
| 458 word <= 0x0094 || word == 0x0096 || word == 0x00B4 || |
| 459 word == 0x00B8); |
457 } | 460 } |
458 return FALSE; | 461 |
459 } | 462 if (word >= 0x2000 && word <= 0x206F) { |
460 static FX_BOOL IsPunctuation(FX_DWORD word) { | 463 return ( |
461 if (word <= 0x007F) { | 464 word == 0x2010 || word == 0x2011 || word == 0x2012 || word == 0x2013 || |
462 if ((special_chars[word] >> 3) & 1) { | |
463 return TRUE; | |
464 } | |
465 } else if (word >= 0x0080 && word <= 0x00FF) { | |
466 if (word == 0x0082 || word == 0x0084 || word == 0x0085 || word == 0x0091 || | |
467 word == 0x0092 || word == 0x0093 || word <= 0x0094 || word == 0x0096 || | |
468 word == 0x00B4 || word == 0x00B8) { | |
469 return TRUE; | |
470 } | |
471 } else if (word >= 0x2000 && word <= 0x206F) { | |
472 if (word == 0x2010 || word == 0x2011 || word == 0x2012 || word == 0x2013 || | |
473 word == 0x2018 || word == 0x2019 || word == 0x201A || word == 0x201B || | 465 word == 0x2018 || word == 0x2019 || word == 0x201A || word == 0x201B || |
474 word == 0x201C || word == 0x201D || word == 0x201E || word == 0x201F || | 466 word == 0x201C || word == 0x201D || word == 0x201E || word == 0x201F || |
475 word == 0x2032 || word == 0x2033 || word == 0x2034 || word == 0x2035 || | 467 word == 0x2032 || word == 0x2033 || word == 0x2034 || word == 0x2035 || |
476 word == 0x2036 || word == 0x2037 || word == 0x203C || word == 0x203D || | 468 word == 0x2036 || word == 0x2037 || word == 0x203C || word == 0x203D || |
477 word == 0x203E || word == 0x2044) { | 469 word == 0x203E || word == 0x2044); |
478 return TRUE; | 470 } |
479 } | 471 |
480 } else if (word >= 0x3000 && word <= 0x303F) { | 472 if (word >= 0x3000 && word <= 0x303F) { |
481 if (word == 0x3001 || word == 0x3002 || word == 0x3003 || word == 0x3005 || | 473 return ( |
| 474 word == 0x3001 || word == 0x3002 || word == 0x3003 || word == 0x3005 || |
482 word == 0x3009 || word == 0x300A || word == 0x300B || word == 0x300C || | 475 word == 0x3009 || word == 0x300A || word == 0x300B || word == 0x300C || |
483 word == 0x300D || word == 0x300F || word == 0x300E || word == 0x3010 || | 476 word == 0x300D || word == 0x300F || word == 0x300E || word == 0x3010 || |
484 word == 0x3011 || word == 0x3014 || word == 0x3015 || word == 0x3016 || | 477 word == 0x3011 || word == 0x3014 || word == 0x3015 || word == 0x3016 || |
485 word == 0x3017 || word == 0x3018 || word == 0x3019 || word == 0x301A || | 478 word == 0x3017 || word == 0x3018 || word == 0x3019 || word == 0x301A || |
486 word == 0x301B || word == 0x301D || word == 0x301E || word == 0x301F) { | 479 word == 0x301B || word == 0x301D || word == 0x301E || word == 0x301F); |
487 return TRUE; | 480 } |
488 } | 481 |
489 } else if (word >= 0xFE50 && word <= 0xFE6F) { | 482 if (word >= 0xFE50 && word <= 0xFE6F) |
490 if ((word >= 0xFE50 && word <= 0xFE5E) || word == 0xFE63) { | 483 return (word >= 0xFE50 && word <= 0xFE5E) || word == 0xFE63; |
491 return TRUE; | 484 |
492 } | 485 if (word >= 0xFF00 && word <= 0xFFEF) { |
493 } else if (word >= 0xFF00 && word <= 0xFFEF) { | 486 return ( |
494 if (word == 0xFF01 || word == 0xFF02 || word == 0xFF07 || word == 0xFF08 || | 487 word == 0xFF01 || word == 0xFF02 || word == 0xFF07 || word == 0xFF08 || |
495 word == 0xFF09 || word == 0xFF0C || word == 0xFF0E || word == 0xFF0F || | 488 word == 0xFF09 || word == 0xFF0C || word == 0xFF0E || word == 0xFF0F || |
496 word == 0xFF1A || word == 0xFF1B || word == 0xFF1F || word == 0xFF3B || | 489 word == 0xFF1A || word == 0xFF1B || word == 0xFF1F || word == 0xFF3B || |
497 word == 0xFF3D || word == 0xFF40 || word == 0xFF5B || word == 0xFF5C || | 490 word == 0xFF3D || word == 0xFF40 || word == 0xFF5B || word == 0xFF5C || |
498 word == 0xFF5D || word == 0xFF61 || word == 0xFF62 || word == 0xFF63 || | 491 word == 0xFF5D || word == 0xFF61 || word == 0xFF62 || word == 0xFF63 || |
499 word == 0xFF64 || word == 0xFF65 || word == 0xFF9E || word == 0xFF9F) { | 492 word == 0xFF64 || word == 0xFF65 || word == 0xFF9E || word == 0xFF9F); |
500 return TRUE; | |
501 } | |
502 } | 493 } |
503 return FALSE; | 494 |
| 495 return false; |
504 } | 496 } |
505 static FX_BOOL IsConnectiveSymbol(FX_DWORD word) { | 497 |
506 if (word <= 0x007F) { | 498 static bool IsConnectiveSymbol(FX_DWORD word) { |
507 if ((special_chars[word] >> 5) & 1) { | 499 return word <= 0x007F && (special_chars[word] & 0x20); |
508 return TRUE; | |
509 } | |
510 } | |
511 return FALSE; | |
512 } | 500 } |
513 static FX_BOOL IsOpenStylePunctuation(FX_DWORD word) { | 501 |
514 if (word <= 0x007F) { | 502 static bool IsOpenStylePunctuation(FX_DWORD word) { |
515 if ((special_chars[word] >> 2) & 1) { | 503 if (word <= 0x007F) |
516 return TRUE; | 504 return !!(special_chars[word] & 0x04); |
517 } | 505 |
518 } else if (word == 0x300A || word == 0x300C || word == 0x300E || | 506 return (word == 0x300A || word == 0x300C || word == 0x300E || |
519 word == 0x3010 || word == 0x3014 || word == 0x3016 || | 507 word == 0x3010 || word == 0x3014 || word == 0x3016 || |
520 word == 0x3018 || word == 0x301A || word == 0xFF08 || | 508 word == 0x3018 || word == 0x301A || word == 0xFF08 || |
521 word == 0xFF3B || word == 0xFF5B || word == 0xFF62) { | 509 word == 0xFF3B || word == 0xFF5B || word == 0xFF62); |
522 return TRUE; | |
523 } | |
524 return FALSE; | |
525 } | 510 } |
526 static FX_BOOL IsCurrencySymbol(FX_WORD word) { | 511 |
527 if (word == 0x0024 || word == 0x0080 || word == 0x00A2 || word == 0x00A3 || | 512 static bool IsCurrencySymbol(FX_WORD word) { |
528 word == 0x00A4 || word == 0x00A5 || (word >= 0x20A0 && word <= 0x20CF) || | 513 return (word == 0x0024 || word == 0x0080 || word == 0x00A2 || |
529 word == 0xFE69 || word == 0xFF04 || word == 0xFFE0 || word == 0xFFE1 || | 514 word == 0x00A3 || word == 0x00A4 || word == 0x00A5 || |
530 word == 0xFFE5 || word == 0xFFE6) { | 515 (word >= 0x20A0 && word <= 0x20CF) || word == 0xFE69 || |
531 return TRUE; | 516 word == 0xFF04 || word == 0xFFE0 || word == 0xFFE1 || |
532 } | 517 word == 0xFFE5 || word == 0xFFE6); |
533 return FALSE; | |
534 } | 518 } |
535 static FX_BOOL IsPrefixSymbol(FX_WORD word) { | 519 |
536 if (IsCurrencySymbol(word)) { | 520 static bool IsPrefixSymbol(FX_WORD word) { |
537 return TRUE; | 521 return IsCurrencySymbol(word) || word == 0x2116; |
538 } | |
539 if (word == 0x2116) { | |
540 return TRUE; | |
541 } | |
542 return FALSE; | |
543 } | 522 } |
544 static FX_BOOL IsSpace(FX_WORD word) { | 523 |
| 524 static bool IsSpace(FX_WORD word) { |
545 return word == 0x0020 || word == 0x3000; | 525 return word == 0x0020 || word == 0x3000; |
546 } | 526 } |
547 static FX_BOOL NeedDivision(FX_WORD prevWord, FX_WORD curWord) { | 527 |
| 528 static bool NeedDivision(FX_WORD prevWord, FX_WORD curWord) { |
548 if ((IsLatin(prevWord) || IsDigit(prevWord)) && | 529 if ((IsLatin(prevWord) || IsDigit(prevWord)) && |
549 (IsLatin(curWord) || IsDigit(curWord))) { | 530 (IsLatin(curWord) || IsDigit(curWord))) { |
550 return FALSE; | 531 return false; |
551 } | 532 } |
552 if (IsSpace(curWord) || IsPunctuation(curWord)) { | 533 if (IsSpace(curWord) || IsPunctuation(curWord)) { |
553 return FALSE; | 534 return false; |
554 } | 535 } |
555 if (IsConnectiveSymbol(prevWord) || IsConnectiveSymbol(curWord)) { | 536 if (IsConnectiveSymbol(prevWord) || IsConnectiveSymbol(curWord)) { |
556 return FALSE; | 537 return false; |
557 } | 538 } |
558 if (IsSpace(prevWord) || IsPunctuation(prevWord)) { | 539 if (IsSpace(prevWord) || IsPunctuation(prevWord)) { |
559 return TRUE; | 540 return true; |
560 } | 541 } |
561 if (IsPrefixSymbol(prevWord)) { | 542 if (IsPrefixSymbol(prevWord)) { |
562 return FALSE; | 543 return false; |
563 } | 544 } |
564 if (IsPrefixSymbol(curWord) || IsCJK(curWord)) { | 545 if (IsPrefixSymbol(curWord) || IsCJK(curWord)) { |
565 return TRUE; | 546 return true; |
566 } | 547 } |
567 if (IsCJK(prevWord)) { | 548 if (IsCJK(prevWord)) { |
568 return TRUE; | 549 return true; |
569 } | 550 } |
570 return FALSE; | 551 return false; |
571 } | 552 } |
| 553 |
572 void CTypeset::SplitLines(FX_BOOL bTypeset, FX_FLOAT fFontSize) { | 554 void CTypeset::SplitLines(FX_BOOL bTypeset, FX_FLOAT fFontSize) { |
573 ASSERT(m_pVT); | 555 ASSERT(m_pVT); |
574 ASSERT(m_pSection); | 556 ASSERT(m_pSection); |
575 int32_t nLineHead = 0; | 557 int32_t nLineHead = 0; |
576 int32_t nLineTail = 0; | 558 int32_t nLineTail = 0; |
577 FX_FLOAT fMaxX = 0.0f, fMaxY = 0.0f; | 559 FX_FLOAT fMaxX = 0.0f, fMaxY = 0.0f; |
578 FX_FLOAT fLineWidth = 0.0f, fBackupLineWidth = 0.0f; | 560 FX_FLOAT fLineWidth = 0.0f, fBackupLineWidth = 0.0f; |
579 FX_FLOAT fLineAscent = 0.0f, fBackupLineAscent = 0.0f; | 561 FX_FLOAT fLineAscent = 0.0f, fBackupLineAscent = 0.0f; |
580 FX_FLOAT fLineDescent = 0.0f, fBackupLineDescent = 0.0f; | 562 FX_FLOAT fLineDescent = 0.0f, fBackupLineDescent = 0.0f; |
581 int32_t nWordStartPos = 0; | 563 int32_t nWordStartPos = 0; |
(...skipping 1234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1816 if (pSection->m_SecInfo.pSecProps) { | 1798 if (pSection->m_SecInfo.pSecProps) { |
1817 *pSection->m_SecInfo.pSecProps = section.SecProps; | 1799 *pSection->m_SecInfo.pSecProps = section.SecProps; |
1818 } | 1800 } |
1819 if (pSection->m_SecInfo.pWordProps) { | 1801 if (pSection->m_SecInfo.pWordProps) { |
1820 *pSection->m_SecInfo.pWordProps = section.WordProps; | 1802 *pSection->m_SecInfo.pWordProps = section.WordProps; |
1821 } | 1803 } |
1822 return TRUE; | 1804 return TRUE; |
1823 } | 1805 } |
1824 return FALSE; | 1806 return FALSE; |
1825 } | 1807 } |
OLD | NEW |