Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(758)

Side by Side Diff: third_party/WebKit/Source/core/editing/EditingUtilities.cpp

Issue 1833413002: [All-in-one patch] Implement own grapheme boundary breaker for editing. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions 5 * modification, are permitted provided that the following conditions
6 * are met: 6 * are met:
7 * 1. Redistributions of source code must retain the above copyright 7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer. 8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright 9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the 10 * notice, this list of conditions and the following disclaimer in the
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
45 #include "core/frame/UseCounter.h" 45 #include "core/frame/UseCounter.h"
46 #include "core/html/HTMLBRElement.h" 46 #include "core/html/HTMLBRElement.h"
47 #include "core/html/HTMLDivElement.h" 47 #include "core/html/HTMLDivElement.h"
48 #include "core/html/HTMLLIElement.h" 48 #include "core/html/HTMLLIElement.h"
49 #include "core/html/HTMLParagraphElement.h" 49 #include "core/html/HTMLParagraphElement.h"
50 #include "core/html/HTMLSpanElement.h" 50 #include "core/html/HTMLSpanElement.h"
51 #include "core/html/HTMLTableCellElement.h" 51 #include "core/html/HTMLTableCellElement.h"
52 #include "core/html/HTMLUListElement.h" 52 #include "core/html/HTMLUListElement.h"
53 #include "core/layout/LayoutObject.h" 53 #include "core/layout/LayoutObject.h"
54 #include "core/layout/LayoutTableCell.h" 54 #include "core/layout/LayoutTableCell.h"
55 #include "platform/fonts/Character.h"
56 #include "third_party/icu/source/common/unicode/uchar.h"
57 #include "third_party/icu/source/common/unicode/utf16.h"
55 #include "wtf/Assertions.h" 58 #include "wtf/Assertions.h"
56 #include "wtf/StdLibExtras.h" 59 #include "wtf/StdLibExtras.h"
57 #include "wtf/text/StringBuilder.h" 60 #include "wtf/text/StringBuilder.h"
58 61
62 #include <algorithm>
63
59 namespace blink { 64 namespace blink {
60 65
61 using namespace HTMLNames; 66 using namespace HTMLNames;
62 67
63 // Atomic means that the node has no children, or has children which are ignored for the 68 // Atomic means that the node has no children, or has children which are ignored for the
64 // purposes of editing. 69 // purposes of editing.
65 bool isAtomicNode(const Node *node) 70 bool isAtomicNode(const Node *node)
66 { 71 {
67 return node && (!node->hasChildren() || editingIgnoresContent(node)); 72 return node && (!node->hasChildren() || editingIgnoresContent(node));
68 } 73 }
(...skipping 466 matching lines...) Expand 10 before | Expand all | Expand 10 after
535 Position lastEditablePositionBeforePositionInRoot(const Position& position, Node & highestRoot) 540 Position lastEditablePositionBeforePositionInRoot(const Position& position, Node & highestRoot)
536 { 541 {
537 return lastEditablePositionBeforePositionInRootAlgorithm<EditingStrategy>(po sition, highestRoot); 542 return lastEditablePositionBeforePositionInRootAlgorithm<EditingStrategy>(po sition, highestRoot);
538 } 543 }
539 544
540 PositionInFlatTree lastEditablePositionBeforePositionInRoot(const PositionInFlat Tree& position, Node& highestRoot) 545 PositionInFlatTree lastEditablePositionBeforePositionInRoot(const PositionInFlat Tree& position, Node& highestRoot)
541 { 546 {
542 return lastEditablePositionBeforePositionInRootAlgorithm<EditingInFlatTreeSt rategy>(position, highestRoot); 547 return lastEditablePositionBeforePositionInRootAlgorithm<EditingInFlatTreeSt rategy>(position, highestRoot);
543 } 548 }
544 549
550 // Returns true if the code point has Glue_After_Zwj grapheme break property.
551 // See http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakProperty-9.0.0d18.txt
552 bool isGlueAfterZwj(uint32_t codePoint)
553 {
554 return codePoint == 0x2764 // HEAVY BLACK HEART.
555 || codePoint == 0x1F48B // KISS MARK.
556 || codePoint == 0x1F5E8; // LEFT SPEECH BUBBLE.
557 }
558
559 // Returns true if the code point has the E_Base_GAZ grapheme break property.
560 // See http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakProperty-9.0.0d18.txt
561 bool isEBaseGAZ(uint32_t codePoint)
562 {
563 return codePoint >= 0x1F466 && codePoint <= 0x1F469;
564 }
565
566 // The list of code points that have the Indic_Syllabic_Category=Virama property.
567 // Must be sorted.
568 const uint32_t kIndicSyllabicCategoryViramaList[] = {
569 0x094D, 0x09CD, 0x0A4D, 0x0ACD, 0x0B4D, 0x0BCD, 0x0C4D, 0x0CCD, 0x0D4D, 0x0D CA, 0x1B44, 0xA8C4,
570 0xA9C0, 0x11046, 0x110B9, 0x111C0, 0x11235, 0x1134D, 0x11442, 0x114C2, 0x115 BF, 0x1163F,
571 0x116B6, 0x11C3F,
572 };
573
574 // Returns true if the code point has Indic_Syllabic_Category=Virama property.
575 bool isIndicSyllabicCategoryVirama(uint32_t codePoint)
576 {
577 const int length =
yosin_UTC9 2016/03/28 09:34:49 Please use |WTF_ARRAY_LENGTH(array)| in "wtf/StdLi
Seigo Nonaka 2016/03/28 13:42:18 Done.
578 sizeof(kIndicSyllabicCategoryViramaList) / sizeof(kIndicSyllabicCategory ViramaList[0]);
579 return std::binary_search(kIndicSyllabicCategoryViramaList,
580 kIndicSyllabicCategoryViramaList + length,
581 codePoint);
582 }
583
584 bool isGraphemeBreak(const uint16_t* buf, int length, int offset)
585 {
586 // The following breaking rules come from Unicode Standard Annex #29 on Unicode Text Segmentation.
yosin_UTC9 2016/03/28 09:34:49 Could you add following |DCHECK()|? DCHECK_GE(len
Seigo Nonaka 2016/03/28 13:42:18 Done.
587 // See http://www.unicode.org/reports/tr29/
588 // Note that some of the rules are still proposals. Also see http://www.unicode.org/reports/tr29/proposed.html
589 const uint32_t ZWJ = 0x200D;
590
591 // Rule1 GB1 sot ÷
592 // Rule2 GB2 ÷ eot
593 if (offset <= 0 || offset >= length) {
yosin_UTC9 2016/03/28 09:34:49 nit: No need to have |{}| for one line then-clause
Seigo Nonaka 2016/03/28 13:42:18 Done.
594 return true;
595 }
596 if (U16_IS_TRAIL(buf[offset])) {
597 // Don't break a surrogate pair, but break lonely trailing surrogate.
598 return !U16_IS_LEAD(buf[offset - 1]);
599 }
600 uint32_t prevCodePoint = 0;
601 uint32_t nextCodePoint = 0;
602 int offsetBack = offset;
603 U16_PREV(buf, 0, offsetBack, prevCodePoint);
604 U16_NEXT(buf, offset, length, nextCodePoint);
605 int prevProp = u_getIntPropertyValue(prevCodePoint, UCHAR_GRAPHEME_CLUSTER_B REAK);
606 int nextProp = u_getIntPropertyValue(nextCodePoint, UCHAR_GRAPHEME_CLUSTER_B REAK);
607
608 // Rule GB3, CR x LF
609 if (prevProp == U_GCB_CR && nextProp == U_GCB_LF) {
610 return false;
611 }
612
613 // Rule GB4, (Control | CR | LF) ÷
614 if (prevProp == U_GCB_CONTROL || prevProp == U_GCB_CR || prevProp == U_GCB_L F) {
615 return true;
616 }
617
618 // Rule GB5, ÷ (Control | CR | LF)
619 if (nextProp == U_GCB_CONTROL || nextProp == U_GCB_CR || nextProp == U_GCB_L F) {
620 return true;
621 }
622
623 // Rule GB6, L x (L | V | LV | LVT)
624 if (prevProp == U_GCB_L
625 && (nextProp == U_GCB_L || nextProp == U_GCB_V || nextProp == U_GCB_LV | | nextProp == U_GCB_LVT)) {
626 return false;
627 }
628
629 // Rule GB7, (LV | V) x (V | T)
630 if ((prevProp == U_GCB_LV || prevProp == U_GCB_V) && (nextProp == U_GCB_V || nextProp == U_GCB_T)) {
631 return false;
632 }
633
634 // Rule GB8, (LVT | T) x T
635 if ((prevProp == U_GCB_LVT || prevProp == U_GCB_T) && nextProp == U_GCB_T) {
636 return false;
637 }
638
639 // Rule GB8a
640 //
641 // sot (RI RI)* RI x RI
642 // [^RI] (RI RI)* RI x RI
643 // RI ÷ RI
644 if (Character::isRegionalIndicator(prevCodePoint) && Character::isRegionalIn dicator(nextCodePoint)) {
645 while (offsetBack > 0) {
646 U16_PREV(buf, 0, offsetBack, prevCodePoint);
647 if (!Character::isRegionalIndicator(prevCodePoint)) {
648 offsetBack += U16_LENGTH(prevCodePoint);
649 break;
650 }
651 }
652
653 // Note that the offset has moved forward 2 code units by U16_NEXT.
654 // The number 4 comes from the number of code units in a whole flag.
655 return (offset - 2 - offsetBack) % 4 == 0;
656 }
657
658 // Rule GB9, x (Extend | ZWJ)
659 // Rule GB9a, x SpacingMark
660 if (nextProp == U_GCB_EXTEND || nextCodePoint == ZWJ || nextProp == U_GCB_SP ACING_MARK) {
661 return false;
662 }
663
664 // Rule GB9b, Prepend x
665 if (prevProp == U_GCB_PREPEND) {
666 return false;
667 }
668
669 // Cluster Indic syllables together.
670 if (isIndicSyllabicCategoryVirama(prevCodePoint)
671 && u_getIntPropertyValue(nextCodePoint, UCHAR_GENERAL_CATEGORY) == U_OTH ER_LETTER) {
672 return false;
673 }
674
675 // Proposed Rule GB10, (E_Base | EBG) x E_Modifier
676 if ((Character::isEmojiModifierBase(prevCodePoint) || isEBaseGAZ(prevCodePoi nt))
677 && Character::isModifier(nextCodePoint)) {
678 return false;
679 }
680
681 // Proposed Rule GB11, ZWJ x (Glue_After_Zwj | EBG)
682 if (prevCodePoint == ZWJ && (isGlueAfterZwj(nextCodePoint) || isEBaseGAZ(nex tCodePoint))) {
683 return false;
684 }
685
686 // Rule GB999 any ÷ any
687 return true;
688 }
689
545 int uncheckedPreviousOffset(const Node* node, int current) 690 int uncheckedPreviousOffset(const Node* node, int current)
546 { 691 {
547 if (!node->isTextNode()) 692 if (!node->isTextNode())
yosin_UTC9 2016/03/28 09:34:49 Could you add |DCHECK_GE(current, 0)|?
Seigo Nonaka 2016/03/28 13:42:18 Done.
548 return current - 1; 693 return current - 1;
694 if (current <= 1)
695 return current - 1; // It's fine to return -1;
549 const String& text = toText(node)->data(); 696 const String& text = toText(node)->data();
550 if (text.is8Bit()) 697 if (text.is8Bit()) {
551 return current - 1; // TODO(nona): Good to support CR x LF. 698 const uint8_t* buf = text.characters8();
552 TextBreakIterator* iterator = cursorMovementIterator(text.characters16(), te xt.length()); 699 --current;
553 if (!iterator) 700 if (buf[current - 1] == 0x0D && buf[current] == 0x0A)
554 return current - 1; 701 --current;
555 const int result = iterator->preceding(current); 702 return current;
556 return result == TextBreakDone ? current - 1 : result; 703 }
704 const uint16_t* buf = text.characters16();
705 const int length = text.length();
706 while (!isGraphemeBreak(buf, length, --current)) {}
yosin_UTC9 2016/03/28 09:34:49 MEMO: Google style allowed one line loop: https://
Seigo Nonaka 2016/03/28 13:42:18 Sure, but let me keep current style since neither
707 return current;
557 } 708 }
558 709
559 static int uncheckedPreviousOffsetForBackwardDeletion(const Node* n, int current ) 710 static int uncheckedPreviousOffsetForBackwardDeletion(const Node* n, int current )
560 { 711 {
561 return n->layoutObject() ? n->layoutObject()->previousOffsetForBackwardDelet ion(current) : current - 1; 712 return n->layoutObject() ? n->layoutObject()->previousOffsetForBackwardDelet ion(current) : current - 1;
562 } 713 }
563 714
564 int uncheckedNextOffset(const Node* node, int current) 715 int uncheckedNextOffset(const Node* node, int current)
565 { 716 {
566 if (!node->isTextNode()) 717 if (!node->isTextNode())
yosin_UTC9 2016/03/28 09:34:49 Could you add |DCHECK_GE(current, 0)|?
Seigo Nonaka 2016/03/28 13:42:18 Done.
567 return current + 1; 718 return current + 1;
568 const String& text = toText(node)->data(); 719 const String& text = toText(node)->data();
569 if (text.is8Bit()) 720 const int length = text.length();
570 return current + 1; // TODO(nona): Good to support CR x LF. 721 if (current >= (length - 1))
571 TextBreakIterator* iterator = cursorMovementIterator(text.characters16(), te xt.length()); 722 return current + 1; // It's fine to return length + 1;
572 if (!iterator) 723 if (text.is8Bit()) {
573 return current + 1; 724 const uint8_t* buf = text.characters8();
574 const int result = iterator->following(current); 725 ++current;
575 return result == TextBreakDone ? current + 1 : result; 726 if (buf[current - 1] == 0x0D && buf[current] == 0x0A)
727 ++current;
728 return current;
729 }
730 const uint16_t* buf = text.characters16();
731 while (!isGraphemeBreak(buf, length, ++current)) {}
732 return current;
576 } 733 }
577 734
578 template <typename Strategy> 735 template <typename Strategy>
579 PositionTemplate<Strategy> previousPositionOfAlgorithm(const PositionTemplate<St rategy>& position, PositionMoveType moveType) 736 PositionTemplate<Strategy> previousPositionOfAlgorithm(const PositionTemplate<St rategy>& position, PositionMoveType moveType)
580 { 737 {
581 Node* const node = position.anchorNode(); 738 Node* const node = position.anchorNode();
582 if (!node) 739 if (!node)
583 return position; 740 return position;
584 741
585 const int offset = position.computeEditingOffset(); 742 const int offset = position.computeEditingOffset();
(...skipping 1078 matching lines...) Expand 10 before | Expand all | Expand 10 after
1664 // instead of possibly at the end of the last node before the selection 1821 // instead of possibly at the end of the last node before the selection
1665 return mostForwardCaretPosition(visiblePosition.deepEquivalent()); 1822 return mostForwardCaretPosition(visiblePosition.deepEquivalent());
1666 } 1823 }
1667 1824
1668 bool isTextSecurityNode(const Node* node) 1825 bool isTextSecurityNode(const Node* node)
1669 { 1826 {
1670 return node && node->layoutObject() && node->layoutObject()->style()->textSe curity() != TSNONE; 1827 return node && node->layoutObject() && node->layoutObject()->style()->textSe curity() != TSNONE;
1671 } 1828 }
1672 1829
1673 } // namespace blink 1830 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698