Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(125)

Side by Side Diff: third_party/WebKit/Source/core/editing/EditingUtilities.cpp

Issue 1833413002: [All-in-one patch] Implement own grapheme boundary breaker for editing. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressed comments Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions 5 * modification, are permitted provided that the following conditions
6 * are met: 6 * are met:
7 * 1. Redistributions of source code must retain the above copyright 7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer. 8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright 9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the 10 * notice, this list of conditions and the following disclaimer in the
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
45 #include "core/frame/UseCounter.h" 45 #include "core/frame/UseCounter.h"
46 #include "core/html/HTMLBRElement.h" 46 #include "core/html/HTMLBRElement.h"
47 #include "core/html/HTMLDivElement.h" 47 #include "core/html/HTMLDivElement.h"
48 #include "core/html/HTMLLIElement.h" 48 #include "core/html/HTMLLIElement.h"
49 #include "core/html/HTMLParagraphElement.h" 49 #include "core/html/HTMLParagraphElement.h"
50 #include "core/html/HTMLSpanElement.h" 50 #include "core/html/HTMLSpanElement.h"
51 #include "core/html/HTMLTableCellElement.h" 51 #include "core/html/HTMLTableCellElement.h"
52 #include "core/html/HTMLUListElement.h" 52 #include "core/html/HTMLUListElement.h"
53 #include "core/layout/LayoutObject.h" 53 #include "core/layout/LayoutObject.h"
54 #include "core/layout/LayoutTableCell.h" 54 #include "core/layout/LayoutTableCell.h"
55 #include "platform/fonts/Character.h"
56 #include "third_party/icu/source/common/unicode/uchar.h"
57 #include "third_party/icu/source/common/unicode/utf16.h"
55 #include "wtf/Assertions.h" 58 #include "wtf/Assertions.h"
56 #include "wtf/StdLibExtras.h" 59 #include "wtf/StdLibExtras.h"
57 #include "wtf/text/StringBuilder.h" 60 #include "wtf/text/StringBuilder.h"
58 61
62 #include <algorithm>
63
59 namespace blink { 64 namespace blink {
60 65
61 using namespace HTMLNames; 66 using namespace HTMLNames;
62 67
63 // Atomic means that the node has no children, or has children which are ignored for the 68 // Atomic means that the node has no children, or has children which are ignored for the
64 // purposes of editing. 69 // purposes of editing.
65 bool isAtomicNode(const Node *node) 70 bool isAtomicNode(const Node *node)
66 { 71 {
67 return node && (!node->hasChildren() || editingIgnoresContent(node)); 72 return node && (!node->hasChildren() || editingIgnoresContent(node));
68 } 73 }
(...skipping 466 matching lines...) Expand 10 before | Expand all | Expand 10 after
535 Position lastEditablePositionBeforePositionInRoot(const Position& position, Node & highestRoot) 540 Position lastEditablePositionBeforePositionInRoot(const Position& position, Node & highestRoot)
536 { 541 {
537 return lastEditablePositionBeforePositionInRootAlgorithm<EditingStrategy>(po sition, highestRoot); 542 return lastEditablePositionBeforePositionInRootAlgorithm<EditingStrategy>(po sition, highestRoot);
538 } 543 }
539 544
540 PositionInFlatTree lastEditablePositionBeforePositionInRoot(const PositionInFlat Tree& position, Node& highestRoot) 545 PositionInFlatTree lastEditablePositionBeforePositionInRoot(const PositionInFlat Tree& position, Node& highestRoot)
541 { 546 {
542 return lastEditablePositionBeforePositionInRootAlgorithm<EditingInFlatTreeSt rategy>(position, highestRoot); 547 return lastEditablePositionBeforePositionInRootAlgorithm<EditingInFlatTreeSt rategy>(position, highestRoot);
543 } 548 }
544 549
550 // Returns true if the code point has Glue_After_Zwj grapheme break property.
551 // See http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakProperty-9.0.0d18.txt
552 bool isGlueAfterZwj(uint32_t codePoint)
553 {
554 return codePoint == 0x2764 // HEAVY BLACK HEART.
555 || codePoint == 0x1F48B // KISS MARK.
556 || codePoint == 0x1F5E8; // LEFT SPEECH BUBBLE.
557 }
558
559 // Returns true if the code point has E_Base_GAZ grapheme break property.
560 // See http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakProperty-9.0.0d18.txt
561 bool isEBaseGAZ(uint32_t codePoint)
562 {
563 return codePoint >= 0x1F466 && codePoint <= 0x1F469;
564 }
565
566 // The list of code points which have the Indic_Syllabic_Category=Virama property.
567 // Must be sorted.
568 const uint32_t kIndicSyllabicCategoryViramaList[] = {
569 0x094D, 0x09CD, 0x0A4D, 0x0ACD, 0x0B4D, 0x0BCD, 0x0C4D, 0x0CCD, 0x0D4D, 0x0D CA, 0x1B44, 0xA8C4,
570 0xA9C0, 0x11046, 0x110B9, 0x111C0, 0x11235, 0x1134D, 0x11442, 0x114C2, 0x115 BF, 0x1163F,
571 0x116B6, 0x11C3F,
572 };
573
574 // Returns true if the code point has Indic_Syllabic_Category=Virama property.
575 bool isIndicSyllabicCategoryVirama(uint32_t codePoint)
576 {
577 const int length = WTF_ARRAY_LENGTH(kIndicSyllabicCategoryViramaList);
578 return std::binary_search(kIndicSyllabicCategoryViramaList,
579 kIndicSyllabicCategoryViramaList + length,
580 codePoint);
581 }
582
583 bool isGraphemeBreak(const uint16_t* buf, int length, int offset)
yosin_UTC9 2016/03/29 01:27:30 Can we make this in state machine as BackspaceStat
584 {
585 DCHECK_GE(length, 0);
586 DCHECK_GE(offset, 0);
587 DCHECK_LE(offset, length);
588 // The following breaking rules come from Unicode Standard Annex #29 on Unicode Text Segmentation.
589 // See http://www.unicode.org/reports/tr29/
590 // Note that some of the rules are still proposals. Also see http://www.unicode.org/reports/tr29/proposed.html
591 const uint32_t ZWJ = 0x200D;
592
593 // Rule1 GB1 sot ÷
594 // Rule2 GB2 ÷ eot
595 if (offset <= 0 || offset >= length)
596 return true;
597 if (U16_IS_TRAIL(buf[offset])) {
598 // Don't break a surrogate pair, but break lonely trailing surrogate.
599 return !U16_IS_LEAD(buf[offset - 1]);
600 }
601 uint32_t prevCodePoint = 0;
602 uint32_t nextCodePoint = 0;
603 int offsetBack = offset;
604 U16_PREV(buf, 0, offsetBack, prevCodePoint);
605 U16_NEXT(buf, offset, length, nextCodePoint);
606 int prevProp = u_getIntPropertyValue(prevCodePoint, UCHAR_GRAPHEME_CLUSTER_B REAK);
607 int nextProp = u_getIntPropertyValue(nextCodePoint, UCHAR_GRAPHEME_CLUSTER_B REAK);
608
609 // Rule GB3, CR x LF
610 if (prevProp == U_GCB_CR && nextProp == U_GCB_LF)
611 return false;
612
613 // Rule GB4, (Control | CR | LF) ÷
614 if (prevProp == U_GCB_CONTROL || prevProp == U_GCB_CR || prevProp == U_GCB_L F)
615 return true;
616
617 // Rule GB5, ÷ (Control | CR | LF)
618 if (nextProp == U_GCB_CONTROL || nextProp == U_GCB_CR || nextProp == U_GCB_L F)
619 return true;
620
621 // Rule GB6, L x (L | V | LV | LVT)
622 if (prevProp == U_GCB_L
623 && (nextProp == U_GCB_L || nextProp == U_GCB_V || nextProp == U_GCB_LV | | nextProp == U_GCB_LVT))
624 return false;
625
626 // Rule GB7, (LV | V) x (V | T)
627 if ((prevProp == U_GCB_LV || prevProp == U_GCB_V) && (nextProp == U_GCB_V || nextProp == U_GCB_T))
628 return false;
629
630 // Rule GB8, (LVT | T) x T
631 if ((prevProp == U_GCB_LVT || prevProp == U_GCB_T) && nextProp == U_GCB_T)
632 return false;
633
634 // Rule GB8a
635 //
636 // sot (RI RI)* RI x RI
637 // [^RI] (RI RI)* RI x RI
638 // RI ÷ RI
639 if (Character::isRegionalIndicator(prevCodePoint) && Character::isRegionalIn dicator(nextCodePoint)) {
640 while (offsetBack > 0) {
641 U16_PREV(buf, 0, offsetBack, prevCodePoint);
642 if (!Character::isRegionalIndicator(prevCodePoint)) {
643 offsetBack += U16_LENGTH(prevCodePoint);
644 break;
645 }
646 }
647
648 // Note that the offset has moved forward 2 code units by U16_NEXT.
649 // The number 4 comes from the number of code units in a whole flag.
650 return (offset - 2 - offsetBack) % 4 == 0;
651 }
652
653 // Rule GB9, x (Extend | ZWJ)
654 // Rule GB9a, x SpacingMark
655 if (nextProp == U_GCB_EXTEND || nextCodePoint == ZWJ || nextProp == U_GCB_SP ACING_MARK)
656 return false;
657
658 // Rule GB9b, Prepend x
659 if (prevProp == U_GCB_PREPEND)
660 return false;
661
662 // Cluster Indic syllables together.
663 if (isIndicSyllabicCategoryVirama(prevCodePoint)
664 && u_getIntPropertyValue(nextCodePoint, UCHAR_GENERAL_CATEGORY) == U_OTH ER_LETTER)
665 return false;
666
667 // Proposed Rule GB10, (E_Base | EBG) x E_Modifier
668 if ((Character::isEmojiModifierBase(prevCodePoint) || isEBaseGAZ(prevCodePoi nt))
669 && Character::isModifier(nextCodePoint))
670 return false;
671
672 // Proposed Rule GB11, ZWJ x (Glue_After_Zwj | EBG)
673 if (prevCodePoint == ZWJ && (isGlueAfterZwj(nextCodePoint) || isEBaseGAZ(nex tCodePoint)))
674 return false;
675
676 // Rule GB999 any ÷ any
677 return true;
678 }
679
545 int uncheckedPreviousOffset(const Node* node, int current) 680 int uncheckedPreviousOffset(const Node* node, int current)
546 { 681 {
682 DCHECK_GE(current, 0);
547 if (!node->isTextNode()) 683 if (!node->isTextNode())
548 return current - 1; 684 return current - 1;
685 if (current <= 1)
686 return current - 1; // It's fine to return -1;
549 const String& text = toText(node)->data(); 687 const String& text = toText(node)->data();
550 if (text.is8Bit()) 688 if (text.is8Bit()) {
551 return current - 1; // TODO(nona): Good to support CR x LF. 689 const uint8_t* buf = text.characters8();
552 TextBreakIterator* iterator = cursorMovementIterator(text.characters16(), te xt.length()); 690 --current;
553 if (!iterator) 691 if (buf[current - 1] == 0x0D && buf[current] == 0x0A)
554 return current - 1; 692 --current;
555 const int result = iterator->preceding(current); 693 return current;
556 return result == TextBreakDone ? current - 1 : result; 694 }
695 const uint16_t* buf = text.characters16();
696 const int length = text.length();
697 while (!isGraphemeBreak(buf, length, --current)) {}
698 return current;
557 } 699 }
558 700
559 static int uncheckedPreviousOffsetForBackwardDeletion(const Node* n, int current ) 701 static int uncheckedPreviousOffsetForBackwardDeletion(const Node* n, int current )
560 { 702 {
561 return n->layoutObject() ? n->layoutObject()->previousOffsetForBackwardDelet ion(current) : current - 1; 703 return n->layoutObject() ? n->layoutObject()->previousOffsetForBackwardDelet ion(current) : current - 1;
562 } 704 }
563 705
564 int uncheckedNextOffset(const Node* node, int current) 706 int uncheckedNextOffset(const Node* node, int current)
565 { 707 {
708 DCHECK_GE(current, 0);
566 if (!node->isTextNode()) 709 if (!node->isTextNode())
567 return current + 1; 710 return current + 1;
568 const String& text = toText(node)->data(); 711 const String& text = toText(node)->data();
569 if (text.is8Bit()) 712 const int length = text.length();
570 return current + 1; // TODO(nona): Good to support CR x LF. 713 if (current >= (length - 1))
571 TextBreakIterator* iterator = cursorMovementIterator(text.characters16(), te xt.length()); 714 return current + 1; // It's fine to return length + 1;
572 if (!iterator) 715 if (text.is8Bit()) {
573 return current + 1; 716 const uint8_t* buf = text.characters8();
574 const int result = iterator->following(current); 717 ++current;
575 return result == TextBreakDone ? current + 1 : result; 718 if (buf[current - 1] == 0x0D && buf[current] == 0x0A)
719 ++current;
720 return current;
721 }
722 const uint16_t* buf = text.characters16();
723 while (!isGraphemeBreak(buf, length, ++current)) {}
724 return current;
576 } 725 }
577 726
578 template <typename Strategy> 727 template <typename Strategy>
579 PositionTemplate<Strategy> previousPositionOfAlgorithm(const PositionTemplate<St rategy>& position, PositionMoveType moveType) 728 PositionTemplate<Strategy> previousPositionOfAlgorithm(const PositionTemplate<St rategy>& position, PositionMoveType moveType)
580 { 729 {
581 Node* const node = position.anchorNode(); 730 Node* const node = position.anchorNode();
582 if (!node) 731 if (!node)
583 return position; 732 return position;
584 733
585 const int offset = position.computeEditingOffset(); 734 const int offset = position.computeEditingOffset();
(...skipping 1078 matching lines...) Expand 10 before | Expand all | Expand 10 after
1664 // instead of possibly at the end of the last node before the selection 1813 // instead of possibly at the end of the last node before the selection
1665 return mostForwardCaretPosition(visiblePosition.deepEquivalent()); 1814 return mostForwardCaretPosition(visiblePosition.deepEquivalent());
1666 } 1815 }
1667 1816
1668 bool isTextSecurityNode(const Node* node) 1817 bool isTextSecurityNode(const Node* node)
1669 { 1818 {
1670 return node && node->layoutObject() && node->layoutObject()->style()->textSe curity() != TSNONE; 1819 return node && node->layoutObject() && node->layoutObject()->style()->textSe curity() != TSNONE;
1671 } 1820 }
1672 1821
1673 } // namespace blink 1822 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/core/DEPS ('k') | third_party/WebKit/Source/core/editing/EditingUtilitiesTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698