OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2003, 2006, 2008, 2009, 2010, 2011 Apple Inc. All rights reserv
ed. | 2 * Copyright (C) 2003, 2006, 2008, 2009, 2010, 2011 Apple Inc. All rights reserv
ed. |
3 * Copyright (C) 2008 Holger Hans Peter Freyther | 3 * Copyright (C) 2008 Holger Hans Peter Freyther |
4 * Copyright (C) Research In Motion Limited 2011. All rights reserved. | 4 * Copyright (C) Research In Motion Limited 2011. All rights reserved. |
5 * | 5 * |
6 * This library is free software; you can redistribute it and/or | 6 * This library is free software; you can redistribute it and/or |
7 * modify it under the terms of the GNU Library General Public | 7 * modify it under the terms of the GNU Library General Public |
8 * License as published by the Free Software Foundation; either | 8 * License as published by the Free Software Foundation; either |
9 * version 2 of the License, or (at your option) any later version. | 9 * version 2 of the License, or (at your option) any later version. |
10 * | 10 * |
(...skipping 30 matching lines...) Expand all Loading... |
41 | 41 |
42 UTF16TextIterator::UTF16TextIterator(const UChar* characters, int currentCharact
er, int endOffset, int endCharacter) | 42 UTF16TextIterator::UTF16TextIterator(const UChar* characters, int currentCharact
er, int endOffset, int endCharacter) |
43 : m_characters(characters) | 43 : m_characters(characters) |
44 , m_charactersEnd(characters + (endCharacter - currentCharacter)) | 44 , m_charactersEnd(characters + (endCharacter - currentCharacter)) |
45 , m_offset(currentCharacter) | 45 , m_offset(currentCharacter) |
46 , m_endOffset(endOffset) | 46 , m_endOffset(endOffset) |
47 , m_currentGlyphLength(0) | 47 , m_currentGlyphLength(0) |
48 { | 48 { |
49 } | 49 } |
50 | 50 |
51 bool UTF16TextIterator::consumeSlowCase(UChar32& character) | 51 bool UTF16TextIterator::consumeSurrogatePair(UChar32& character) |
52 { | 52 { |
53 if (character <= 0x30FE) { | |
54 // Deal with Hiragana and Katakana voiced and semi-voiced syllables. | |
55 // Normalize into composed form, and then look for glyph with base + | |
56 // combined mark. | |
57 if (UChar32 normalized = normalizeVoicingMarks()) { | |
58 character = normalized; | |
59 m_currentGlyphLength = 2; | |
60 } | |
61 return true; | |
62 } | |
63 | |
64 if (!U16_IS_SURROGATE(character)) | 53 if (!U16_IS_SURROGATE(character)) |
65 return true; | 54 return true; |
66 | 55 |
67 // If we have a surrogate pair, make sure it starts with the high part. | 56 // If we have a surrogate pair, make sure it starts with the high part. |
68 if (!U16_IS_SURROGATE_LEAD(character)) | 57 if (!U16_IS_SURROGATE_LEAD(character)) |
69 return false; | 58 return false; |
70 | 59 |
71 // Do we have a surrogate pair? If so, determine the full Unicode (32 bit) | 60 // Do we have a surrogate pair? If so, determine the full Unicode (32 bit) |
72 // code point before glyph lookup. | 61 // code point before glyph lookup. |
73 // Make sure we have another character and it's a low surrogate. | 62 // Make sure we have another character and it's a low surrogate. |
(...skipping 19 matching lines...) Expand all Loading... |
93 U16_NEXT(markCharactersEnd, nextCharacterLength, | 82 U16_NEXT(markCharactersEnd, nextCharacterLength, |
94 m_charactersEnd - markCharactersEnd, nextCharacter); | 83 m_charactersEnd - markCharactersEnd, nextCharacter); |
95 if (!(U_GET_GC_MASK(nextCharacter) & U_GC_M_MASK)) | 84 if (!(U_GET_GC_MASK(nextCharacter) & U_GC_M_MASK)) |
96 break; | 85 break; |
97 markLength += nextCharacterLength; | 86 markLength += nextCharacterLength; |
98 markCharactersEnd += nextCharacterLength; | 87 markCharactersEnd += nextCharacterLength; |
99 } | 88 } |
100 m_currentGlyphLength = markLength; | 89 m_currentGlyphLength = markLength; |
101 } | 90 } |
102 | 91 |
103 UChar32 UTF16TextIterator::normalizeVoicingMarks() | |
104 { | |
105 // According to http://www.unicode.org/Public/UNIDATA/UCD.html#Canonical_Com
bining_Class_Values | |
106 static const uint8_t hiraganaKatakanaVoicingMarksCombiningClass = 8; | |
107 | |
108 if (m_offset + 1 >= m_endOffset) | |
109 return 0; | |
110 | |
111 if (combiningClass(m_characters[1]) == hiraganaKatakanaVoicingMarksCombining
Class) { | |
112 // Normalize into composed form using 3.2 rules. | |
113 UChar normalizedCharacters[2] = { 0, 0 }; | |
114 UErrorCode uStatus = U_ZERO_ERROR; | |
115 int32_t resultLength = unorm_normalize(m_characters, 2, UNORM_NFC, | |
116 UNORM_UNICODE_3_2, &normalizedCharacters[0], 2, &uStatus); | |
117 if (resultLength == 1 && !uStatus) | |
118 return normalizedCharacters[0]; | |
119 } | |
120 | |
121 return 0; | |
122 } | 92 } |
123 | |
124 } | |
OLD | NEW |