OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (C) 2013 The Android Open Source Project |
| 3 * |
| 4 * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 * you may not use this file except in compliance with the License. |
| 6 * You may obtain a copy of the License at |
| 7 * |
| 8 * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 * |
| 10 * Unless required by applicable law or agreed to in writing, software |
| 11 * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 * See the License for the specific language governing permissions and |
| 14 * limitations under the License. |
| 15 */ |
| 16 |
| 17 #include "third_party/prediction/suggest/core/dictionary/digraph_utils.h" |
| 18 |
| 19 #include <cstdlib> |
| 20 |
| 21 #include "third_party/prediction/defines.h" |
| 22 #include "third_party/prediction/suggest/core/policy/dictionary_header_structure
_policy.h" |
| 23 #include "third_party/prediction/utils/char_utils.h" |
| 24 |
| 25 namespace latinime { |
| 26 |
| 27 const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] = { |
| 28 {'a', 'e', 0x00E4}, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS |
| 29 {'o', 'e', 0x00F6}, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS |
| 30 {'u', 'e', 0x00FC}}; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS |
| 31 const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = { |
| 32 DIGRAPH_TYPE_GERMAN_UMLAUT}; |
| 33 |
| 34 /* static */ bool DigraphUtils::hasDigraphForCodePoint( |
| 35 const DictionaryHeaderStructurePolicy* const headerPolicy, |
| 36 const int compositeGlyphCodePoint) { |
| 37 const DigraphUtils::DigraphType digraphType = |
| 38 getDigraphTypeForDictionary(headerPolicy); |
| 39 if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint( |
| 40 digraphType, compositeGlyphCodePoint)) { |
| 41 return true; |
| 42 } |
| 43 return false; |
| 44 } |
| 45 |
| 46 // Returns the digraph type associated with the given dictionary. |
| 47 /* static */ DigraphUtils::DigraphType |
| 48 DigraphUtils::getDigraphTypeForDictionary( |
| 49 const DictionaryHeaderStructurePolicy* const headerPolicy) { |
| 50 if (headerPolicy->requiresGermanUmlautProcessing()) { |
| 51 return DIGRAPH_TYPE_GERMAN_UMLAUT; |
| 52 } |
| 53 return DIGRAPH_TYPE_NONE; |
| 54 } |
| 55 |
| 56 // Returns the digraph codepoint for the given composite glyph codepoint and |
| 57 // digraph codepoint index |
| 58 // (which specifies the first or second codepoint in the digraph). |
| 59 /* static */ int DigraphUtils::getDigraphCodePointForIndex( |
| 60 const int compositeGlyphCodePoint, |
| 61 const DigraphCodePointIndex digraphCodePointIndex) { |
| 62 if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) { |
| 63 return NOT_A_CODE_POINT; |
| 64 } |
| 65 const DigraphUtils::digraph_t* const digraph = |
| 66 DigraphUtils::getDigraphForCodePoint(compositeGlyphCodePoint); |
| 67 if (!digraph) { |
| 68 return NOT_A_CODE_POINT; |
| 69 } |
| 70 if (digraphCodePointIndex == FIRST_DIGRAPH_CODEPOINT) { |
| 71 return digraph->first; |
| 72 } else if (digraphCodePointIndex == SECOND_DIGRAPH_CODEPOINT) { |
| 73 return digraph->second; |
| 74 } |
| 75 ASSERT(false); |
| 76 return NOT_A_CODE_POINT; |
| 77 } |
| 78 |
| 79 // Retrieves the set of all digraphs associated with the given digraph type. |
| 80 // Returns the size of the digraph array, or 0 if none exist. |
| 81 /* static */ int DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize( |
| 82 const DigraphUtils::DigraphType digraphType, |
| 83 const DigraphUtils::digraph_t** const digraphs) { |
| 84 if (digraphType == DigraphUtils::DIGRAPH_TYPE_GERMAN_UMLAUT) { |
| 85 *digraphs = GERMAN_UMLAUT_DIGRAPHS; |
| 86 return NELEMS(GERMAN_UMLAUT_DIGRAPHS); |
| 87 } |
| 88 return 0; |
| 89 } |
| 90 |
| 91 /** |
| 92 * Returns the digraph for the input composite glyph codepoint, or nullptr if |
| 93 * none exists. |
| 94 * compositeGlyphCodePoint: the method returns the digraph corresponding to this |
| 95 * codepoint. |
| 96 */ |
| 97 /* static */ const DigraphUtils::digraph_t* |
| 98 DigraphUtils::getDigraphForCodePoint(const int compositeGlyphCodePoint) { |
| 99 for (size_t i = 0; i < NELEMS(USED_DIGRAPH_TYPES); i++) { |
| 100 const DigraphUtils::digraph_t* const digraph = |
| 101 getDigraphForDigraphTypeAndCodePoint(USED_DIGRAPH_TYPES[i], |
| 102 compositeGlyphCodePoint); |
| 103 if (digraph) { |
| 104 return digraph; |
| 105 } |
| 106 } |
| 107 return nullptr; |
| 108 } |
| 109 |
| 110 /** |
| 111 * Returns the digraph for the input composite glyph codepoint, or nullptr if |
| 112 * none exists. |
| 113 * digraphType: the type of digraphs supported. |
| 114 * compositeGlyphCodePoint: the method returns the digraph corresponding to this |
| 115 * codepoint. |
| 116 */ |
| 117 /* static */ const DigraphUtils::digraph_t* |
| 118 DigraphUtils::getDigraphForDigraphTypeAndCodePoint( |
| 119 const DigraphUtils::DigraphType digraphType, |
| 120 const int compositeGlyphCodePoint) { |
| 121 const DigraphUtils::digraph_t* digraphs = nullptr; |
| 122 const int compositeGlyphLowerCodePoint = |
| 123 CharUtils::toLowerCase(compositeGlyphCodePoint); |
| 124 const int digraphsSize = |
| 125 DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(digraphType, |
| 126 &digraphs); |
| 127 for (int i = 0; i < digraphsSize; i++) { |
| 128 if (digraphs[i].compositeGlyph == compositeGlyphLowerCodePoint) { |
| 129 return &digraphs[i]; |
| 130 } |
| 131 } |
| 132 return nullptr; |
| 133 } |
| 134 |
| 135 } // namespace latinime |
OLD | NEW |