Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(137)

Side by Side Diff: third_party/android_prediction/utils/char_utils.h

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: changed third_party/prediction to third_party/android_prediction; added CHROMIUM.diff Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef LATINIME_CHAR_UTILS_H
18 #define LATINIME_CHAR_UTILS_H
19
20 #include <cctype>
21 #include <cstring>
22 #include <vector>
23
24 #include "third_party/android_prediction/defines.h"
25
26 namespace latinime {
27
28 class CharUtils {
29 public:
30 static AK_FORCE_INLINE bool isAsciiUpper(int c) {
31 // Note: isupper(...) reports false positives for some Cyrillic characte rs, causing them to
32 // be incorrectly lower-cased using toAsciiLower(...) rather than latin_ tolower(...).
33 return (c >= 'A' && c <= 'Z');
34 }
35
36 static AK_FORCE_INLINE int toAsciiLower(int c) {
37 return c - 'A' + 'a';
38 }
39
40 static AK_FORCE_INLINE bool isAscii(int c) {
41 return isascii(c) != 0;
42 }
43
44 static AK_FORCE_INLINE int toLowerCase(const int c) {
45 if (isAsciiUpper(c)) {
46 return toAsciiLower(c);
47 }
48 if (isAscii(c)) {
49 return c;
50 }
51 return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
52 }
53
54 static AK_FORCE_INLINE int toBaseLowerCase(const int c) {
55 return toLowerCase(toBaseCodePoint(c));
56 }
57
58 static AK_FORCE_INLINE bool isIntentionalOmissionCodePoint(const int codePoi nt) {
59 // TODO: Do not hardcode here
60 return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_ MINUS;
61 }
62
63 static AK_FORCE_INLINE int getCodePointCount(const int arraySize, const int *const codePoints) {
64 int size = 0;
65 for (; size < arraySize; ++size) {
66 if (codePoints[size] == '\0') {
67 break;
68 }
69 }
70 return size;
71 }
72
73 static AK_FORCE_INLINE int toBaseCodePoint(int c) {
74 if (c < BASE_CHARS_SIZE) {
75 return static_cast<int>(BASE_CHARS[c]);
76 }
77 return c;
78 }
79
80 static AK_FORCE_INLINE int getSpaceCount(const int *const codePointBuffer, c onst int length) {
81 int spaceCount = 0;
82 for (int i = 0; i < length; ++i) {
83 if (codePointBuffer[i] == KEYCODE_SPACE) {
84 ++spaceCount;
85 }
86 }
87 return spaceCount;
88 }
89
90 static AK_FORCE_INLINE int isInUnicodeSpace(const int codePoint) {
91 return codePoint >= MIN_UNICODE_CODE_POINT && codePoint <= MAX_UNICODE_C ODE_POINT;
92 }
93
94 static unsigned short latin_tolower(const unsigned short c);
95 static const std::vector<int> EMPTY_STRING;
96
97 // Returns updated code point count. Returns 0 when the code points cannot b e marked as a
98 // Beginning-of-Sentence.
99 static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePo ints,
100 const int codePointCount, const int maxCodePoint) {
101 if (codePointCount > 0 && codePoints[0] == CODE_POINT_BEGINNING_OF_SENTE NCE) {
102 // Marker has already been attached.
103 return codePointCount;
104 }
105 if (codePointCount >= maxCodePoint) {
106 // the code points cannot be marked as a Beginning-of-Sentence.
107 return 0;
108 }
109 memmove(codePoints + 1, codePoints, sizeof(int) * codePointCount);
110 codePoints[0] = CODE_POINT_BEGINNING_OF_SENTENCE;
111 return codePointCount + 1;
112 }
113
114 private:
115 DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);
116
117 static const int MIN_UNICODE_CODE_POINT;
118 static const int MAX_UNICODE_CODE_POINT;
119
120 /**
121 * Table mapping most combined Latin, Greek, and Cyrillic characters
122 * to their base characters. If c is in range, BASE_CHARS[c] == c
123 * if c is not a combined character, or the base character if it
124 * is combined.
125 */
126 static const int BASE_CHARS_SIZE = 0x0500;
127 static const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
128 };
129 } // namespace latinime
130 #endif // LATINIME_CHAR_UTILS_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698