Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(96)

Side by Side Diff: third_party/prediction/utils/char_utils.h

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef LATINIME_CHAR_UTILS_H
18 #define LATINIME_CHAR_UTILS_H
19
20 #include <cctype>
21 #include <cstring>
22 #include <vector>
23
24 #include "third_party/prediction/defines.h"
25
26 namespace latinime {
27
28 class CharUtils {
29 public:
30 static AK_FORCE_INLINE bool isAsciiUpper(int c) {
31 // Note: isupper(...) reports false positives for some Cyrillic characters,
32 // causing them to
33 // be incorrectly lower-cased using toAsciiLower(...) rather than
34 // latin_tolower(...).
35 return (c >= 'A' && c <= 'Z');
36 }
37
38 static AK_FORCE_INLINE int toAsciiLower(int c) { return c - 'A' + 'a'; }
39
40 static AK_FORCE_INLINE bool isAscii(int c) { return isascii(c) != 0; }
41
42 static AK_FORCE_INLINE int toLowerCase(const int c) {
43 if (isAsciiUpper(c)) {
44 return toAsciiLower(c);
45 }
46 if (isAscii(c)) {
47 return c;
48 }
49 return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
50 }
51
52 static AK_FORCE_INLINE int toBaseLowerCase(const int c) {
53 return toLowerCase(toBaseCodePoint(c));
54 }
55
56 static AK_FORCE_INLINE bool isIntentionalOmissionCodePoint(
57 const int codePoint) {
58 // TODO: Do not hardcode here
59 return codePoint == KEYCODE_SINGLE_QUOTE ||
60 codePoint == KEYCODE_HYPHEN_MINUS;
61 }
62
63 static AK_FORCE_INLINE int getCodePointCount(const int arraySize,
64 const int* const codePoints) {
65 int size = 0;
66 for (; size < arraySize; ++size) {
67 if (codePoints[size] == '\0') {
68 break;
69 }
70 }
71 return size;
72 }
73
74 static AK_FORCE_INLINE int toBaseCodePoint(int c) {
75 if (c < BASE_CHARS_SIZE) {
76 return static_cast<int>(BASE_CHARS[c]);
77 }
78 return c;
79 }
80
81 static AK_FORCE_INLINE int getSpaceCount(const int* const codePointBuffer,
82 const int length) {
83 int spaceCount = 0;
84 for (int i = 0; i < length; ++i) {
85 if (codePointBuffer[i] == KEYCODE_SPACE) {
86 ++spaceCount;
87 }
88 }
89 return spaceCount;
90 }
91
92 static AK_FORCE_INLINE int isInUnicodeSpace(const int codePoint) {
93 return codePoint >= MIN_UNICODE_CODE_POINT &&
94 codePoint <= MAX_UNICODE_CODE_POINT;
95 }
96
97 static unsigned short latin_tolower(const unsigned short c);
98 static const std::vector<int> EMPTY_STRING;
99
100 // Returns updated code point count. Returns 0 when the code points cannot be
101 // marked as a
102 // Beginning-of-Sentence.
103 static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(
104 int* const codePoints,
105 const int codePointCount,
106 const int maxCodePoint) {
107 if (codePointCount > 0 &&
108 codePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
109 // Marker has already been attached.
110 return codePointCount;
111 }
112 if (codePointCount >= maxCodePoint) {
113 // the code points cannot be marked as a Beginning-of-Sentence.
114 return 0;
115 }
116 memmove(codePoints + 1, codePoints, sizeof(int) * codePointCount);
117 codePoints[0] = CODE_POINT_BEGINNING_OF_SENTENCE;
118 return codePointCount + 1;
119 }
120
121 private:
122 DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);
123
124 static const int MIN_UNICODE_CODE_POINT;
125 static const int MAX_UNICODE_CODE_POINT;
126
127 /**
128 * Table mapping most combined Latin, Greek, and Cyrillic characters
129 * to their base characters. If c is in range, BASE_CHARS[c] == c
130 * if c is not a combined character, or the base character if it
131 * is combined.
132 */
133 static const int BASE_CHARS_SIZE = 0x0500;
134 static const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
135 };
136 } // namespace latinime
137 #endif // LATINIME_CHAR_UTILS_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698