third_party/prediction/utils/char_utils.h - Issue 1247903003: Add spellcheck and word suggestion to the prediction service

Side by Side Diff: third_party/prediction/utils/char_utils.h

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master

Patch Set: Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (C) 2010 The Android Open Source Project

	3 *

	4 * Licensed under the Apache License, Version 2.0 (the "License");

	5 * you may not use this file except in compliance with the License.

	6 * You may obtain a copy of the License at

	7 *

	8 * http://www.apache.org/licenses/LICENSE-2.0

	9 *

	10 * Unless required by applicable law or agreed to in writing, software

	11 * distributed under the License is distributed on an "AS IS" BASIS,

	12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

	13 * See the License for the specific language governing permissions and

	14 * limitations under the License.

	15 */

	16

	17 #ifndef LATINIME_CHAR_UTILS_H

	18 #define LATINIME_CHAR_UTILS_H

	19

	20 #include <cctype>

	21 #include <cstring>

	22 #include <vector>

	23

	24 #include "third_party/prediction/defines.h"

	25

	26 namespace latinime {

	27

	28 class CharUtils {

	29 public:

	30 static AK_FORCE_INLINE bool isAsciiUpper(int c) {

	31 // Note: isupper(...) reports false positives for some Cyrillic characters,

	32 // causing them to

	33 // be incorrectly lower-cased using toAsciiLower(...) rather than

	34 // latin_tolower(...).

	35 return (c >= 'A' && c <= 'Z');

	36 }

	37

	38 static AK_FORCE_INLINE int toAsciiLower(int c) { return c - 'A' + 'a'; }

	39

	40 static AK_FORCE_INLINE bool isAscii(int c) { return isascii(c) != 0; }

	41

	42 static AK_FORCE_INLINE int toLowerCase(const int c) {

	43 if (isAsciiUpper(c)) {

	44 return toAsciiLower(c);

	45 }

	46 if (isAscii(c)) {

	47 return c;

	48 }

	49 return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));

	50 }

	51

	52 static AK_FORCE_INLINE int toBaseLowerCase(const int c) {

	53 return toLowerCase(toBaseCodePoint(c));

	54 }

	55

	56 static AK_FORCE_INLINE bool isIntentionalOmissionCodePoint(

	57 const int codePoint) {

	58 // TODO: Do not hardcode here

	59 return codePoint == KEYCODE_SINGLE_QUOTE \|\|

	60 codePoint == KEYCODE_HYPHEN_MINUS;

	61 }

	62

	63 static AK_FORCE_INLINE int getCodePointCount(const int arraySize,

	64 const int* const codePoints) {

	65 int size = 0;

	66 for (; size < arraySize; ++size) {

	67 if (codePoints[size] == '\0') {

	68 break;

	69 }

	70 }

	71 return size;

	72 }

	73

	74 static AK_FORCE_INLINE int toBaseCodePoint(int c) {

	75 if (c < BASE_CHARS_SIZE) {

	76 return static_cast<int>(BASE_CHARS[c]);

	77 }

	78 return c;

	79 }

	80

	81 static AK_FORCE_INLINE int getSpaceCount(const int* const codePointBuffer,

	82 const int length) {

	83 int spaceCount = 0;

	84 for (int i = 0; i < length; ++i) {

	85 if (codePointBuffer[i] == KEYCODE_SPACE) {

	86 ++spaceCount;

	87 }

	88 }

	89 return spaceCount;

	90 }

	91

	92 static AK_FORCE_INLINE int isInUnicodeSpace(const int codePoint) {

	93 return codePoint >= MIN_UNICODE_CODE_POINT &&

	94 codePoint <= MAX_UNICODE_CODE_POINT;

	95 }

	96

	// Declaration only; the definition is not part of this header.
	// toLowerCase() calls this for values that are not ASCII, passing the value
	// as an unsigned short and using the result as the lower-cased code point.
	97 static unsigned short latin_tolower(const unsigned short c);

	// Shared constant vector of ints, initialised outside this header.
	// NOTE(review): presumably an empty code point sequence, going by the name —
	// confirm against the definition.
	98 static const std::vector<int> EMPTY_STRING;

	99

	100 // Returns updated code point count. Returns 0 when the code points cannot be

	101 // marked as a

	102 // Beginning-of-Sentence.

	103 static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(

	104 int* const codePoints,

	105 const int codePointCount,

	106 const int maxCodePoint) {

	107 if (codePointCount > 0 &&

	108 codePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {

	109 // Marker has already been attached.

	110 return codePointCount;

	111 }

	112 if (codePointCount >= maxCodePoint) {

	113 // the code points cannot be marked as a Beginning-of-Sentence.

	114 return 0;

	115 }

	116 memmove(codePoints + 1, codePoints, sizeof(int) * codePointCount);

	117 codePoints[0] = CODE_POINT_BEGINNING_OF_SENTENCE;

	118 return codePointCount + 1;

	119 }

	120

	121 private:

	// NOTE(review): this macro is defined outside this header; presumably it
	// suppresses construction so the class is used only through its static
	// members — confirm against its definition.
	122 DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);

	123

	// Inclusive lower and upper bounds tested by isInUnicodeSpace(). Only
	// declared here; the values are defined outside this header.
	124 static const int MIN_UNICODE_CODE_POINT;

	125 static const int MAX_UNICODE_CODE_POINT;

	126

	127 /**

	128 * Table mapping most combined Latin, Greek, and Cyrillic characters

	129 * to their base characters. If c is in range, BASE_CHARS[c] == c

	130 * if c is not a combined character, or the base character if it

	131 * is combined.

	132 */

	// Number of entries in BASE_CHARS. toBaseCodePoint() consults the table only
	// for values below this size and returns larger values unchanged.
	133 static const int BASE_CHARS_SIZE = 0x0500;

	// The table itself; only declared here, its contents are defined elsewhere.
	134 static const unsigned short BASE_CHARS[BASE_CHARS_SIZE];

	135 };

	136 } // namespace latinime

	137 #endif // LATINIME_CHAR_UTILS_H

OLD	NEW

« third_party/prediction/README.chromium ('K') | « third_party/prediction/utils/byte_array_view.h ('k') | third_party/prediction/utils/char_utils.cpp » ('j') | no next file with comments »