Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(311)

Side by Side Diff: third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: format README and CHROMIUM.diff Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef LATINIME_BYTE_ARRAY_UTILS_H
18 #define LATINIME_BYTE_ARRAY_UTILS_H
19
20 #include <cstdint>
21
22 #include "third_party/android_prediction/defines.h"
23
24 namespace latinime {
25
26 /**
27 * Utility methods for reading byte arrays.
28 */
29 class ByteArrayUtils {
30 public:
31 /**
32 * Integer writing
33 *
34 * Each method write a corresponding size integer in a big endian manner.
35 */
36 static AK_FORCE_INLINE void writeUintAndAdvancePosition(uint8_t *const buffe r,
37 const uint32_t data, const int size, int *const pos) {
38 // size must be in 1 to 4.
39 ASSERT(size >= 1 && size <= 4);
40 switch (size) {
41 case 1:
42 ByteArrayUtils::writeUint8AndAdvancePosition(buffer, data, pos);
43 return;
44 case 2:
45 ByteArrayUtils::writeUint16AndAdvancePosition(buffer, data, pos) ;
46 return;
47 case 3:
48 ByteArrayUtils::writeUint24AndAdvancePosition(buffer, data, pos) ;
49 return;
50 case 4:
51 ByteArrayUtils::writeUint32AndAdvancePosition(buffer, data, pos) ;
52 return;
53 default:
54 break;
55 }
56 }
57
58 /**
59 * Integer reading
60 *
61 * Each method read a corresponding size integer in a big endian manner.
62 */
63 static AK_FORCE_INLINE uint32_t readUint32(const uint8_t *const buffer, cons t int pos) {
64 return (buffer[pos] << 24) ^ (buffer[pos + 1] << 16)
65 ^ (buffer[pos + 2] << 8) ^ buffer[pos + 3];
66 }
67
68 static AK_FORCE_INLINE uint32_t readUint24(const uint8_t *const buffer, cons t int pos) {
69 return (buffer[pos] << 16) ^ (buffer[pos + 1] << 8) ^ buffer[pos + 2];
70 }
71
72 static AK_FORCE_INLINE uint16_t readUint16(const uint8_t *const buffer, cons t int pos) {
73 return (buffer[pos] << 8) ^ buffer[pos + 1];
74 }
75
76 static AK_FORCE_INLINE uint8_t readUint8(const uint8_t *const buffer, const int pos) {
77 return buffer[pos];
78 }
79
80 static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition(
81 const uint8_t *const buffer, int *const pos) {
82 const uint32_t value = readUint32(buffer, *pos);
83 *pos += 4;
84 return value;
85 }
86
87 static AK_FORCE_INLINE int readSint24AndAdvancePosition(
88 const uint8_t *const buffer, int *const pos) {
89 const uint8_t value = readUint8(buffer, *pos);
90 if (value < 0x80) {
91 return readUint24AndAdvancePosition(buffer, pos);
92 } else {
93 (*pos)++;
94 return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffe r, pos));
95 }
96 }
97
98 static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition(
99 const uint8_t *const buffer, int *const pos) {
100 const uint32_t value = readUint24(buffer, *pos);
101 *pos += 3;
102 return value;
103 }
104
105 static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition(
106 const uint8_t *const buffer, int *const pos) {
107 const uint16_t value = readUint16(buffer, *pos);
108 *pos += 2;
109 return value;
110 }
111
112 static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition(
113 const uint8_t *const buffer, int *const pos) {
114 return buffer[(*pos)++];
115 }
116
117 static AK_FORCE_INLINE int readUint(const uint8_t *const buffer,
118 const int size, const int pos) {
119 // size must be in 1 to 4.
120 ASSERT(size >= 1 && size <= 4);
121 switch (size) {
122 case 1:
123 return ByteArrayUtils::readUint8(buffer, pos);
124 case 2:
125 return ByteArrayUtils::readUint16(buffer, pos);
126 case 3:
127 return ByteArrayUtils::readUint24(buffer, pos);
128 case 4:
129 return ByteArrayUtils::readUint32(buffer, pos);
130 default:
131 return 0;
132 }
133 }
134
135 /**
136 * Code Point Reading
137 *
138 * 1 byte = bbbbbbbb match
139 * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
140 * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant c hoice because
141 * unicode code points range from 0 to 0x10FFFF, so any 3-byte value s tarting with
142 * 00011111 would be outside unicode.
143 * else: iso-latin-1 code
144 * This allows for the whole unicode range to be encoded, including chars ou tside of
145 * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, excep t control
146 * characters which should never happen anyway (and still work, but take 3 b ytes).
147 */
148 static AK_FORCE_INLINE int readCodePoint(const uint8_t *const buffer, const int pos) {
149 int p = pos;
150 return readCodePointAndAdvancePosition(buffer, &p);
151 }
152
153 static AK_FORCE_INLINE int readCodePointAndAdvancePosition(
154 const uint8_t *const buffer, int *const pos) {
155 const uint8_t firstByte = readUint8(buffer, *pos);
156 if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) {
157 if (firstByte == CHARACTER_ARRAY_TERMINATOR) {
158 *pos += 1;
159 return NOT_A_CODE_POINT;
160 } else {
161 return readUint24AndAdvancePosition(buffer, pos);
162 }
163 } else {
164 *pos += 1;
165 return firstByte;
166 }
167 }
168
169 /**
170 * String (array of code points) Reading
171 *
172 * Reads code points until the terminator is found.
173 */
174 // Returns the length of the string.
175 static int readStringAndAdvancePosition(const uint8_t *const buffer,
176 const int maxLength, int *const outBuffer, int *const pos) {
177 int length = 0;
178 int codePoint = readCodePointAndAdvancePosition(buffer, pos);
179 while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
180 outBuffer[length++] = codePoint;
181 codePoint = readCodePointAndAdvancePosition(buffer, pos);
182 }
183 return length;
184 }
185
186 // Advances the position and returns the length of the string.
187 static int advancePositionToBehindString(
188 const uint8_t *const buffer, const int maxLength, int *const pos) {
189 int length = 0;
190 int codePoint = readCodePointAndAdvancePosition(buffer, pos);
191 while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
192 codePoint = readCodePointAndAdvancePosition(buffer, pos);
193 length++;
194 }
195 return length;
196 }
197
198 /**
199 * String (array of code points) Writing
200 */
201 static void writeCodePointsAndAdvancePosition(uint8_t *const buffer,
202 const int *const codePoints, const int codePointCount, const bool wr itesTerminator,
203 int *const pos) {
204 for (int i = 0; i < codePointCount; ++i) {
205 const int codePoint = codePoints[i];
206 if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TE RMINATOR) {
207 break;
208 } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
209 || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
210 // three bytes character.
211 writeUint24AndAdvancePosition(buffer, codePoint, pos);
212 } else {
213 // one byte character.
214 writeUint8AndAdvancePosition(buffer, codePoint, pos);
215 }
216 }
217 if (writesTerminator) {
218 writeUint8AndAdvancePosition(buffer, CHARACTER_ARRAY_TERMINATOR, pos );
219 }
220 }
221
222 static int calculateRequiredByteCountToStoreCodePoints(const int *const code Points,
223 const int codePointCount, const bool writesTerminator) {
224 int byteCount = 0;
225 for (int i = 0; i < codePointCount; ++i) {
226 const int codePoint = codePoints[i];
227 if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TE RMINATOR) {
228 break;
229 } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
230 || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
231 // three bytes character.
232 byteCount += 3;
233 } else {
234 // one byte character.
235 byteCount += 1;
236 }
237 }
238 if (writesTerminator) {
239 // The terminator is one byte.
240 byteCount += 1;
241 }
242 return byteCount;
243 }
244
245 private:
246 DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils);
247
248 static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE;
249 static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE;
250 static const uint8_t CHARACTER_ARRAY_TERMINATOR;
251
252 static AK_FORCE_INLINE void writeUint32AndAdvancePosition(uint8_t *const buf fer,
253 const uint32_t data, int *const pos) {
254 buffer[(*pos)++] = (data >> 24) & 0xFF;
255 buffer[(*pos)++] = (data >> 16) & 0xFF;
256 buffer[(*pos)++] = (data >> 8) & 0xFF;
257 buffer[(*pos)++] = data & 0xFF;
258 }
259
260 static AK_FORCE_INLINE void writeUint24AndAdvancePosition(uint8_t *const buf fer,
261 const uint32_t data, int *const pos) {
262 buffer[(*pos)++] = (data >> 16) & 0xFF;
263 buffer[(*pos)++] = (data >> 8) & 0xFF;
264 buffer[(*pos)++] = data & 0xFF;
265 }
266
267 static AK_FORCE_INLINE void writeUint16AndAdvancePosition(uint8_t *const buf fer,
268 const uint16_t data, int *const pos) {
269 buffer[(*pos)++] = (data >> 8) & 0xFF;
270 buffer[(*pos)++] = data & 0xFF;
271 }
272
273 static AK_FORCE_INLINE void writeUint8AndAdvancePosition(uint8_t *const buff er,
274 const uint8_t data, int *const pos) {
275 buffer[(*pos)++] = data & 0xFF;
276 }
277 };
278 } // namespace latinime
279 #endif /* LATINIME_BYTE_ARRAY_UTILS_H */
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698