Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(22)

Side by Side Diff: third_party/prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef LATINIME_BYTE_ARRAY_UTILS_H
18 #define LATINIME_BYTE_ARRAY_UTILS_H
19
20 #include <cstdint>
21
22 #include "third_party/prediction/defines.h"
23
24 namespace latinime {
25
26 /**
27 * Utility methods for reading byte arrays.
28 */
29 class ByteArrayUtils {
30 public:
31 /**
32 * Integer writing
33 *
34 * Each method write a corresponding size integer in a big endian manner.
35 */
36 static AK_FORCE_INLINE void writeUintAndAdvancePosition(uint8_t* const buffer,
37 const uint32_t data,
38 const int size,
39 int* const pos) {
40 // size must be in 1 to 4.
41 ASSERT(size >= 1 && size <= 4);
42 switch (size) {
43 case 1:
44 ByteArrayUtils::writeUint8AndAdvancePosition(buffer, data, pos);
45 return;
46 case 2:
47 ByteArrayUtils::writeUint16AndAdvancePosition(buffer, data, pos);
48 return;
49 case 3:
50 ByteArrayUtils::writeUint24AndAdvancePosition(buffer, data, pos);
51 return;
52 case 4:
53 ByteArrayUtils::writeUint32AndAdvancePosition(buffer, data, pos);
54 return;
55 default:
56 break;
57 }
58 }
59
60 /**
61 * Integer reading
62 *
63 * Each method read a corresponding size integer in a big endian manner.
64 */
65 static AK_FORCE_INLINE uint32_t
66 readUint32(const uint8_t* const buffer, const int pos) {
67 return (buffer[pos] << 24) ^ (buffer[pos + 1] << 16) ^
68 (buffer[pos + 2] << 8) ^ buffer[pos + 3];
69 }
70
71 static AK_FORCE_INLINE uint32_t
72 readUint24(const uint8_t* const buffer, const int pos) {
73 return (buffer[pos] << 16) ^ (buffer[pos + 1] << 8) ^ buffer[pos + 2];
74 }
75
76 static AK_FORCE_INLINE uint16_t
77 readUint16(const uint8_t* const buffer, const int pos) {
78 return (buffer[pos] << 8) ^ buffer[pos + 1];
79 }
80
81 static AK_FORCE_INLINE uint8_t
82 readUint8(const uint8_t* const buffer, const int pos) {
83 return buffer[pos];
84 }
85
86 static AK_FORCE_INLINE uint32_t
87 readUint32AndAdvancePosition(const uint8_t* const buffer, int* const pos) {
88 const uint32_t value = readUint32(buffer, *pos);
89 *pos += 4;
90 return value;
91 }
92
93 static AK_FORCE_INLINE int readSint24AndAdvancePosition(
94 const uint8_t* const buffer,
95 int* const pos) {
96 const uint8_t value = readUint8(buffer, *pos);
97 if (value < 0x80) {
98 return readUint24AndAdvancePosition(buffer, pos);
99 } else {
100 (*pos)++;
101 return -(((value & 0x7F) << 16) ^
102 readUint16AndAdvancePosition(buffer, pos));
103 }
104 }
105
106 static AK_FORCE_INLINE uint32_t
107 readUint24AndAdvancePosition(const uint8_t* const buffer, int* const pos) {
108 const uint32_t value = readUint24(buffer, *pos);
109 *pos += 3;
110 return value;
111 }
112
113 static AK_FORCE_INLINE uint16_t
114 readUint16AndAdvancePosition(const uint8_t* const buffer, int* const pos) {
115 const uint16_t value = readUint16(buffer, *pos);
116 *pos += 2;
117 return value;
118 }
119
120 static AK_FORCE_INLINE uint8_t
121 readUint8AndAdvancePosition(const uint8_t* const buffer, int* const pos) {
122 return buffer[(*pos)++];
123 }
124
125 static AK_FORCE_INLINE int readUint(const uint8_t* const buffer,
126 const int size,
127 const int pos) {
128 // size must be in 1 to 4.
129 ASSERT(size >= 1 && size <= 4);
130 switch (size) {
131 case 1:
132 return ByteArrayUtils::readUint8(buffer, pos);
133 case 2:
134 return ByteArrayUtils::readUint16(buffer, pos);
135 case 3:
136 return ByteArrayUtils::readUint24(buffer, pos);
137 case 4:
138 return ByteArrayUtils::readUint32(buffer, pos);
139 default:
140 return 0;
141 }
142 }
143
144 /**
145 * Code Point Reading
146 *
147 * 1 byte = bbbbbbbb match
148 * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
149 * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant
150 *choice because
151 * unicode code points range from 0 to 0x10FFFF, so any 3-byte value
152 *starting with
153 * 00011111 would be outside unicode.
154 * else: iso-latin-1 code
155 * This allows for the whole unicode range to be encoded, including chars
156 *outside of
157 * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except
158 *control
159 * characters which should never happen anyway (and still work, but take 3
160 *bytes).
161 */
162 static AK_FORCE_INLINE int readCodePoint(const uint8_t* const buffer,
163 const int pos) {
164 int p = pos;
165 return readCodePointAndAdvancePosition(buffer, &p);
166 }
167
168 static AK_FORCE_INLINE int readCodePointAndAdvancePosition(
169 const uint8_t* const buffer,
170 int* const pos) {
171 const uint8_t firstByte = readUint8(buffer, *pos);
172 if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) {
173 if (firstByte == CHARACTER_ARRAY_TERMINATOR) {
174 *pos += 1;
175 return NOT_A_CODE_POINT;
176 } else {
177 return readUint24AndAdvancePosition(buffer, pos);
178 }
179 } else {
180 *pos += 1;
181 return firstByte;
182 }
183 }
184
185 /**
186 * String (array of code points) Reading
187 *
188 * Reads code points until the terminator is found.
189 */
190 // Returns the length of the string.
191 static int readStringAndAdvancePosition(const uint8_t* const buffer,
192 const int maxLength,
193 int* const outBuffer,
194 int* const pos) {
195 int length = 0;
196 int codePoint = readCodePointAndAdvancePosition(buffer, pos);
197 while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
198 outBuffer[length++] = codePoint;
199 codePoint = readCodePointAndAdvancePosition(buffer, pos);
200 }
201 return length;
202 }
203
204 // Advances the position and returns the length of the string.
205 static int advancePositionToBehindString(const uint8_t* const buffer,
206 const int maxLength,
207 int* const pos) {
208 int length = 0;
209 int codePoint = readCodePointAndAdvancePosition(buffer, pos);
210 while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
211 codePoint = readCodePointAndAdvancePosition(buffer, pos);
212 length++;
213 }
214 return length;
215 }
216
217 /**
218 * String (array of code points) Writing
219 */
220 static void writeCodePointsAndAdvancePosition(uint8_t* const buffer,
221 const int* const codePoints,
222 const int codePointCount,
223 const bool writesTerminator,
224 int* const pos) {
225 for (int i = 0; i < codePointCount; ++i) {
226 const int codePoint = codePoints[i];
227 if (codePoint == NOT_A_CODE_POINT ||
228 codePoint == CHARACTER_ARRAY_TERMINATOR) {
229 break;
230 } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE ||
231 codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
232 // three bytes character.
233 writeUint24AndAdvancePosition(buffer, codePoint, pos);
234 } else {
235 // one byte character.
236 writeUint8AndAdvancePosition(buffer, codePoint, pos);
237 }
238 }
239 if (writesTerminator) {
240 writeUint8AndAdvancePosition(buffer, CHARACTER_ARRAY_TERMINATOR, pos);
241 }
242 }
243
244 static int calculateRequiredByteCountToStoreCodePoints(
245 const int* const codePoints,
246 const int codePointCount,
247 const bool writesTerminator) {
248 int byteCount = 0;
249 for (int i = 0; i < codePointCount; ++i) {
250 const int codePoint = codePoints[i];
251 if (codePoint == NOT_A_CODE_POINT ||
252 codePoint == CHARACTER_ARRAY_TERMINATOR) {
253 break;
254 } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE ||
255 codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
256 // three bytes character.
257 byteCount += 3;
258 } else {
259 // one byte character.
260 byteCount += 1;
261 }
262 }
263 if (writesTerminator) {
264 // The terminator is one byte.
265 byteCount += 1;
266 }
267 return byteCount;
268 }
269
270 private:
271 DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils);
272
273 static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE;
274 static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE;
275 static const uint8_t CHARACTER_ARRAY_TERMINATOR;
276
277 static AK_FORCE_INLINE void writeUint32AndAdvancePosition(
278 uint8_t* const buffer,
279 const uint32_t data,
280 int* const pos) {
281 buffer[(*pos)++] = (data >> 24) & 0xFF;
282 buffer[(*pos)++] = (data >> 16) & 0xFF;
283 buffer[(*pos)++] = (data >> 8) & 0xFF;
284 buffer[(*pos)++] = data & 0xFF;
285 }
286
287 static AK_FORCE_INLINE void writeUint24AndAdvancePosition(
288 uint8_t* const buffer,
289 const uint32_t data,
290 int* const pos) {
291 buffer[(*pos)++] = (data >> 16) & 0xFF;
292 buffer[(*pos)++] = (data >> 8) & 0xFF;
293 buffer[(*pos)++] = data & 0xFF;
294 }
295
296 static AK_FORCE_INLINE void writeUint16AndAdvancePosition(
297 uint8_t* const buffer,
298 const uint16_t data,
299 int* const pos) {
300 buffer[(*pos)++] = (data >> 8) & 0xFF;
301 buffer[(*pos)++] = data & 0xFF;
302 }
303
304 static AK_FORCE_INLINE void writeUint8AndAdvancePosition(
305 uint8_t* const buffer,
306 const uint8_t data,
307 int* const pos) {
308 buffer[(*pos)++] = data & 0xFF;
309 }
310 };
311 } // namespace latinime
312 #endif /* LATINIME_BYTE_ARRAY_UTILS_H */
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698