third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h - Issue 1247903003: Add spellcheck and word suggestion to the prediction service

Unified Diff: third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master

Patch Set: format README and CHROMIUM.diff Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp ('k') | third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h

diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h

new file mode 100644

index 0000000000000000000000000000000000000000..72ed66c4b9712549f01a6eb3a8c8a825895328a2

--- /dev/null

+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h

@@ -0,0 +1,279 @@

+/*

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ * http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+#ifndef LATINIME_BYTE_ARRAY_UTILS_H

+#define LATINIME_BYTE_ARRAY_UTILS_H

+#include <cstdint>

+#include "third_party/android_prediction/defines.h"

+namespace latinime {

+/**

+ * Utility methods for reading and writing byte arrays.

+ */

+class ByteArrayUtils {

+ public:

+ /**

+ * Integer writing

+ *

+ * Each method writes an integer of the corresponding size in big-endian byte order.

+ */

+    /**
+     * Writes `data` as a `size`-byte integer at buffer[*pos] by dispatching to the
+     * fixed-width writer for that size. The selected writer advances *pos by `size`.
+     *
+     * `size` must be in [1, 4]. For any other value the ASSERT fires; if execution
+     * continues past it, nothing is written and *pos is left unchanged.
+     */
+    static AK_FORCE_INLINE void writeUintAndAdvancePosition(uint8_t *const buffer,
+            const uint32_t data, const int size, int *const pos) {
+        // size must be in 1 to 4.
+        ASSERT(size >= 1 && size <= 4);
+        switch (size) {
+            case 1:
+                ByteArrayUtils::writeUint8AndAdvancePosition(buffer, data, pos);
+                return;
+            case 2:
+                ByteArrayUtils::writeUint16AndAdvancePosition(buffer, data, pos);
+                return;
+            case 3:
+                ByteArrayUtils::writeUint24AndAdvancePosition(buffer, data, pos);
+                return;
+            case 4:
+                ByteArrayUtils::writeUint32AndAdvancePosition(buffer, data, pos);
+                return;
+            default:
+                break;
+        }
+    }

+ /**

+ * Integer reading

+ *

+ * Each method reads an integer of the corresponding size in big-endian byte order.

+ */

+    /**
+     * Reads the four bytes buffer[pos] .. buffer[pos + 3] as a big-endian unsigned
+     * 32-bit integer. No bounds checking is performed here.
+     */
+    static AK_FORCE_INLINE uint32_t readUint32(const uint8_t *const buffer, const int pos) {
+        // Widen every byte to uint32_t before shifting. A bare uint8_t operand is promoted
+        // to (signed) int, so shifting a byte >= 0x80 left by 24 would move a bit into the
+        // sign bit of int instead of producing a well-defined unsigned result.
+        return (static_cast<uint32_t>(buffer[pos]) << 24)
+                | (static_cast<uint32_t>(buffer[pos + 1]) << 16)
+                | (static_cast<uint32_t>(buffer[pos + 2]) << 8)
+                | static_cast<uint32_t>(buffer[pos + 3]);
+    }

+    // Big-endian read of the three bytes starting at buffer[pos]; does not move any cursor.
+    static AK_FORCE_INLINE uint32_t readUint24(const uint8_t *const buffer, const int pos) {
+        const uint8_t *const bytes = buffer + pos;
+        return (bytes[0] << 16) ^ (bytes[1] << 8) ^ bytes[2];
+    }

+    // Big-endian read of the two bytes starting at buffer[pos]; does not move any cursor.
+    static AK_FORCE_INLINE uint16_t readUint16(const uint8_t *const buffer, const int pos) {
+        const int highByte = buffer[pos];
+        const int lowByte = buffer[pos + 1];
+        return (highByte << 8) ^ lowByte;
+    }

+    // Returns the single byte stored at buffer[pos].
+    static AK_FORCE_INLINE uint8_t readUint8(const uint8_t *const buffer, const int pos) {
+        return *(buffer + pos);
+    }

+    // Reads a big-endian 32-bit value at *pos and moves *pos four bytes forward.
+    static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition(
+            const uint8_t *const buffer, int *const pos) {
+        const int start = *pos;
+        *pos = start + 4;
+        return readUint32(buffer, start);
+    }

+    /**
+     * Reads a 3-byte sign-and-magnitude integer at *pos and advances *pos by three.
+     *
+     * The top bit of the first byte is the sign flag. When it is clear the three bytes
+     * are returned as a big-endian unsigned value; when it is set, the remaining 23 bits
+     * form the magnitude and the result is its negation.
+     */
+    static AK_FORCE_INLINE int readSint24AndAdvancePosition(
+            const uint8_t *const buffer, int *const pos) {
+        // Peek at the first byte without moving the cursor to inspect the sign flag.
+        const uint8_t value = readUint8(buffer, *pos);
+        if (value < 0x80) {
+            return readUint24AndAdvancePosition(buffer, pos);
+        } else {
+            // Skip the first byte (already held in `value`), then consume the low 16 bits.
+            (*pos)++;
+            return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffer, pos));
+        }
+    }

+    // Reads a big-endian 24-bit value at *pos and moves *pos three bytes forward.
+    static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition(
+            const uint8_t *const buffer, int *const pos) {
+        const int start = *pos;
+        *pos = start + 3;
+        return readUint24(buffer, start);
+    }

+    // Reads a big-endian 16-bit value at *pos and moves *pos two bytes forward.
+    static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition(
+            const uint8_t *const buffer, int *const pos) {
+        const int start = *pos;
+        *pos = start + 2;
+        return readUint16(buffer, start);
+    }

+    // Returns the byte at *pos and moves *pos one byte forward.
+    static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition(
+            const uint8_t *const buffer, int *const pos) {
+        const uint8_t value = buffer[*pos];
+        *pos += 1;
+        return value;
+    }

+    /**
+     * Reads a big-endian unsigned integer of `size` bytes starting at buffer[pos], by
+     * dispatching to the fixed-width reader for that size. The position is not advanced.
+     *
+     * `size` must be in [1, 4]. For any other value the ASSERT fires; if execution
+     * continues past it, 0 is returned.
+     *
+     * The result is returned as int, so a 4-byte value goes through the implicit
+     * uint32_t-to-int conversion.
+     */
+    static AK_FORCE_INLINE int readUint(const uint8_t *const buffer,
+            const int size, const int pos) {
+        // size must be in 1 to 4.
+        ASSERT(size >= 1 && size <= 4);
+        switch (size) {
+            case 1:
+                return ByteArrayUtils::readUint8(buffer, pos);
+            case 2:
+                return ByteArrayUtils::readUint16(buffer, pos);
+            case 3:
+                return ByteArrayUtils::readUint24(buffer, pos);
+            case 4:
+                return ByteArrayUtils::readUint32(buffer, pos);
+            default:
+                return 0;
+        }
+    }

+ /**

+ * Code Point Reading

+ *

+ * 1 byte = bbbbbbbb match

+ * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte

+ * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because

+ * unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with

+ * 00011111 would be outside unicode.

+ * else: iso-latin-1 code

+ * This allows for the whole unicode range to be encoded, including chars outside of

+ * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control

+ * characters which should never happen anyway (and still work, but take 3 bytes).

+ */

+    // Decodes the code point stored at buffer[pos] without exposing the advanced position:
+    // the cursor handed to the advancing reader is a throwaway local copy.
+    static AK_FORCE_INLINE int readCodePoint(const uint8_t *const buffer, const int pos) {
+        int scratchPos = pos;
+        const int codePoint = readCodePointAndAdvancePosition(buffer, &scratchPos);
+        return codePoint;
+    }

+    /**
+     * Decodes one code point at *pos and advances *pos past it.
+     *
+     * - First byte >= MINIMUM_ONE_BYTE_CHARACTER_VALUE: that byte is the code point
+     *   (one byte consumed).
+     * - First byte == CHARACTER_ARRAY_TERMINATOR: returns NOT_A_CODE_POINT
+     *   (one byte consumed).
+     * - Any other first byte: the code point is the big-endian 24-bit value starting at
+     *   that byte (three bytes consumed).
+     */
+    static AK_FORCE_INLINE int readCodePointAndAdvancePosition(
+            const uint8_t *const buffer, int *const pos) {
+        // Peek only; each branch below decides how far to advance.
+        const uint8_t firstByte = readUint8(buffer, *pos);
+        if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) {
+            if (firstByte == CHARACTER_ARRAY_TERMINATOR) {
+                *pos += 1;
+                return NOT_A_CODE_POINT;
+            } else {
+                return readUint24AndAdvancePosition(buffer, pos);
+            }
+        } else {
+            *pos += 1;
+            return firstByte;
+        }
+    }

+ /**

+ * String (array of code points) Reading

+ *

+ * Reads code points until the terminator is found.

+ */

+    // Copies code points from buffer into outBuffer until the terminator is decoded or
+    // maxLength code points have been stored, and returns how many were stored.
+    // A code point is always decoded before the limit is checked, so *pos also moves past
+    // the terminator (or past one extra code point when the limit is what stops the loop).
+    static int readStringAndAdvancePosition(const uint8_t *const buffer,
+            const int maxLength, int *const outBuffer, int *const pos) {
+        int storedCount = 0;
+        for (;;) {
+            const int current = readCodePointAndAdvancePosition(buffer, pos);
+            if (current == NOT_A_CODE_POINT || storedCount >= maxLength) {
+                break;
+            }
+            outBuffer[storedCount] = current;
+            ++storedCount;
+        }
+        return storedCount;
+    }

+    // Skips over a string: decodes code points from *pos until the terminator is decoded or
+    // maxLength code points have been counted, and returns that count.
+    // A code point is always decoded before the limit is checked, so *pos ends up past the
+    // terminator (or past one extra code point when the limit is what stops the loop).
+    static int advancePositionToBehindString(
+            const uint8_t *const buffer, const int maxLength, int *const pos) {
+        int skippedCount = 0;
+        for (;;) {
+            const int current = readCodePointAndAdvancePosition(buffer, pos);
+            if (current == NOT_A_CODE_POINT || skippedCount >= maxLength) {
+                break;
+            }
+            ++skippedCount;
+        }
+        return skippedCount;
+    }

+ /**

+ * String (array of code points) Writing

+ */

+    /**
+     * Encodes up to codePointCount code points from `codePoints` into `buffer` at *pos,
+     * advancing *pos past every byte written.
+     *
+     * Encoding stops early at the first element equal to NOT_A_CODE_POINT or
+     * CHARACTER_ARRAY_TERMINATOR. Code points inside
+     * [MINIMUM_ONE_BYTE_CHARACTER_VALUE, MAXIMUM_ONE_BYTE_CHARACTER_VALUE] take one byte;
+     * all others take three bytes. When writesTerminator is true, a single
+     * CHARACTER_ARRAY_TERMINATOR byte is appended after the code points.
+     */
+    static void writeCodePointsAndAdvancePosition(uint8_t *const buffer,
+            const int *const codePoints, const int codePointCount, const bool writesTerminator,
+            int *const pos) {
+        for (int i = 0; i < codePointCount; ++i) {
+            const int codePoint = codePoints[i];
+            if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) {
+                break;
+            } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
+                    || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
+                // three bytes character.
+                writeUint24AndAdvancePosition(buffer, codePoint, pos);
+            } else {
+                // one byte character.
+                writeUint8AndAdvancePosition(buffer, codePoint, pos);
+            }
+        }
+        if (writesTerminator) {
+            writeUint8AndAdvancePosition(buffer, CHARACTER_ARRAY_TERMINATOR, pos);
+        }
+    }

+    /**
+     * Returns the number of bytes needed to encode `codePoints`, without writing anything.
+     *
+     * Counting stops early at the first element equal to NOT_A_CODE_POINT or
+     * CHARACTER_ARRAY_TERMINATOR. Code points inside
+     * [MINIMUM_ONE_BYTE_CHARACTER_VALUE, MAXIMUM_ONE_BYTE_CHARACTER_VALUE] count as one
+     * byte; all others count as three. When writesTerminator is true, one more byte is
+     * added for the terminator.
+     */
+    static int calculateRequiredByteCountToStoreCodePoints(const int *const codePoints,
+            const int codePointCount, const bool writesTerminator) {
+        int byteCount = 0;
+        for (int i = 0; i < codePointCount; ++i) {
+            const int codePoint = codePoints[i];
+            if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) {
+                break;
+            } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
+                    || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
+                // three bytes character.
+                byteCount += 3;
+            } else {
+                // one byte character.
+                byteCount += 1;
+            }
+        }
+        if (writesTerminator) {
+            // The terminator is one byte.
+            byteCount += 1;
+        }
+        return byteCount;
+    }

+ private:

+ DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils);

+ static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE;

+ static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE;

+ static const uint8_t CHARACTER_ARRAY_TERMINATOR;

+    // Stores `data` at *pos as four bytes, most significant byte first, moving *pos forward
+    // by one for each byte written.
+    static AK_FORCE_INLINE void writeUint32AndAdvancePosition(uint8_t *const buffer,
+            const uint32_t data, int *const pos) {
+        for (int shift = 24; shift >= 0; shift -= 8) {
+            buffer[(*pos)++] = (data >> shift) & 0xFF;
+        }
+    }

+    // Stores the low 24 bits of `data` at *pos as three bytes, most significant byte first,
+    // moving *pos forward by one for each byte written.
+    static AK_FORCE_INLINE void writeUint24AndAdvancePosition(uint8_t *const buffer,
+            const uint32_t data, int *const pos) {
+        for (int shift = 16; shift >= 0; shift -= 8) {
+            buffer[(*pos)++] = (data >> shift) & 0xFF;
+        }
+    }

+    // Stores `data` at *pos as two bytes, high byte first, moving *pos forward by one for
+    // each byte written.
+    static AK_FORCE_INLINE void writeUint16AndAdvancePosition(uint8_t *const buffer,
+            const uint16_t data, int *const pos) {
+        for (int shift = 8; shift >= 0; shift -= 8) {
+            buffer[(*pos)++] = (data >> shift) & 0xFF;
+        }
+    }

+    // Stores the single byte `data` at *pos and moves *pos one byte forward.
+    static AK_FORCE_INLINE void writeUint8AndAdvancePosition(uint8_t *const buffer,
+            const uint8_t data, int *const pos) {
+        buffer[*pos] = data & 0xFF;
+        ++(*pos);
+    }

+};

+} // namespace latinime

+#endif /* LATINIME_BYTE_ARRAY_UTILS_H */