Index: third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h |
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..72ed66c4b9712549f01a6eb3a8c8a825895328a2 |
--- /dev/null |
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h |
@@ -0,0 +1,279 @@ |
+/* |
+ * Copyright (C) 2013, The Android Open Source Project |
+ * |
+ * Licensed under the Apache License, Version 2.0 (the "License"); |
+ * you may not use this file except in compliance with the License. |
+ * You may obtain a copy of the License at |
+ * |
+ * http://www.apache.org/licenses/LICENSE-2.0 |
+ * |
+ * Unless required by applicable law or agreed to in writing, software |
+ * distributed under the License is distributed on an "AS IS" BASIS, |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
+ * See the License for the specific language governing permissions and |
+ * limitations under the License. |
+ */ |
+ |
+#ifndef LATINIME_BYTE_ARRAY_UTILS_H |
+#define LATINIME_BYTE_ARRAY_UTILS_H |
+ |
+#include <cstdint> |
+ |
+#include "third_party/android_prediction/defines.h" |
+ |
+namespace latinime { |
+ |
+/** |
+ * Utility methods for reading and writing big endian integers, code points and |
+ * code point strings in byte arrays. |
+ * |
+ * No method takes the length of the byte buffer, so no bounds checking is performed |
+ * here; callers must make sure every accessed position lies inside the buffer. |
+ */ |
+class ByteArrayUtils { |
+ public: |
+    /** |
+     * Integer writing |
+     * |
+     * Each method writes an integer of the corresponding size in a big endian manner. |
+     */ |
+    // Writes the lowest `size` bytes of `data` at *pos and advances *pos by `size`. |
+    // For a size outside of 1 to 4 nothing is written and *pos is left untouched |
+    // (apart from whatever ASSERT does). |
+    static AK_FORCE_INLINE void writeUintAndAdvancePosition(uint8_t *const buffer, |
+            const uint32_t data, const int size, int *const pos) { |
+        // size must be in 1 to 4. |
+        ASSERT(size >= 1 && size <= 4); |
+        switch (size) { |
+            case 1: |
+                ByteArrayUtils::writeUint8AndAdvancePosition(buffer, data, pos); |
+                return; |
+            case 2: |
+                ByteArrayUtils::writeUint16AndAdvancePosition(buffer, data, pos); |
+                return; |
+            case 3: |
+                ByteArrayUtils::writeUint24AndAdvancePosition(buffer, data, pos); |
+                return; |
+            case 4: |
+                ByteArrayUtils::writeUint32AndAdvancePosition(buffer, data, pos); |
+                return; |
+            default: |
+                break; |
+        } |
+    } |
+ |
+    /** |
+     * Integer reading |
+     * |
+     * Each method reads an integer of the corresponding size in a big endian manner. |
+     */ |
+    // Reads the 4 bytes starting at buffer[pos] as an unsigned 32-bit value. |
+    static AK_FORCE_INLINE uint32_t readUint32(const uint8_t *const buffer, const int pos) { |
+        // Widen every byte to uint32_t before shifting. A uint8_t operand is promoted to |
+        // signed int, and shifting a byte >= 0x80 left by 24 bits would overflow it. |
+        return (static_cast<uint32_t>(buffer[pos]) << 24) |
+                ^ (static_cast<uint32_t>(buffer[pos + 1]) << 16) |
+                ^ (static_cast<uint32_t>(buffer[pos + 2]) << 8) |
+                ^ static_cast<uint32_t>(buffer[pos + 3]); |
+    } |
+ |
+    // Reads the 3 bytes starting at buffer[pos] as an unsigned 24-bit value. |
+    static AK_FORCE_INLINE uint32_t readUint24(const uint8_t *const buffer, const int pos) { |
+        return (static_cast<uint32_t>(buffer[pos]) << 16) |
+                ^ (static_cast<uint32_t>(buffer[pos + 1]) << 8) |
+                ^ static_cast<uint32_t>(buffer[pos + 2]); |
+    } |
+ |
+    // Reads the 2 bytes starting at buffer[pos] as an unsigned 16-bit value. |
+    static AK_FORCE_INLINE uint16_t readUint16(const uint8_t *const buffer, const int pos) { |
+        return (buffer[pos] << 8) ^ buffer[pos + 1]; |
+    } |
+ |
+    // Returns the byte at buffer[pos]. |
+    static AK_FORCE_INLINE uint8_t readUint8(const uint8_t *const buffer, const int pos) { |
+        return buffer[pos]; |
+    } |
+ |
+    // Reads an unsigned 32-bit value at *pos and advances *pos by 4. |
+    static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition( |
+            const uint8_t *const buffer, int *const pos) { |
+        const uint32_t value = readUint32(buffer, *pos); |
+        *pos += 4; |
+        return value; |
+    } |
+ |
+    // Reads a 3-byte sign-and-magnitude value at *pos and advances *pos by 3. |
+    // The top bit of the first byte is the sign flag (set means negative); the remaining |
+    // 23 bits hold the magnitude. |
+    static AK_FORCE_INLINE int readSint24AndAdvancePosition( |
+            const uint8_t *const buffer, int *const pos) { |
+        const uint8_t value = readUint8(buffer, *pos); |
+        if (value < 0x80) { |
+            // Sign flag is clear: the 3 bytes are the value itself. |
+            return readUint24AndAdvancePosition(buffer, pos); |
+        } else { |
+            // Sign flag is set: drop it from the first byte, append the next 2 bytes and |
+            // negate the resulting magnitude. |
+            (*pos)++; |
+            return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffer, pos)); |
+        } |
+    } |
+ |
+    // Reads an unsigned 24-bit value at *pos and advances *pos by 3. |
+    static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition( |
+            const uint8_t *const buffer, int *const pos) { |
+        const uint32_t value = readUint24(buffer, *pos); |
+        *pos += 3; |
+        return value; |
+    } |
+ |
+    // Reads an unsigned 16-bit value at *pos and advances *pos by 2. |
+    static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition( |
+            const uint8_t *const buffer, int *const pos) { |
+        const uint16_t value = readUint16(buffer, *pos); |
+        *pos += 2; |
+        return value; |
+    } |
+ |
+    // Returns the byte at *pos and advances *pos by 1. |
+    static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition( |
+            const uint8_t *const buffer, int *const pos) { |
+        return buffer[(*pos)++]; |
+    } |
+ |
+    // Reads a `size`-byte unsigned value at buffer[pos]. Returns 0 for a size outside of |
+    // 1 to 4. Note that the return type is int, so a 4-byte value that has its top bit set |
+    // is converted from uint32_t to int on return. |
+    static AK_FORCE_INLINE int readUint(const uint8_t *const buffer, |
+            const int size, const int pos) { |
+        // size must be in 1 to 4. |
+        ASSERT(size >= 1 && size <= 4); |
+        switch (size) { |
+            case 1: |
+                return ByteArrayUtils::readUint8(buffer, pos); |
+            case 2: |
+                return ByteArrayUtils::readUint16(buffer, pos); |
+            case 3: |
+                return ByteArrayUtils::readUint24(buffer, pos); |
+            case 4: |
+                return ByteArrayUtils::readUint32(buffer, pos); |
+            default: |
+                return 0; |
+        } |
+    } |
+ |
+    /** |
+     * Code Point Reading |
+     * |
+     * 1 byte = bbbbbbbb match |
+     * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte |
+     * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because |
+     *       unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with |
+     *       00011111 would be outside unicode. |
+     * else: iso-latin-1 code |
+     * This allows for the whole unicode range to be encoded, including chars outside of |
+     * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control |
+     * characters which should never happen anyway (and still work, but take 3 bytes). |
+     */ |
+    // Reads the code point stored at buffer[pos] without moving any caller-side position. |
+    static AK_FORCE_INLINE int readCodePoint(const uint8_t *const buffer, const int pos) { |
+        int p = pos; |
+        return readCodePointAndAdvancePosition(buffer, &p); |
+    } |
+ |
+    // Reads the code point stored at *pos and advances *pos behind it (by 1 or 3 bytes). |
+    // Returns NOT_A_CODE_POINT when the byte at *pos is the terminator; the terminator is |
+    // consumed in that case. |
+    static AK_FORCE_INLINE int readCodePointAndAdvancePosition( |
+            const uint8_t *const buffer, int *const pos) { |
+        const uint8_t firstByte = readUint8(buffer, *pos); |
+        if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) { |
+            if (firstByte == CHARACTER_ARRAY_TERMINATOR) { |
+                *pos += 1; |
+                return NOT_A_CODE_POINT; |
+            } else { |
+                // three bytes character; the first byte is part of the value. |
+                return readUint24AndAdvancePosition(buffer, pos); |
+            } |
+        } else { |
+            // one byte character. |
+            *pos += 1; |
+            return firstByte; |
+        } |
+    } |
+ |
+    /** |
+     * String (array of code points) Reading |
+     * |
+     * Reads code points until the terminator is found. |
+     */ |
+    // Stores the code points into outBuffer and returns the length of the string. |
+    // At most maxLength code points are stored. One code point (or the terminator) is |
+    // always read ahead of the stored ones, so when the limit is what stops the loop, *pos |
+    // is behind one more code point than was stored and not necessarily behind the |
+    // terminator. |
+    static int readStringAndAdvancePosition(const uint8_t *const buffer, |
+            const int maxLength, int *const outBuffer, int *const pos) { |
+        int length = 0; |
+        int codePoint = readCodePointAndAdvancePosition(buffer, pos); |
+        while (NOT_A_CODE_POINT != codePoint && length < maxLength) { |
+            outBuffer[length++] = codePoint; |
+            codePoint = readCodePointAndAdvancePosition(buffer, pos); |
+        } |
+        return length; |
+    } |
+ |
+    // Advances the position and returns the length of the string. |
+    // Counts at most maxLength code points, with the same read-ahead behavior as |
+    // readStringAndAdvancePosition(): *pos is only guaranteed to be behind the terminator |
+    // when the terminator was reached before the limit. |
+    static int advancePositionToBehindString( |
+            const uint8_t *const buffer, const int maxLength, int *const pos) { |
+        int length = 0; |
+        int codePoint = readCodePointAndAdvancePosition(buffer, pos); |
+        while (NOT_A_CODE_POINT != codePoint && length < maxLength) { |
+            codePoint = readCodePointAndAdvancePosition(buffer, pos); |
+            length++; |
+        } |
+        return length; |
+    } |
+ |
+    /** |
+     * String (array of code points) Writing |
+     */ |
+    // Writes up to codePointCount code points at *pos and advances *pos behind them. |
+    // Writing stops early at the first NOT_A_CODE_POINT or CHARACTER_ARRAY_TERMINATOR value |
+    // found in codePoints. When writesTerminator is true, a terminator byte is appended. |
+    static void writeCodePointsAndAdvancePosition(uint8_t *const buffer, |
+            const int *const codePoints, const int codePointCount, const bool writesTerminator, |
+            int *const pos) { |
+        for (int i = 0; i < codePointCount; ++i) { |
+            const int codePoint = codePoints[i]; |
+            if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) { |
+                break; |
+            } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE |
+                    || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) { |
+                // three bytes character. |
+                writeUint24AndAdvancePosition(buffer, codePoint, pos); |
+            } else { |
+                // one byte character. |
+                writeUint8AndAdvancePosition(buffer, codePoint, pos); |
+            } |
+        } |
+        if (writesTerminator) { |
+            writeUint8AndAdvancePosition(buffer, CHARACTER_ARRAY_TERMINATOR, pos); |
+        } |
+    } |
+ |
+    // Returns the number of bytes writeCodePointsAndAdvancePosition() would write for the |
+    // same arguments. Both methods must keep using the same one byte / three bytes rule. |
+    static int calculateRequiredByteCountToStoreCodePoints(const int *const codePoints, |
+            const int codePointCount, const bool writesTerminator) { |
+        int byteCount = 0; |
+        for (int i = 0; i < codePointCount; ++i) { |
+            const int codePoint = codePoints[i]; |
+            if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) { |
+                break; |
+            } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE |
+                    || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) { |
+                // three bytes character. |
+                byteCount += 3; |
+            } else { |
+                // one byte character. |
+                byteCount += 1; |
+            } |
+        } |
+        if (writesTerminator) { |
+            // The terminator is one byte. |
+            byteCount += 1; |
+        } |
+        return byteCount; |
+    } |
+ |
+ private: |
+    DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils); |
+ |
+    // Bounds of the code point range stored as a single byte, and the byte marking the end |
+    // of a code point array. Only declared here; the values are defined outside of this |
+    // header. |
+    static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE; |
+    static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE; |
+    static const uint8_t CHARACTER_ARRAY_TERMINATOR; |
+ |
+    // Writes all 4 bytes of data, most significant byte first, and advances *pos by 4. |
+    static AK_FORCE_INLINE void writeUint32AndAdvancePosition(uint8_t *const buffer, |
+            const uint32_t data, int *const pos) { |
+        buffer[(*pos)++] = (data >> 24) & 0xFF; |
+        buffer[(*pos)++] = (data >> 16) & 0xFF; |
+        buffer[(*pos)++] = (data >> 8) & 0xFF; |
+        buffer[(*pos)++] = data & 0xFF; |
+    } |
+ |
+    // Writes the low 3 bytes of data, most significant byte first, and advances *pos by 3. |
+    static AK_FORCE_INLINE void writeUint24AndAdvancePosition(uint8_t *const buffer, |
+            const uint32_t data, int *const pos) { |
+        buffer[(*pos)++] = (data >> 16) & 0xFF; |
+        buffer[(*pos)++] = (data >> 8) & 0xFF; |
+        buffer[(*pos)++] = data & 0xFF; |
+    } |
+ |
+    // Writes the 2 bytes of data, most significant byte first, and advances *pos by 2. |
+    static AK_FORCE_INLINE void writeUint16AndAdvancePosition(uint8_t *const buffer, |
+            const uint16_t data, int *const pos) { |
+        buffer[(*pos)++] = (data >> 8) & 0xFF; |
+        buffer[(*pos)++] = data & 0xFF; |
+    } |
+ |
+    // Writes the single byte data and advances *pos by 1. |
+    static AK_FORCE_INLINE void writeUint8AndAdvancePosition(uint8_t *const buffer, |
+            const uint8_t data, int *const pos) { |
+        buffer[(*pos)++] = data & 0xFF; |
+    } |
+}; |
+} // namespace latinime |
+#endif /* LATINIME_BYTE_ARRAY_UTILS_H */ |