OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (C) 2013, The Android Open Source Project |
| 3 * |
| 4 * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 * you may not use this file except in compliance with the License. |
| 6 * You may obtain a copy of the License at |
| 7 * |
| 8 * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 * |
| 10 * Unless required by applicable law or agreed to in writing, software |
| 11 * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 * See the License for the specific language governing permissions and |
| 14 * limitations under the License. |
| 15 */ |
| 16 |
| 17 #ifndef LATINIME_BYTE_ARRAY_UTILS_H |
| 18 #define LATINIME_BYTE_ARRAY_UTILS_H |
| 19 |
| 20 #include <cstdint> |
| 21 |
| 22 #include "third_party/android_prediction/defines.h" |
| 23 |
| 24 namespace latinime { |
| 25 |
| 26 /** |
| 27 * Utility methods for reading byte arrays. |
| 28 */ |
| 29 class ByteArrayUtils { |
| 30 public: |
| 31 /** |
| 32 * Integer writing |
| 33 * |
| 34 * Each method write a corresponding size integer in a big endian manner. |
| 35 */ |
| 36 static AK_FORCE_INLINE void writeUintAndAdvancePosition(uint8_t *const buffe
r, |
| 37 const uint32_t data, const int size, int *const pos) { |
| 38 // size must be in 1 to 4. |
| 39 ASSERT(size >= 1 && size <= 4); |
| 40 switch (size) { |
| 41 case 1: |
| 42 ByteArrayUtils::writeUint8AndAdvancePosition(buffer, data, pos); |
| 43 return; |
| 44 case 2: |
| 45 ByteArrayUtils::writeUint16AndAdvancePosition(buffer, data, pos)
; |
| 46 return; |
| 47 case 3: |
| 48 ByteArrayUtils::writeUint24AndAdvancePosition(buffer, data, pos)
; |
| 49 return; |
| 50 case 4: |
| 51 ByteArrayUtils::writeUint32AndAdvancePosition(buffer, data, pos)
; |
| 52 return; |
| 53 default: |
| 54 break; |
| 55 } |
| 56 } |
| 57 |
| 58 /** |
| 59 * Integer reading |
| 60 * |
| 61 * Each method read a corresponding size integer in a big endian manner. |
| 62 */ |
| 63 static AK_FORCE_INLINE uint32_t readUint32(const uint8_t *const buffer, cons
t int pos) { |
| 64 return (buffer[pos] << 24) ^ (buffer[pos + 1] << 16) |
| 65 ^ (buffer[pos + 2] << 8) ^ buffer[pos + 3]; |
| 66 } |
| 67 |
| 68 static AK_FORCE_INLINE uint32_t readUint24(const uint8_t *const buffer, cons
t int pos) { |
| 69 return (buffer[pos] << 16) ^ (buffer[pos + 1] << 8) ^ buffer[pos + 2]; |
| 70 } |
| 71 |
| 72 static AK_FORCE_INLINE uint16_t readUint16(const uint8_t *const buffer, cons
t int pos) { |
| 73 return (buffer[pos] << 8) ^ buffer[pos + 1]; |
| 74 } |
| 75 |
| 76 static AK_FORCE_INLINE uint8_t readUint8(const uint8_t *const buffer, const
int pos) { |
| 77 return buffer[pos]; |
| 78 } |
| 79 |
| 80 static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition( |
| 81 const uint8_t *const buffer, int *const pos) { |
| 82 const uint32_t value = readUint32(buffer, *pos); |
| 83 *pos += 4; |
| 84 return value; |
| 85 } |
| 86 |
| 87 static AK_FORCE_INLINE int readSint24AndAdvancePosition( |
| 88 const uint8_t *const buffer, int *const pos) { |
| 89 const uint8_t value = readUint8(buffer, *pos); |
| 90 if (value < 0x80) { |
| 91 return readUint24AndAdvancePosition(buffer, pos); |
| 92 } else { |
| 93 (*pos)++; |
| 94 return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffe
r, pos)); |
| 95 } |
| 96 } |
| 97 |
| 98 static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition( |
| 99 const uint8_t *const buffer, int *const pos) { |
| 100 const uint32_t value = readUint24(buffer, *pos); |
| 101 *pos += 3; |
| 102 return value; |
| 103 } |
| 104 |
| 105 static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition( |
| 106 const uint8_t *const buffer, int *const pos) { |
| 107 const uint16_t value = readUint16(buffer, *pos); |
| 108 *pos += 2; |
| 109 return value; |
| 110 } |
| 111 |
| 112 static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition( |
| 113 const uint8_t *const buffer, int *const pos) { |
| 114 return buffer[(*pos)++]; |
| 115 } |
| 116 |
| 117 static AK_FORCE_INLINE int readUint(const uint8_t *const buffer, |
| 118 const int size, const int pos) { |
| 119 // size must be in 1 to 4. |
| 120 ASSERT(size >= 1 && size <= 4); |
| 121 switch (size) { |
| 122 case 1: |
| 123 return ByteArrayUtils::readUint8(buffer, pos); |
| 124 case 2: |
| 125 return ByteArrayUtils::readUint16(buffer, pos); |
| 126 case 3: |
| 127 return ByteArrayUtils::readUint24(buffer, pos); |
| 128 case 4: |
| 129 return ByteArrayUtils::readUint32(buffer, pos); |
| 130 default: |
| 131 return 0; |
| 132 } |
| 133 } |
| 134 |
| 135 /** |
| 136 * Code Point Reading |
| 137 * |
| 138 * 1 byte = bbbbbbbb match |
| 139 * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte |
| 140 * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant c
hoice because |
| 141 * unicode code points range from 0 to 0x10FFFF, so any 3-byte value s
tarting with |
| 142 * 00011111 would be outside unicode. |
| 143 * else: iso-latin-1 code |
| 144 * This allows for the whole unicode range to be encoded, including chars ou
tside of |
| 145 * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, excep
t control |
| 146 * characters which should never happen anyway (and still work, but take 3 b
ytes). |
| 147 */ |
| 148 static AK_FORCE_INLINE int readCodePoint(const uint8_t *const buffer, const
int pos) { |
| 149 int p = pos; |
| 150 return readCodePointAndAdvancePosition(buffer, &p); |
| 151 } |
| 152 |
| 153 static AK_FORCE_INLINE int readCodePointAndAdvancePosition( |
| 154 const uint8_t *const buffer, int *const pos) { |
| 155 const uint8_t firstByte = readUint8(buffer, *pos); |
| 156 if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) { |
| 157 if (firstByte == CHARACTER_ARRAY_TERMINATOR) { |
| 158 *pos += 1; |
| 159 return NOT_A_CODE_POINT; |
| 160 } else { |
| 161 return readUint24AndAdvancePosition(buffer, pos); |
| 162 } |
| 163 } else { |
| 164 *pos += 1; |
| 165 return firstByte; |
| 166 } |
| 167 } |
| 168 |
| 169 /** |
| 170 * String (array of code points) Reading |
| 171 * |
| 172 * Reads code points until the terminator is found. |
| 173 */ |
| 174 // Returns the length of the string. |
| 175 static int readStringAndAdvancePosition(const uint8_t *const buffer, |
| 176 const int maxLength, int *const outBuffer, int *const pos) { |
| 177 int length = 0; |
| 178 int codePoint = readCodePointAndAdvancePosition(buffer, pos); |
| 179 while (NOT_A_CODE_POINT != codePoint && length < maxLength) { |
| 180 outBuffer[length++] = codePoint; |
| 181 codePoint = readCodePointAndAdvancePosition(buffer, pos); |
| 182 } |
| 183 return length; |
| 184 } |
| 185 |
| 186 // Advances the position and returns the length of the string. |
| 187 static int advancePositionToBehindString( |
| 188 const uint8_t *const buffer, const int maxLength, int *const pos) { |
| 189 int length = 0; |
| 190 int codePoint = readCodePointAndAdvancePosition(buffer, pos); |
| 191 while (NOT_A_CODE_POINT != codePoint && length < maxLength) { |
| 192 codePoint = readCodePointAndAdvancePosition(buffer, pos); |
| 193 length++; |
| 194 } |
| 195 return length; |
| 196 } |
| 197 |
| 198 /** |
| 199 * String (array of code points) Writing |
| 200 */ |
| 201 static void writeCodePointsAndAdvancePosition(uint8_t *const buffer, |
| 202 const int *const codePoints, const int codePointCount, const bool wr
itesTerminator, |
| 203 int *const pos) { |
| 204 for (int i = 0; i < codePointCount; ++i) { |
| 205 const int codePoint = codePoints[i]; |
| 206 if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TE
RMINATOR) { |
| 207 break; |
| 208 } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE |
| 209 || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) { |
| 210 // three bytes character. |
| 211 writeUint24AndAdvancePosition(buffer, codePoint, pos); |
| 212 } else { |
| 213 // one byte character. |
| 214 writeUint8AndAdvancePosition(buffer, codePoint, pos); |
| 215 } |
| 216 } |
| 217 if (writesTerminator) { |
| 218 writeUint8AndAdvancePosition(buffer, CHARACTER_ARRAY_TERMINATOR, pos
); |
| 219 } |
| 220 } |
| 221 |
| 222 static int calculateRequiredByteCountToStoreCodePoints(const int *const code
Points, |
| 223 const int codePointCount, const bool writesTerminator) { |
| 224 int byteCount = 0; |
| 225 for (int i = 0; i < codePointCount; ++i) { |
| 226 const int codePoint = codePoints[i]; |
| 227 if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TE
RMINATOR) { |
| 228 break; |
| 229 } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE |
| 230 || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) { |
| 231 // three bytes character. |
| 232 byteCount += 3; |
| 233 } else { |
| 234 // one byte character. |
| 235 byteCount += 1; |
| 236 } |
| 237 } |
| 238 if (writesTerminator) { |
| 239 // The terminator is one byte. |
| 240 byteCount += 1; |
| 241 } |
| 242 return byteCount; |
| 243 } |
| 244 |
| 245 private: |
| 246 DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils); |
| 247 |
| 248 static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE; |
| 249 static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE; |
| 250 static const uint8_t CHARACTER_ARRAY_TERMINATOR; |
| 251 |
| 252 static AK_FORCE_INLINE void writeUint32AndAdvancePosition(uint8_t *const buf
fer, |
| 253 const uint32_t data, int *const pos) { |
| 254 buffer[(*pos)++] = (data >> 24) & 0xFF; |
| 255 buffer[(*pos)++] = (data >> 16) & 0xFF; |
| 256 buffer[(*pos)++] = (data >> 8) & 0xFF; |
| 257 buffer[(*pos)++] = data & 0xFF; |
| 258 } |
| 259 |
| 260 static AK_FORCE_INLINE void writeUint24AndAdvancePosition(uint8_t *const buf
fer, |
| 261 const uint32_t data, int *const pos) { |
| 262 buffer[(*pos)++] = (data >> 16) & 0xFF; |
| 263 buffer[(*pos)++] = (data >> 8) & 0xFF; |
| 264 buffer[(*pos)++] = data & 0xFF; |
| 265 } |
| 266 |
| 267 static AK_FORCE_INLINE void writeUint16AndAdvancePosition(uint8_t *const buf
fer, |
| 268 const uint16_t data, int *const pos) { |
| 269 buffer[(*pos)++] = (data >> 8) & 0xFF; |
| 270 buffer[(*pos)++] = data & 0xFF; |
| 271 } |
| 272 |
| 273 static AK_FORCE_INLINE void writeUint8AndAdvancePosition(uint8_t *const buff
er, |
| 274 const uint8_t data, int *const pos) { |
| 275 buffer[(*pos)++] = data & 0xFF; |
| 276 } |
| 277 }; |
| 278 } // namespace latinime |
| 279 #endif /* LATINIME_BYTE_ARRAY_UTILS_H */ |
OLD | NEW |