| OLD | NEW |
| (Empty) | |
| 1 /* |
| 2 * Copyright (C) 2013, The Android Open Source Project |
| 3 * |
| 4 * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 * you may not use this file except in compliance with the License. |
| 6 * You may obtain a copy of the License at |
| 7 * |
| 8 * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 * |
| 10 * Unless required by applicable law or agreed to in writing, software |
| 11 * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 * See the License for the specific language governing permissions and |
| 14 * limitations under the License. |
| 15 */ |
| 16 |
| 17 #ifndef LATINIME_BYTE_ARRAY_UTILS_H |
| 18 #define LATINIME_BYTE_ARRAY_UTILS_H |
| 19 |
| 20 #include <cstdint> |
| 21 |
| 22 #include "third_party/prediction/defines.h" |
| 23 |
| 24 namespace latinime { |
| 25 |
| 26 /** |
| 27 * Utility methods for reading byte arrays. |
| 28 */ |
| 29 class ByteArrayUtils { |
| 30 public: |
| 31 /** |
| 32 * Integer writing |
| 33 * |
| 34 * Each method write a corresponding size integer in a big endian manner. |
| 35 */ |
| 36 static AK_FORCE_INLINE void writeUintAndAdvancePosition(uint8_t* const buffer, |
| 37 const uint32_t data, |
| 38 const int size, |
| 39 int* const pos) { |
| 40 // size must be in 1 to 4. |
| 41 ASSERT(size >= 1 && size <= 4); |
| 42 switch (size) { |
| 43 case 1: |
| 44 ByteArrayUtils::writeUint8AndAdvancePosition(buffer, data, pos); |
| 45 return; |
| 46 case 2: |
| 47 ByteArrayUtils::writeUint16AndAdvancePosition(buffer, data, pos); |
| 48 return; |
| 49 case 3: |
| 50 ByteArrayUtils::writeUint24AndAdvancePosition(buffer, data, pos); |
| 51 return; |
| 52 case 4: |
| 53 ByteArrayUtils::writeUint32AndAdvancePosition(buffer, data, pos); |
| 54 return; |
| 55 default: |
| 56 break; |
| 57 } |
| 58 } |
| 59 |
| 60 /** |
| 61 * Integer reading |
| 62 * |
| 63 * Each method read a corresponding size integer in a big endian manner. |
| 64 */ |
| 65 static AK_FORCE_INLINE uint32_t |
| 66 readUint32(const uint8_t* const buffer, const int pos) { |
| 67 return (buffer[pos] << 24) ^ (buffer[pos + 1] << 16) ^ |
| 68 (buffer[pos + 2] << 8) ^ buffer[pos + 3]; |
| 69 } |
| 70 |
| 71 static AK_FORCE_INLINE uint32_t |
| 72 readUint24(const uint8_t* const buffer, const int pos) { |
| 73 return (buffer[pos] << 16) ^ (buffer[pos + 1] << 8) ^ buffer[pos + 2]; |
| 74 } |
| 75 |
| 76 static AK_FORCE_INLINE uint16_t |
| 77 readUint16(const uint8_t* const buffer, const int pos) { |
| 78 return (buffer[pos] << 8) ^ buffer[pos + 1]; |
| 79 } |
| 80 |
| 81 static AK_FORCE_INLINE uint8_t |
| 82 readUint8(const uint8_t* const buffer, const int pos) { |
| 83 return buffer[pos]; |
| 84 } |
| 85 |
| 86 static AK_FORCE_INLINE uint32_t |
| 87 readUint32AndAdvancePosition(const uint8_t* const buffer, int* const pos) { |
| 88 const uint32_t value = readUint32(buffer, *pos); |
| 89 *pos += 4; |
| 90 return value; |
| 91 } |
| 92 |
| 93 static AK_FORCE_INLINE int readSint24AndAdvancePosition( |
| 94 const uint8_t* const buffer, |
| 95 int* const pos) { |
| 96 const uint8_t value = readUint8(buffer, *pos); |
| 97 if (value < 0x80) { |
| 98 return readUint24AndAdvancePosition(buffer, pos); |
| 99 } else { |
| 100 (*pos)++; |
| 101 return -(((value & 0x7F) << 16) ^ |
| 102 readUint16AndAdvancePosition(buffer, pos)); |
| 103 } |
| 104 } |
| 105 |
| 106 static AK_FORCE_INLINE uint32_t |
| 107 readUint24AndAdvancePosition(const uint8_t* const buffer, int* const pos) { |
| 108 const uint32_t value = readUint24(buffer, *pos); |
| 109 *pos += 3; |
| 110 return value; |
| 111 } |
| 112 |
| 113 static AK_FORCE_INLINE uint16_t |
| 114 readUint16AndAdvancePosition(const uint8_t* const buffer, int* const pos) { |
| 115 const uint16_t value = readUint16(buffer, *pos); |
| 116 *pos += 2; |
| 117 return value; |
| 118 } |
| 119 |
| 120 static AK_FORCE_INLINE uint8_t |
| 121 readUint8AndAdvancePosition(const uint8_t* const buffer, int* const pos) { |
| 122 return buffer[(*pos)++]; |
| 123 } |
| 124 |
| 125 static AK_FORCE_INLINE int readUint(const uint8_t* const buffer, |
| 126 const int size, |
| 127 const int pos) { |
| 128 // size must be in 1 to 4. |
| 129 ASSERT(size >= 1 && size <= 4); |
| 130 switch (size) { |
| 131 case 1: |
| 132 return ByteArrayUtils::readUint8(buffer, pos); |
| 133 case 2: |
| 134 return ByteArrayUtils::readUint16(buffer, pos); |
| 135 case 3: |
| 136 return ByteArrayUtils::readUint24(buffer, pos); |
| 137 case 4: |
| 138 return ByteArrayUtils::readUint32(buffer, pos); |
| 139 default: |
| 140 return 0; |
| 141 } |
| 142 } |
| 143 |
| 144 /** |
| 145 * Code Point Reading |
| 146 * |
| 147 * 1 byte = bbbbbbbb match |
| 148 * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte |
| 149 * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant |
| 150 *choice because |
| 151 * unicode code points range from 0 to 0x10FFFF, so any 3-byte value |
| 152 *starting with |
| 153 * 00011111 would be outside unicode. |
| 154 * else: iso-latin-1 code |
| 155 * This allows for the whole unicode range to be encoded, including chars |
| 156 *outside of |
| 157 * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except |
| 158 *control |
| 159 * characters which should never happen anyway (and still work, but take 3 |
| 160 *bytes). |
| 161 */ |
| 162 static AK_FORCE_INLINE int readCodePoint(const uint8_t* const buffer, |
| 163 const int pos) { |
| 164 int p = pos; |
| 165 return readCodePointAndAdvancePosition(buffer, &p); |
| 166 } |
| 167 |
| 168 static AK_FORCE_INLINE int readCodePointAndAdvancePosition( |
| 169 const uint8_t* const buffer, |
| 170 int* const pos) { |
| 171 const uint8_t firstByte = readUint8(buffer, *pos); |
| 172 if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) { |
| 173 if (firstByte == CHARACTER_ARRAY_TERMINATOR) { |
| 174 *pos += 1; |
| 175 return NOT_A_CODE_POINT; |
| 176 } else { |
| 177 return readUint24AndAdvancePosition(buffer, pos); |
| 178 } |
| 179 } else { |
| 180 *pos += 1; |
| 181 return firstByte; |
| 182 } |
| 183 } |
| 184 |
| 185 /** |
| 186 * String (array of code points) Reading |
| 187 * |
| 188 * Reads code points until the terminator is found. |
| 189 */ |
| 190 // Returns the length of the string. |
| 191 static int readStringAndAdvancePosition(const uint8_t* const buffer, |
| 192 const int maxLength, |
| 193 int* const outBuffer, |
| 194 int* const pos) { |
| 195 int length = 0; |
| 196 int codePoint = readCodePointAndAdvancePosition(buffer, pos); |
| 197 while (NOT_A_CODE_POINT != codePoint && length < maxLength) { |
| 198 outBuffer[length++] = codePoint; |
| 199 codePoint = readCodePointAndAdvancePosition(buffer, pos); |
| 200 } |
| 201 return length; |
| 202 } |
| 203 |
| 204 // Advances the position and returns the length of the string. |
| 205 static int advancePositionToBehindString(const uint8_t* const buffer, |
| 206 const int maxLength, |
| 207 int* const pos) { |
| 208 int length = 0; |
| 209 int codePoint = readCodePointAndAdvancePosition(buffer, pos); |
| 210 while (NOT_A_CODE_POINT != codePoint && length < maxLength) { |
| 211 codePoint = readCodePointAndAdvancePosition(buffer, pos); |
| 212 length++; |
| 213 } |
| 214 return length; |
| 215 } |
| 216 |
| 217 /** |
| 218 * String (array of code points) Writing |
| 219 */ |
| 220 static void writeCodePointsAndAdvancePosition(uint8_t* const buffer, |
| 221 const int* const codePoints, |
| 222 const int codePointCount, |
| 223 const bool writesTerminator, |
| 224 int* const pos) { |
| 225 for (int i = 0; i < codePointCount; ++i) { |
| 226 const int codePoint = codePoints[i]; |
| 227 if (codePoint == NOT_A_CODE_POINT || |
| 228 codePoint == CHARACTER_ARRAY_TERMINATOR) { |
| 229 break; |
| 230 } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE || |
| 231 codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) { |
| 232 // three bytes character. |
| 233 writeUint24AndAdvancePosition(buffer, codePoint, pos); |
| 234 } else { |
| 235 // one byte character. |
| 236 writeUint8AndAdvancePosition(buffer, codePoint, pos); |
| 237 } |
| 238 } |
| 239 if (writesTerminator) { |
| 240 writeUint8AndAdvancePosition(buffer, CHARACTER_ARRAY_TERMINATOR, pos); |
| 241 } |
| 242 } |
| 243 |
| 244 static int calculateRequiredByteCountToStoreCodePoints( |
| 245 const int* const codePoints, |
| 246 const int codePointCount, |
| 247 const bool writesTerminator) { |
| 248 int byteCount = 0; |
| 249 for (int i = 0; i < codePointCount; ++i) { |
| 250 const int codePoint = codePoints[i]; |
| 251 if (codePoint == NOT_A_CODE_POINT || |
| 252 codePoint == CHARACTER_ARRAY_TERMINATOR) { |
| 253 break; |
| 254 } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE || |
| 255 codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) { |
| 256 // three bytes character. |
| 257 byteCount += 3; |
| 258 } else { |
| 259 // one byte character. |
| 260 byteCount += 1; |
| 261 } |
| 262 } |
| 263 if (writesTerminator) { |
| 264 // The terminator is one byte. |
| 265 byteCount += 1; |
| 266 } |
| 267 return byteCount; |
| 268 } |
| 269 |
| 270 private: |
| 271 DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils); |
| 272 |
| 273 static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE; |
| 274 static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE; |
| 275 static const uint8_t CHARACTER_ARRAY_TERMINATOR; |
| 276 |
| 277 static AK_FORCE_INLINE void writeUint32AndAdvancePosition( |
| 278 uint8_t* const buffer, |
| 279 const uint32_t data, |
| 280 int* const pos) { |
| 281 buffer[(*pos)++] = (data >> 24) & 0xFF; |
| 282 buffer[(*pos)++] = (data >> 16) & 0xFF; |
| 283 buffer[(*pos)++] = (data >> 8) & 0xFF; |
| 284 buffer[(*pos)++] = data & 0xFF; |
| 285 } |
| 286 |
| 287 static AK_FORCE_INLINE void writeUint24AndAdvancePosition( |
| 288 uint8_t* const buffer, |
| 289 const uint32_t data, |
| 290 int* const pos) { |
| 291 buffer[(*pos)++] = (data >> 16) & 0xFF; |
| 292 buffer[(*pos)++] = (data >> 8) & 0xFF; |
| 293 buffer[(*pos)++] = data & 0xFF; |
| 294 } |
| 295 |
| 296 static AK_FORCE_INLINE void writeUint16AndAdvancePosition( |
| 297 uint8_t* const buffer, |
| 298 const uint16_t data, |
| 299 int* const pos) { |
| 300 buffer[(*pos)++] = (data >> 8) & 0xFF; |
| 301 buffer[(*pos)++] = data & 0xFF; |
| 302 } |
| 303 |
| 304 static AK_FORCE_INLINE void writeUint8AndAdvancePosition( |
| 305 uint8_t* const buffer, |
| 306 const uint8_t data, |
| 307 int* const pos) { |
| 308 buffer[(*pos)++] = data & 0xFF; |
| 309 } |
| 310 }; |
| 311 } // namespace latinime |
| 312 #endif /* LATINIME_BYTE_ARRAY_UTILS_H */ |
| OLD | NEW |