| OLD | NEW |
| (Empty) |
| 1 // Protocol Buffers - Google's data interchange format | |
| 2 // Copyright 2008 Google Inc. All rights reserved. | |
| 3 // https://developers.google.com/protocol-buffers/ | |
| 4 // | |
| 5 // Redistribution and use in source and binary forms, with or without | |
| 6 // modification, are permitted provided that the following conditions are | |
| 7 // met: | |
| 8 // | |
| 9 // * Redistributions of source code must retain the above copyright | |
| 10 // notice, this list of conditions and the following disclaimer. | |
| 11 // * Redistributions in binary form must reproduce the above | |
| 12 // copyright notice, this list of conditions and the following disclaimer | |
| 13 // in the documentation and/or other materials provided with the | |
| 14 // distribution. | |
| 15 // * Neither the name of Google Inc. nor the names of its | |
| 16 // contributors may be used to endorse or promote products derived from | |
| 17 // this software without specific prior written permission. | |
| 18 // | |
| 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 30 | |
| 31 #include <stdbool.h> | |
| 32 #include <stdint.h> | |
| 33 | |
| 34 #include "utf8.h" | |
| 35 | |
// Sequence length implied by each possible first byte of a UTF-8 sequence,
// indexed by the byte value (16 entries per row, so each row covers 0xN0-0xNF).
// A value of 0 marks a byte that cannot start a sequence.
static const uint8_t utf8_offset[256] = {
    // 0x00-0x7F: single-byte sequences.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x80-0xBF: continuation bytes, never valid as a first byte.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0xC0-0xDF: first byte of a two-byte sequence.
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // 0xE0-0xEF: first byte of a three-byte sequence.
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0xF0-0xF7: first byte of a four-byte sequence; 0xF8-0xFF: invalid.
    4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0,
};
| 49 | |
| 50 bool is_structurally_valid_utf8(const char* buf, int len) { | |
| 51 int i, j; | |
| 52 uint8_t offset; | |
| 53 | |
| 54 i = 0; | |
| 55 while (i < len) { | |
| 56 offset = utf8_offset[(uint8_t)buf[i]]; | |
| 57 if (offset == 0 || i + offset > len) { | |
| 58 return false; | |
| 59 } | |
| 60 for (j = i + 1; j < i + offset; j++) { | |
| 61 if ((buf[j] & 0xc0) != 0x80) { | |
| 62 return false; | |
| 63 } | |
| 64 } | |
| 65 i += offset; | |
| 66 } | |
| 67 return i == len; | |
| 68 } | |
| OLD | NEW |