| Index: third_party/harfbuzz-ng/src/hb-utf-private.hh
|
| diff --git a/third_party/harfbuzz-ng/src/hb-utf-private.hh b/third_party/harfbuzz-ng/src/hb-utf-private.hh
|
| index b9a6519d28e0d13b133feea8a61fdd93ebd50d8b..0b798a05c30c5ea1fc0931aa13b44085c7a7f403 100644
|
| --- a/third_party/harfbuzz-ng/src/hb-utf-private.hh
|
| +++ b/third_party/harfbuzz-ng/src/hb-utf-private.hh
|
| @@ -1,5 +1,5 @@
|
| /*
|
| - * Copyright © 2011,2012 Google, Inc.
|
| + * Copyright © 2011,2012,2014 Google, Inc.
|
| *
|
| * This is part of HarfBuzz, a text shaping library.
|
| *
|
| @@ -29,176 +29,221 @@
|
|
|
| #include "hb-private.hh"
|
|
|
| +template <typename T, bool validate=true> struct hb_utf_t;
|
| +
|
|
|
| /* UTF-8 */
|
|
|
| -#define HB_UTF8_COMPUTE(Char, Mask, Len) \
|
| - if (Char < 128) { Len = 1; Mask = 0x7f; } \
|
| - else if ((Char & 0xe0) == 0xc0) { Len = 2; Mask = 0x1f; } \
|
| - else if ((Char & 0xf0) == 0xe0) { Len = 3; Mask = 0x0f; } \
|
| - else if ((Char & 0xf8) == 0xf0) { Len = 4; Mask = 0x07; } \
|
| - else Len = 0;
|
| -
|
| -static inline const uint8_t *
|
| -hb_utf_next (const uint8_t *text,
|
| - const uint8_t *end,
|
| - hb_codepoint_t *unicode)
|
| +template <>
|
| +struct hb_utf_t<uint8_t, true>
|
| {
|
| - hb_codepoint_t c = *text, mask;
|
| - unsigned int len;
|
| -
|
| - /* TODO check for overlong sequences? */
|
| -
|
| - HB_UTF8_COMPUTE (c, mask, len);
|
| - if (unlikely (!len || (unsigned int) (end - text) < len)) {
|
| - *unicode = -1;
|
| - return text + 1;
|
| - } else {
|
| - hb_codepoint_t result;
|
| - unsigned int i;
|
| - result = c & mask;
|
| - for (i = 1; i < len; i++)
|
| + static inline const uint8_t *
|
| + next (const uint8_t *text,
|
| + const uint8_t *end,
|
| + hb_codepoint_t *unicode,
|
| + hb_codepoint_t replacement)
|
| + {
|
| + /* Written to only accept well-formed sequences.
|
| + * Based on ideas from ICU's U8_NEXT.
|
| + * Generates one "replacement" for each ill-formed byte. */
|
| +
|
| + hb_codepoint_t c = *text++;
|
| +
|
| + if (c > 0x7Fu)
|
| + {
|
| + if (hb_in_range (c, 0xC2u, 0xDFu)) /* Two-byte */
|
| {
|
| - if (unlikely ((text[i] & 0xc0) != 0x80))
|
| - {
|
| - *unicode = -1;
|
| - return text + 1;
|
| - }
|
| - result <<= 6;
|
| - result |= (text[i] & 0x3f);
|
| + unsigned int t1;
|
| + if (likely (text < end &&
|
| + (t1 = text[0] - 0x80u) <= 0x3Fu))
|
| + {
|
| + c = ((c&0x1Fu)<<6) | t1;
|
| + text++;
|
| + }
|
| + else
|
| + goto error;
|
| }
|
| - *unicode = result;
|
| - return text + len;
|
| - }
|
| -}
|
| + else if (hb_in_range (c, 0xE0u, 0xEFu)) /* Three-byte */
|
| + {
|
| + unsigned int t1, t2;
|
| + if (likely (1 < end - text &&
|
| + (t1 = text[0] - 0x80u) <= 0x3Fu &&
|
| + (t2 = text[1] - 0x80u) <= 0x3Fu))
|
| + {
|
| + c = ((c&0xFu)<<12) | (t1<<6) | t2;
|
| + if (unlikely (c < 0x0800u || hb_in_range (c, 0xD800u, 0xDFFFu)))
|
| + goto error;
|
| + text += 2;
|
| + }
|
| + else
|
| + goto error;
|
| + }
|
| + else if (hb_in_range (c, 0xF0u, 0xF4u)) /* Four-byte */
|
| + {
|
| + unsigned int t1, t2, t3;
|
| + if (likely (2 < end - text &&
|
| + (t1 = text[0] - 0x80u) <= 0x3Fu &&
|
| + (t2 = text[1] - 0x80u) <= 0x3Fu &&
|
| + (t3 = text[2] - 0x80u) <= 0x3Fu))
|
| + {
|
| + c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3;
|
| + if (unlikely (!hb_in_range (c, 0x10000u, 0x10FFFFu)))
|
| + goto error;
|
| + text += 3;
|
| + }
|
| + else
|
| + goto error;
|
| + }
|
| + else
|
| + goto error;
|
| + }
|
|
|
| -static inline const uint8_t *
|
| -hb_utf_prev (const uint8_t *text,
|
| - const uint8_t *start,
|
| - hb_codepoint_t *unicode)
|
| -{
|
| - const uint8_t *end = text--;
|
| - while (start < text && (*text & 0xc0) == 0x80 && end - text < 4)
|
| - text--;
|
| + *unicode = c;
|
| + return text;
|
| +
|
| + error:
|
| + *unicode = replacement;
|
| + return text;
|
| + }
|
|
|
| - hb_codepoint_t c = *text, mask;
|
| - unsigned int len;
|
| + static inline const uint8_t *
|
| + prev (const uint8_t *text,
|
| + const uint8_t *start,
|
| + hb_codepoint_t *unicode,
|
| + hb_codepoint_t replacement)
|
| + {
|
| + const uint8_t *end = text--;
|
| + while (start < text && (*text & 0xc0) == 0x80 && end - text < 4)
|
| + text--;
|
|
|
| - /* TODO check for overlong sequences? */
|
| + if (likely (next (text, end, unicode, replacement) == end))
|
| + return text;
|
|
|
| - HB_UTF8_COMPUTE (c, mask, len);
|
| - if (unlikely (!len || (unsigned int) (end - text) != len)) {
|
| - *unicode = -1;
|
| + *unicode = replacement;
|
| return end - 1;
|
| - } else {
|
| - hb_codepoint_t result;
|
| - unsigned int i;
|
| - result = c & mask;
|
| - for (i = 1; i < len; i++)
|
| - {
|
| - result <<= 6;
|
| - result |= (text[i] & 0x3f);
|
| - }
|
| - *unicode = result;
|
| - return text;
|
| }
|
| -}
|
| -
|
|
|
| -static inline unsigned int
|
| -hb_utf_strlen (const uint8_t *text)
|
| -{
|
| - return strlen ((const char *) text);
|
| -}
|
| + static inline unsigned int
|
| + strlen (const uint8_t *text)
|
| + {
|
| + return ::strlen ((const char *) text);
|
| + }
|
| +};
|
|
|
|
|
| /* UTF-16 */
|
|
|
| -static inline const uint16_t *
|
| -hb_utf_next (const uint16_t *text,
|
| - const uint16_t *end,
|
| - hb_codepoint_t *unicode)
|
| +template <>
|
| +struct hb_utf_t<uint16_t, true>
|
| {
|
| - hb_codepoint_t c = *text++;
|
| -
|
| - if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xd800, 0xdbff)))
|
| + static inline const uint16_t *
|
| + next (const uint16_t *text,
|
| + const uint16_t *end,
|
| + hb_codepoint_t *unicode,
|
| + hb_codepoint_t replacement)
|
| {
|
| - /* high surrogate */
|
| - hb_codepoint_t l;
|
| - if (text < end && ((l = *text), likely (hb_in_range<hb_codepoint_t> (l, 0xdc00, 0xdfff))))
|
| + hb_codepoint_t c = *text++;
|
| +
|
| + if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu)))
|
| {
|
| - /* low surrogate */
|
| - *unicode = (c << 10) + l - ((0xd800 << 10) - 0x10000 + 0xdc00);
|
| - text++;
|
| - } else
|
| - *unicode = -1;
|
| - } else
|
| - *unicode = c;
|
| + *unicode = c;
|
| + return text;
|
| + }
|
|
|
| - return text;
|
| -}
|
| + if (likely (hb_in_range (c, 0xD800u, 0xDBFFu)))
|
| + {
|
| + /* High-surrogate in c */
|
| + hb_codepoint_t l;
|
| + if (text < end && ((l = *text), likely (hb_in_range (l, 0xDC00u, 0xDFFFu))))
|
| + {
|
| + /* Low-surrogate in l */
|
| + *unicode = (c << 10) + l - ((0xD800u << 10) - 0x10000u + 0xDC00u);
|
| + text++;
|
| + return text;
|
| + }
|
| + }
|
|
|
| -static inline const uint16_t *
|
| -hb_utf_prev (const uint16_t *text,
|
| - const uint16_t *start,
|
| - hb_codepoint_t *unicode)
|
| -{
|
| - hb_codepoint_t c = *--text;
|
| + /* Lonely / out-of-order surrogate. */
|
| + *unicode = replacement;
|
| + return text;
|
| + }
|
|
|
| - if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xdc00, 0xdfff)))
|
| + static inline const uint16_t *
|
| + prev (const uint16_t *text,
|
| + const uint16_t *start,
|
| + hb_codepoint_t *unicode,
|
| + hb_codepoint_t replacement)
|
| {
|
| - /* low surrogate */
|
| - hb_codepoint_t h;
|
| - if (start < text && ((h = *(text - 1)), likely (hb_in_range<hb_codepoint_t> (h, 0xd800, 0xdbff))))
|
| + const uint16_t *end = text--;
|
| + hb_codepoint_t c = *text;
|
| +
|
| + if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu)))
|
| {
|
| - /* high surrogate */
|
| - *unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00);
|
| - text--;
|
| - } else
|
| - *unicode = -1;
|
| - } else
|
| - *unicode = c;
|
| + *unicode = c;
|
| + return text;
|
| + }
|
|
|
| - return text;
|
| -}
|
| + if (likely (start < text && hb_in_range (c, 0xDC00u, 0xDFFFu)))
|
| + text--;
|
|
|
| + if (likely (next (text, end, unicode, replacement) == end))
|
| + return text;
|
|
|
| -static inline unsigned int
|
| -hb_utf_strlen (const uint16_t *text)
|
| -{
|
| - unsigned int l = 0;
|
| - while (*text++) l++;
|
| - return l;
|
| -}
|
| + *unicode = replacement;
|
| + return end - 1;
|
| + }
|
| +
|
| +
|
| + static inline unsigned int
|
| + strlen (const uint16_t *text)
|
| + {
|
| + unsigned int l = 0;
|
| + while (*text++) l++;
|
| + return l;
|
| + }
|
| +};
|
|
|
|
|
| /* UTF-32 */
|
|
|
| -static inline const uint32_t *
|
| -hb_utf_next (const uint32_t *text,
|
| - const uint32_t *end HB_UNUSED,
|
| - hb_codepoint_t *unicode)
|
| +template <bool validate>
|
| +struct hb_utf_t<uint32_t, validate>
|
| {
|
| - *unicode = *text++;
|
| - return text;
|
| -}
|
| -
|
| -static inline const uint32_t *
|
| -hb_utf_prev (const uint32_t *text,
|
| - const uint32_t *start HB_UNUSED,
|
| - hb_codepoint_t *unicode)
|
| -{
|
| - *unicode = *--text;
|
| - return text;
|
| -}
|
| + static inline const uint32_t *
|
| + next (const uint32_t *text,
|
| + const uint32_t *end HB_UNUSED,
|
| + hb_codepoint_t *unicode,
|
| + hb_codepoint_t replacement)
|
| + {
|
| + hb_codepoint_t c = *text++;
|
| + if (validate && unlikely (c > 0x10FFFFu || hb_in_range (c, 0xD800u, 0xDFFFu)))
|
| + goto error;
|
| + *unicode = c;
|
| + return text;
|
|
|
| -static inline unsigned int
|
| -hb_utf_strlen (const uint32_t *text)
|
| -{
|
| - unsigned int l = 0;
|
| - while (*text++) l++;
|
| - return l;
|
| -}
|
| + error:
|
| + *unicode = replacement;
|
| + return text;
|
| + }
|
| +
|
| + static inline const uint32_t *
|
| + prev (const uint32_t *text,
|
| + const uint32_t *start HB_UNUSED,
|
| + hb_codepoint_t *unicode,
|
| + hb_codepoint_t replacement)
|
| + {
|
| + next (text - 1, text, unicode, replacement);
|
| + return text - 1;
|
| + }
|
| +
|
| + static inline unsigned int
|
| + strlen (const uint32_t *text)
|
| + {
|
| + unsigned int l = 0;
|
| + while (*text++) l++;
|
| + return l;
|
| + }
|
| +};
|
|
|
|
|
| #endif /* HB_UTF_PRIVATE_HH */
|
|
|