third_party/libphonenumber/cpp/src/utf/rune.c - Issue 6930013: Re-committing http://codereview.chromium.org/6803005/ after fixing multi-dll build:

Unified Diff: third_party/libphonenumber/cpp/src/utf/rune.c

Issue 6930013: Re-committing http://codereview.chromium.org/6803005/ after fixing multi-dll build: (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: Created 9 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/libphonenumber/cpp/src/utf/rune.c

===================================================================

--- third_party/libphonenumber/cpp/src/utf/rune.c (revision 0)

+++ third_party/libphonenumber/cpp/src/utf/rune.c (revision 0)

@@ -0,0 +1,350 @@

+/*

+ * The authors of this software are Rob Pike and Ken Thompson.

+ * Permission to use, copy, modify, and distribute this software for any

+ * purpose without fee is hereby granted, provided that this entire notice

+ * is included in all copies of any software which is or includes a copy

+ * or modification of this software and in all copies of the supporting

+ * documentation for such software.

+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED

+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY

+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY

+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.

+ */

+#include <stdarg.h>

+#include <string.h>

+#include "utf.h"

+#include "utfdef.h"

+enum

+ Bit1 = 7,

+ Bitx = 6,

+ Bit2 = 5,

+ Bit3 = 4,

+ Bit4 = 3,

+ Bit5 = 2,

+ T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */

+ Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */

+ T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */

+ T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */

+ T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */

+ T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */

+ Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */

+ Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */

+ Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */

+ Rune4 = (1<<(Bit4+3*Bitx))-1,

+ /* 0001 1111 1111 1111 1111 1111 */

+ Maskx = (1<<Bitx)-1, /* 0011 1111 */

+ Testx = Maskx ^ 0xFF, /* 1100 0000 */

+ Bad = Runeerror,

+};

+/*

+ * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24

+ * This is a slower but "safe" version of the old chartorune

+ * that works on strings that are not necessarily null-terminated.

+ *

+ * If you know for sure that your string is null-terminated,

+ * chartorune will be a bit faster.

+ *

+ * It is guaranteed not to attempt to access "length"

+ * past the incoming pointer. This is to avoid

+ * possible access violations. If the string appears to be

+ * well-formed but incomplete (i.e., to get the whole Rune

+ * we'd need to read past str+length) then we'll set the Rune

+ * to Bad and return 0.

+ *

+ * Note that if we have decoding problems for other

+ * reasons, we return 1 instead of 0.

+ */

+int

+charntorune(Rune *rune, const char *str, int length)

+ int c, c1, c2, c3;

+ long l;

+ /* When we're not allowed to read anything */

+ if(length <= 0) {

+ goto badlen;

+ }

+ /*

+ * one character sequence (7-bit value)

+ * 00000-0007F => T1

+ */

+ c = *(uchar*)str;

+ if(c < Tx) {

+ *rune = c;

+ return 1;

+ }

+ // If we can't read more than one character we must stop

+ if(length <= 1) {

+ goto badlen;

+ }

+ /*

+ * two character sequence (11-bit value)

+ * 0080-07FF => T2 Tx

+ */

+ c1 = *(uchar*)(str+1) ^ Tx;

+ if(c1 & Testx)

+ goto bad;

+ if(c < T3) {

+ if(c < T2)

+ goto bad;

+ l = ((c << Bitx) | c1) & Rune2;

+ if(l <= Rune1)

+ goto bad;

+ *rune = l;

+ return 2;

+ }

+ // If we can't read more than two characters we must stop

+ if(length <= 2) {

+ goto badlen;

+ }

+ /*

+ * three character sequence (16-bit value)

+ * 0800-FFFF => T3 Tx Tx

+ */

+ c2 = *(uchar*)(str+2) ^ Tx;

+ if(c2 & Testx)

+ goto bad;

+ if(c < T4) {

+ l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;

+ if(l <= Rune2)

+ goto bad;

+ *rune = l;

+ return 3;

+ }

+ if (length <= 3)

+ goto badlen;

+ /*

+ * four character sequence (21-bit value)

+ * 10000-1FFFFF => T4 Tx Tx Tx

+ */

+ c3 = *(uchar*)(str+3) ^ Tx;

+ if (c3 & Testx)

+ goto bad;

+ if (c < T5) {

+ l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;

+ if (l <= Rune3)

+ goto bad;

+ *rune = l;

+ return 4;

+ }

+ // Support for 5-byte or longer UTF-8 would go here, but

+ // since we don't have that, we'll just fall through to bad.

+ /*

+ * bad decoding

+ */

+bad:

+ *rune = Bad;

+ return 1;

+badlen:

+ *rune = Bad;

+ return 0;

+/*

+ * This is the older "unsafe" version, which works fine on

+ * null-terminated strings.

+ */

+int

+chartorune(Rune *rune, const char *str)

+ int c, c1, c2, c3;

+ long l;

+ /*

+ * one character sequence

+ * 00000-0007F => T1

+ */

+ c = *(uchar*)str;

+ if(c < Tx) {

+ *rune = c;

+ return 1;

+ }

+ /*

+ * two character sequence

+ * 0080-07FF => T2 Tx

+ */

+ c1 = *(uchar*)(str+1) ^ Tx;

+ if(c1 & Testx)

+ goto bad;

+ if(c < T3) {

+ if(c < T2)

+ goto bad;

+ l = ((c << Bitx) | c1) & Rune2;

+ if(l <= Rune1)

+ goto bad;

+ *rune = l;

+ return 2;

+ }

+ /*

+ * three character sequence

+ * 0800-FFFF => T3 Tx Tx

+ */

+ c2 = *(uchar*)(str+2) ^ Tx;

+ if(c2 & Testx)

+ goto bad;

+ if(c < T4) {

+ l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;

+ if(l <= Rune2)

+ goto bad;

+ *rune = l;

+ return 3;

+ }

+ /*

+ * four character sequence (21-bit value)

+ * 10000-1FFFFF => T4 Tx Tx Tx

+ */

+ c3 = *(uchar*)(str+3) ^ Tx;

+ if (c3 & Testx)

+ goto bad;

+ if (c < T5) {

+ l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;

+ if (l <= Rune3)

+ goto bad;

+ *rune = l;

+ return 4;

+ }

+ /*

+ * Support for 5-byte or longer UTF-8 would go here, but

+ * since we don't have that, we'll just fall through to bad.

+ */

+ /*

+ * bad decoding

+ */

+bad:

+ *rune = Bad;

+ return 1;

+int

+isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) {

+ *consumed = charntorune(rune, str, length);

+ return *rune != Runeerror || *consumed == 3;

+int

+runetochar(char *str, const Rune *rune)

+ /* Runes are signed, so convert to unsigned for range check. */

+ unsigned long c;

+ /*

+ * one character sequence

+ * 00000-0007F => 00-7F

+ */

+ c = *rune;

+ if(c <= Rune1) {

+ str[0] = c;

+ return 1;

+ }

+ /*

+ * two character sequence

+ * 0080-07FF => T2 Tx

+ */

+ if(c <= Rune2) {

+ str[0] = T2 | (c >> 1*Bitx);

+ str[1] = Tx | (c & Maskx);

+ return 2;

+ }

+ /*

+ * If the Rune is out of range, convert it to the error rune.

+ * Do this test here because the error rune encodes to three bytes.

+ * Doing it earlier would duplicate work, since an out of range

+ * Rune wouldn't have fit in one or two bytes.

+ */

+ if (c > Runemax)

+ c = Runeerror;

+ /*

+ * three character sequence

+ * 0800-FFFF => T3 Tx Tx

+ */

+ if (c <= Rune3) {

+ str[0] = T3 | (c >> 2*Bitx);

+ str[1] = Tx | ((c >> 1*Bitx) & Maskx);

+ str[2] = Tx | (c & Maskx);

+ return 3;

+ }

+ /*

+ * four character sequence (21-bit value)

+ * 10000-1FFFFF => T4 Tx Tx Tx

+ */

+ str[0] = T4 | (c >> 3*Bitx);

+ str[1] = Tx | ((c >> 2*Bitx) & Maskx);

+ str[2] = Tx | ((c >> 1*Bitx) & Maskx);

+ str[3] = Tx | (c & Maskx);

+ return 4;

+int

+runelen(Rune rune)

+ char str[10];

+ return runetochar(str, &rune);

+int

+runenlen(const Rune *r, int nrune)

+ int nb, c;

+ nb = 0;

+ while(nrune--) {

+ c = *r++;

+ if (c <= Rune1)

+ nb++;

+ else if (c <= Rune2)

+ nb += 2;

+ else if (c <= Rune3)

+ nb += 3;

+ else /* assert(c <= Rune4) */

+ nb += 4;

+ }

+ return nb;

+int

+fullrune(const char *str, int n)

+ if (n > 0) {

+ int c = *(uchar*)str;

+ if (c < Tx)

+ return 1;

+ if (n > 1) {

+ if (c < T3)

+ return 1;

+ if (n > 2) {

+ if (c < T4 || n > 3)

+ return 1;

+ }

+ return 0;

Property changes on: third_party\libphonenumber\cpp\src\utf\rune.c

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « third_party/libphonenumber/cpp/src/utf/README ('k') | third_party/libphonenumber/cpp/src/utf/stringpiece.h » ('j') | no next file with comments »