third_party/libphonenumber/cpp/src/utf/rune.c - Issue 6920006: Revert 84000 - Autofill phone number enhancements and integration of Phone Number Util Library: p...

Side by Side Diff: third_party/libphonenumber/cpp/src/utf/rune.c

Issue 6920006: Revert 84000 - Autofill phone number enhancements and integration of Phone Number Util Library: p... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 9 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 /*

2 * The authors of this software are Rob Pike and Ken Thompson.

3 * Copyright (c) 2002 by Lucent Technologies.

4 * Permission to use, copy, modify, and distribute this software for any

5 * purpose without fee is hereby granted, provided that this entire notice

6 * is included in all copies of any software which is or includes a copy

7 * or modification of this software and in all copies of the supporting

8 * documentation for such software.

9 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED

10 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY

11 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY

12 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.

13 */

14 #include <stdarg.h>

15 #include <string.h>

16 #include "utf.h"

17 #include "utfdef.h"

18

19 enum

20 {

21 Bit1 = 7,

22 Bitx = 6,

23 Bit2 = 5,

24 Bit3 = 4,

25 Bit4 = 3,

26 Bit5 = 2,

27

28 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */

29 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */

30 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */

31 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */

32 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */

33 T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */

34

35 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */

36 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */

37 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */

38 Rune4 = (1<<(Bit4+3*Bitx))-1,

39 /* 0001 1111 1111 1111 1111 1111 */

40

41 Maskx = (1<<Bitx)-1, /* 0011 1111 */

42 Testx = Maskx ^ 0xFF, /* 1100 0000 */

43

44 Bad = Runeerror,

45 };

46

47 /*

48 * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24

49 * This is a slower but "safe" version of the old chartorune

50 * that works on strings that are not necessarily null-terminated.

51 *

52 * If you know for sure that your string is null-terminated,

53 * chartorune will be a bit faster.

54 *

55 * It is guaranteed not to attempt to access "length"

56 * past the incoming pointer. This is to avoid

57 * possible access violations. If the string appears to be

58 * well-formed but incomplete (i.e., to get the whole Rune

59 * we'd need to read past str+length) then we'll set the Rune

60 * to Bad and return 0.

61 *

62 * Note that if we have decoding problems for other

63 * reasons, we return 1 instead of 0.

64 */

65 int

66 charntorune(Rune *rune, const char *str, int length)

67 {

68 int c, c1, c2, c3;

69 long l;

70

71 /* When we're not allowed to read anything */

72 if(length <= 0) {

73 goto badlen;

74 }

75

76 /*

77 * one character sequence (7-bit value)

78 * 00000-0007F => T1

79 */

80 c = *(uchar*)str;

81 if(c < Tx) {

82 *rune = c;

83 return 1;

84 }

85

86 // If we can't read more than one character we must stop

87 if(length <= 1) {

88 goto badlen;

89 }

90

91 /*

92 * two character sequence (11-bit value)

93 * 0080-07FF => T2 Tx

94 */

95 c1 = *(uchar*)(str+1) ^ Tx;

96 if(c1 & Testx)

97 goto bad;

98 if(c < T3) {

99 if(c < T2)

100 goto bad;

101 l = ((c << Bitx) | c1) & Rune2;

102 if(l <= Rune1)

103 goto bad;

104 *rune = l;

105 return 2;

106 }

107

108 // If we can't read more than two characters we must stop

109 if(length <= 2) {

110 goto badlen;

111 }

112

113 /*

114 * three character sequence (16-bit value)

115 * 0800-FFFF => T3 Tx Tx

116 */

117 c2 = *(uchar*)(str+2) ^ Tx;

118 if(c2 & Testx)

119 goto bad;

120 if(c < T4) {

121 l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;

122 if(l <= Rune2)

123 goto bad;

124 *rune = l;

125 return 3;

126 }

127

128 if (length <= 3)

129 goto badlen;

130

131 /*

132 * four character sequence (21-bit value)

133 * 10000-1FFFFF => T4 Tx Tx Tx

134 */

135 c3 = *(uchar*)(str+3) ^ Tx;

136 if (c3 & Testx)

137 goto bad;

138 if (c < T5) {

139 l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4 ;

140 if (l <= Rune3)

141 goto bad;

142 *rune = l;

143 return 4;

144 }

145

146 // Support for 5-byte or longer UTF-8 would go here, but

147 // since we don't have that, we'll just fall through to bad.

148

149 /*

150 * bad decoding

151 */

152 bad:

153 *rune = Bad;

154 return 1;

155 badlen:

156 *rune = Bad;

157 return 0;

158

159 }

160

161

162 /*

163 * This is the older "unsafe" version, which works fine on

164 * null-terminated strings.

165 */

166 int

167 chartorune(Rune *rune, const char *str)

168 {

169 int c, c1, c2, c3;

170 long l;

171

172 /*

173 * one character sequence

174 * 00000-0007F => T1

175 */

176 c = *(uchar*)str;

177 if(c < Tx) {

178 *rune = c;

179 return 1;

180 }

181

182 /*

183 * two character sequence

184 * 0080-07FF => T2 Tx

185 */

186 c1 = *(uchar*)(str+1) ^ Tx;

187 if(c1 & Testx)

188 goto bad;

189 if(c < T3) {

190 if(c < T2)

191 goto bad;

192 l = ((c << Bitx) | c1) & Rune2;

193 if(l <= Rune1)

194 goto bad;

195 *rune = l;

196 return 2;

197 }

198

199 /*

200 * three character sequence

201 * 0800-FFFF => T3 Tx Tx

202 */

203 c2 = *(uchar*)(str+2) ^ Tx;

204 if(c2 & Testx)

205 goto bad;

206 if(c < T4) {

207 l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;

208 if(l <= Rune2)

209 goto bad;

210 *rune = l;

211 return 3;

212 }

213

214 /*

215 * four character sequence (21-bit value)

216 * 10000-1FFFFF => T4 Tx Tx Tx

217 */

218 c3 = *(uchar*)(str+3) ^ Tx;

219 if (c3 & Testx)

220 goto bad;

221 if (c < T5) {

222 l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4 ;

223 if (l <= Rune3)

224 goto bad;

225 *rune = l;

226 return 4;

227 }

228

229 /*

230 * Support for 5-byte or longer UTF-8 would go here, but

231 * since we don't have that, we'll just fall through to bad.

232 */

233

234 /*

235 * bad decoding

236 */

237 bad:

238 *rune = Bad;

239 return 1;

240 }

241

242 int

243 isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) {

244 *consumed = charntorune(rune, str, length);

245 return *rune != Runeerror || *consumed == 3;

246 }

247

248 int

249 runetochar(char *str, const Rune *rune)

250 {

251 /* Runes are signed, so convert to unsigned for range check. */

252 unsigned long c;

253

254 /*

255 * one character sequence

256 * 00000-0007F => 00-7F

257 */

258 c = *rune;

259 if(c <= Rune1) {

260 str[0] = c;

261 return 1;

262 }

263

264 /*

265 * two character sequence

266 * 0080-07FF => T2 Tx

267 */

268 if(c <= Rune2) {

269 str[0] = T2 | (c >> 1*Bitx);

270 str[1] = Tx | (c & Maskx);

271 return 2;

272 }

273

274 /*

275 * If the Rune is out of range, convert it to the error rune.

276 * Do this test here because the error rune encodes to three bytes.

277 * Doing it earlier would duplicate work, since an out of range

278 * Rune wouldn't have fit in one or two bytes.

279 */

280 if (c > Runemax)

281 c = Runeerror;

282

283 /*

284 * three character sequence

285 * 0800-FFFF => T3 Tx Tx

286 */

287 if (c <= Rune3) {

288 str[0] = T3 | (c >> 2*Bitx);

289 str[1] = Tx | ((c >> 1*Bitx) & Maskx);

290 str[2] = Tx | (c & Maskx);

291 return 3;

292 }

293

294 /*

295 * four character sequence (21-bit value)

296 * 10000-1FFFFF => T4 Tx Tx Tx

297 */

298 str[0] = T4 | (c >> 3*Bitx);

299 str[1] = Tx | ((c >> 2*Bitx) & Maskx);

300 str[2] = Tx | ((c >> 1*Bitx) & Maskx);

301 str[3] = Tx | (c & Maskx);

302 return 4;

303 }

304

305 int

306 runelen(Rune rune)

307 {

308 char str[10];

309

310 return runetochar(str, &rune);

311 }

312

313 int

314 runenlen(const Rune *r, int nrune)

315 {

316 int nb, c;

317

318 nb = 0;

319 while(nrune--) {

320 c = *r++;

321 if (c <= Rune1)

322 nb++;

323 else if (c <= Rune2)

324 nb += 2;

325 else if (c <= Rune3)

326 nb += 3;

327 else /* assert(c <= Rune4) */

328 nb += 4;

329 }

330 return nb;

331 }

332

333 int

334 fullrune(const char *str, int n)

335 {

336 if (n > 0) {

337 int c = *(uchar*)str;

338 if (c < Tx)

339 return 1;

340 if (n > 1) {

341 if (c < T3)

342 return 1;

343 if (n > 2) {

344 if (c < T4 || n > 3)

345 return 1;

346 }

347 }

348 }

349 return 0;

350 }

OLD	NEW

« no previous file with comments | « third_party/libphonenumber/cpp/src/utf/README ('k') | third_party/libphonenumber/cpp/src/utf/stringpiece.h » ('j') | no next file with comments »