public/common/unicode/utf8.h - Issue 18836004: Move ICU headers from public/{common,i18n} to source/{common,i18n}

Side by Side Diff: public/common/unicode/utf8.h

Issue 18836004: Move ICU headers from public/{common,i18n} to source/{common,i18n} (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu46.git@master

Patch Set: same as ps #3. retry uploading Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 /*

2 *******************************************************************************

3 *

4 * Copyright (C) 1999-2009, International Business Machines

5 * Corporation and others. All Rights Reserved.

6 *

7 *******************************************************************************

8 * file name: utf8.h

9 * encoding: US-ASCII

10 * tab size: 8 (not used)

11 * indentation:4

12 *

13 * created on: 1999sep13

14 * created by: Markus W. Scherer

15 */

16

17 /**

18 * \file

19 * \brief C API: 8-bit Unicode handling macros

20 *

21 * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.

22 * utf8.h is included by utf.h after unicode/umachine.h

23 * and some common definitions.

24 *

25 * For more information see utf.h and the ICU User Guide Strings chapter

26 * (http://icu-project.org/userguide/strings.html).

27 *

28 * <em>Usage:</em>

29 * ICU coding guidelines for if() statements should be followed when using these macros.

30 * Compound statements (curly braces {}) must be used for if-else-while...

31 * bodies and all macro statements should be terminated with semicolon.

32 */

33

34 #ifndef __UTF8_H__

35 #define __UTF8_H__

36

37 /* utf.h must be included first. */

38 #ifndef __UTF_H__

39 # include "unicode/utf.h"

40 #endif

41

42 /* internal definitions ----------------------------------------------------- */

43

44 /**

45 * \var utf8_countTrailBytes

46 * Internal array with numbers of trail bytes for any given byte used in

47 * lead byte position.

48 *

49 * This is internal since it is not meant to be called directly by external clients;

50 * however it is called by public macros in this file and thus must remain stable,

51 * and should not be hidden when other internal functions are hidden (otherwise

52 * public macros would fail to compile).

53 * @internal

54 */

55 #ifdef U_UTF8_IMPL

56 U_EXPORT const uint8_t

57 #elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION)

58 U_CFUNC const uint8_t

59 #else

60 U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? */ /*U_IMPORT*/

61 #endif

62 utf8_countTrailBytes[256];

63

64 /**

65 * Count the trail bytes for a UTF-8 lead byte.

66 *

67 * This is internal since it is not meant to be called directly by external clients;

68 * however it is called by public macros in this file and thus must remain stable.

69 * @internal

70 */

71 #define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte])

72

73 /**

74 * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.

75 *

76 * This is internal since it is not meant to be called directly by external clients;

77 * however it is called by public macros in this file and thus must remain stable.

78 * @internal

79 */

80 #define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)

81

82 /**

83 * Function for handling "next code point" with error-checking.

84 *

85 * This is internal since it is not meant to be called directly by external clients;

86 * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this

87 * file and thus must remain stable, and should not be hidden when other internal

88 * functions are hidden (otherwise public macros would fail to compile).

89 * @internal

90 */

91 U_STABLE UChar32 U_EXPORT2

92 utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);

93

94 /**

95 * Function for handling "append code point" with error-checking.

96 *

97 * This is internal since it is not meant to be called directly by external clients;

98 * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this

99 * file and thus must remain stable, and should not be hidden when other internal

100 * functions are hidden (otherwise public macros would fail to compile).

101 * @internal

102 */

103 U_STABLE int32_t U_EXPORT2

104 utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);

105

106 /**

107 * Function for handling "previous code point" with error-checking.

108 *

109 * This is internal since it is not meant to be called directly by external clients;

110 * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this

111 * file and thus must remain stable, and should not be hidden when other internal

112 * functions are hidden (otherwise public macros would fail to compile).

113 * @internal

114 */

115 U_STABLE UChar32 U_EXPORT2

116 utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);

117

118 /**

119 * Function for handling "skip backward one code point" with error-checking.

120 *

121 * This is internal since it is not meant to be called directly by external clients;

122 * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this

123 * file and thus must remain stable, and should not be hidden when other internal

124 * functions are hidden (otherwise public macros would fail to compile).

125 * @internal

126 */

127 U_STABLE int32_t U_EXPORT2

128 utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);

129

130 /* single-code point definitions -------------------------------------------- */

131

132 /**

133 * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?

134 * @param c 8-bit code unit (byte)

135 * @return TRUE or FALSE

136 * @stable ICU 2.4

137 */

138 #define U8_IS_SINGLE(c) (((c)&0x80)==0)

139

140 /**

141 * Is this code unit (byte) a UTF-8 lead byte?

142 * @param c 8-bit code unit (byte)

143 * @return TRUE or FALSE

144 * @stable ICU 2.4

145 */

146 #define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e)

147

148 /**

149 * Is this code unit (byte) a UTF-8 trail byte?

150 * @param c 8-bit code unit (byte)

151 * @return TRUE or FALSE

152 * @stable ICU 2.4

153 */

154 #define U8_IS_TRAIL(c) (((c)&0xc0)==0x80)

155

156 /**

157 * How many code units (bytes) are used for the UTF-8 encoding

158 * of this Unicode code point?

159 * @param c 32-bit code point

160 * @return 1..4, or 0 if c is a surrogate or not a Unicode code point

161 * @stable ICU 2.4

162 */

163 #define U8_LENGTH(c) \

164 ((uint32_t)(c)<=0x7f ? 1 : \

165 ((uint32_t)(c)<=0x7ff ? 2 : \

166 ((uint32_t)(c)<=0xd7ff ? 3 : \

167 ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \

168 ((uint32_t)(c)<=0xffff ? 3 : 4)\

169 ) \

170 ) \

171 ) \

172 )

173

174 /**

175 * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).

176 * @return 4

177 * @stable ICU 2.4

178 */

179 #define U8_MAX_LENGTH 4

180

181 /**

182 * Get a code point from a string at a random-access offset,

183 * without changing the offset.

184 * The offset may point to either the lead byte or one of the trail bytes

185 * for a code point, in which case the macro will read all of the bytes

186 * for the code point.

187 * The result is undefined if the offset points to an illegal UTF-8

188 * byte sequence.

189 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.

190 *

191 * @param s const uint8_t * string

192 * @param i string offset

193 * @param c output UChar32 variable

194 * @see U8_GET

195 * @stable ICU 2.4

196 */

197 #define U8_GET_UNSAFE(s, i, c) { \

198 int32_t _u8_get_unsafe_index=(int32_t)(i); \

199 U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \

200 U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \

201 }

202

203 /**

204 * Get a code point from a string at a random-access offset,

205 * without changing the offset.

206 * The offset may point to either the lead byte or one of the trail bytes

207 * for a code point, in which case the macro will read all of the bytes

208 * for the code point.

209 * If the offset points to an illegal UTF-8 byte sequence, then

210 * c is set to a negative value.

211 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.

212 *

213 * @param s const uint8_t * string

214 * @param start starting string offset

215 * @param i string offset, must be start<=i<length

216 * @param length string length

217 * @param c output UChar32 variable, set to <0 in case of an error

218 * @see U8_GET_UNSAFE

219 * @stable ICU 2.4

220 */

221 #define U8_GET(s, start, i, length, c) { \

222 int32_t _u8_get_index=(int32_t)(i); \

223 U8_SET_CP_START(s, start, _u8_get_index); \

224 U8_NEXT(s, _u8_get_index, length, c); \

225 }

226

227 /* definitions with forward iteration --------------------------------------- */

228

229 /**

230 * Get a code point from a string at a code point boundary offset,

231 * and advance the offset to the next code point boundary.

232 * (Post-incrementing forward iteration.)

233 * "Unsafe" macro, assumes well-formed UTF-8.

234 *

235 * The offset may point to the lead byte of a multi-byte sequence,

236 * in which case the macro will read the whole sequence.

237 * The result is undefined if the offset points to a trail byte

238 * or an illegal UTF-8 sequence.

239 *

240 * @param s const uint8_t * string

241 * @param i string offset

242 * @param c output UChar32 variable

243 * @see U8_NEXT

244 * @stable ICU 2.4

245 */

246 #define U8_NEXT_UNSAFE(s, i, c) { \

247 (c)=(uint8_t)(s)[(i)++]; \

248 if((uint8_t)((c)-0xc0)<0x35) { \

249 uint8_t __count=U8_COUNT_TRAIL_BYTES(c); \

250 U8_MASK_LEAD_BYTE(c, __count); \

251 switch(__count) { \

252 /* each following branch falls through to the next one */ \

253 case 3: \

254 (c)=((c)<<6)|((s)[(i)++]&0x3f); \

255 case 2: \

256 (c)=((c)<<6)|((s)[(i)++]&0x3f); \

257 case 1: \

258 (c)=((c)<<6)|((s)[(i)++]&0x3f); \

259 /* no other branches to optimize switch() */ \

260 break; \

261 } \

262 } \

263 }

264

265 /**

266 * Get a code point from a string at a code point boundary offset,

267 * and advance the offset to the next code point boundary.

268 * (Post-incrementing forward iteration.)

269 * "Safe" macro, checks for illegal sequences and for string boundaries.

270 *

271 * The offset may point to the lead byte of a multi-byte sequence,

272 * in which case the macro will read the whole sequence.

273 * If the offset points to a trail byte or an illegal UTF-8 sequence, then

274 * c is set to a negative value.

275 *

276 * @param s const uint8_t * string

277 * @param i string offset, must be i<length

278 * @param length string length

279 * @param c output UChar32 variable, set to <0 in case of an error

280 * @see U8_NEXT_UNSAFE

281 * @stable ICU 2.4

282 */

283 #define U8_NEXT(s, i, length, c) { \

284 (c)=(uint8_t)(s)[(i)++]; \

285 if((c)>=0x80) { \

286 uint8_t __t1, __t2; \

287 if( /* handle U+1000..U+CFFF inline */ \

288 (0xe0<(c) && (c)<=0xec) && \

289 (((i)+1)<(length)) && \

290 (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \

291 (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \

292 ) { \

293 /* no need for (c&0xf) because the upper bits are truncated after << 12 in the cast to (UChar) */ \

294 (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \

295 (i)+=2; \

296 } else if( /* handle U+0080..U+07FF inline */ \

297 ((c)<0xe0 && (c)>=0xc2) && \

298 ((i)<(length)) && \

299 (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \

300 ) { \

301 (c)=(UChar)((((c)&0x1f)<<6)|__t1); \

302 ++(i); \

303 } else if(U8_IS_LEAD(c)) { \

304 /* function call for "complicated" and error cases */ \

305 (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (int32_t)(length), c, -1); \

306 } else { \

307 (c)=U_SENTINEL; \

308 } \

309 } \

310 }

311

312 /**

313 * Append a code point to a string, overwriting 1 to 4 bytes.

314 * The offset points to the current end of the string contents

315 * and is advanced (post-increment).

316 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.

317 * Otherwise, the result is undefined.

318 *

319 * @param s uint8_t * string buffer (written to, so it must not be const)

320 * @param i string offset

321 * @param c code point to append

322 * @see U8_APPEND

323 * @stable ICU 2.4

324 */

325 #define U8_APPEND_UNSAFE(s, i, c) { \

326 if((uint32_t)(c)<=0x7f) { \

327 (s)[(i)++]=(uint8_t)(c); \

328 } else { \

329 if((uint32_t)(c)<=0x7ff) { \

330 (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \

331 } else { \

332 if((uint32_t)(c)<=0xffff) { \

333 (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \

334 } else { \

335 (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \

336 (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \

337 } \

338 (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \

339 } \

340 (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \

341 } \

342 }

343

344 /**

345 * Append a code point to a string, overwriting 1 to 4 bytes.

346 * The offset points to the current end of the string contents

347 * and is advanced (post-increment).

348 * "Safe" macro, checks for a valid code point.

349 * If a non-ASCII code point is written, checks for sufficient space in the string.

350 * If the code point is not valid or trail bytes do not fit,

351 * then isError is set to TRUE.

352 *

353 * @param s uint8_t * string buffer (written to, so it must not be const)

354 * @param i string offset, must be i<capacity

355 * @param capacity size of the string buffer

356 * @param c code point to append

357 * @param isError output UBool set to TRUE if an error occurs, otherwise not modified

358 * @see U8_APPEND_UNSAFE

359 * @stable ICU 2.4

360 */

361 #define U8_APPEND(s, i, capacity, c, isError) { \

362 if((uint32_t)(c)<=0x7f) { \

363 (s)[(i)++]=(uint8_t)(c); \

364 } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \

365 (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \

366 (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \

367 } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \

368 (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \

369 (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \

370 (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \

371 } else { \

372 (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(capacity), c, &(isError)); \

373 } \

374 }

375

376 /**

377 * Advance the string offset from one code point boundary to the next.

378 * (Post-incrementing iteration.)

379 * "Unsafe" macro, assumes well-formed UTF-8.

380 *

381 * @param s const uint8_t * string

382 * @param i string offset

383 * @see U8_FWD_1

384 * @stable ICU 2.4

385 */

386 #define U8_FWD_1_UNSAFE(s, i) { \

387 (i)+=1+U8_COUNT_TRAIL_BYTES((s)[i]); \

388 }

389

390 /**

391 * Advance the string offset from one code point boundary to the next.

392 * (Post-incrementing iteration.)

393 * "Safe" macro, checks for illegal sequences and for string boundaries.

394 *

395 * @param s const uint8_t * string

396 * @param i string offset, must be i<length

397 * @param length string length

398 * @see U8_FWD_1_UNSAFE

399 * @stable ICU 2.4

400 */

401 #define U8_FWD_1(s, i, length) { \

402 uint8_t __b=(uint8_t)(s)[(i)++]; \

403 if(U8_IS_LEAD(__b)) { \

404 uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \

405 if((i)+__count>(length)) { \

406 __count=(uint8_t)((length)-(i)); \

407 } \

408 while(__count>0 && U8_IS_TRAIL((s)[i])) { \

409 ++(i); \

410 --__count; \

411 } \

412 } \

413 }

414

415 /**

416 * Advance the string offset from one code point boundary to the n-th next one,

417 * i.e., move forward by n code points.

418 * (Post-incrementing iteration.)

419 * "Unsafe" macro, assumes well-formed UTF-8.

420 *

421 * @param s const uint8_t * string

422 * @param i string offset

423 * @param n number of code points to skip

424 * @see U8_FWD_N

425 * @stable ICU 2.4

426 */

427 #define U8_FWD_N_UNSAFE(s, i, n) { \

428 int32_t __N=(n); \

429 while(__N>0) { \

430 U8_FWD_1_UNSAFE(s, i); \

431 --__N; \

432 } \

433 }

434

435 /**

436 * Advance the string offset from one code point boundary to the n-th next one,

437 * i.e., move forward by n code points.

438 * (Post-incrementing iteration.)

439 * "Safe" macro, checks for illegal sequences and for string boundaries.

440 *

441 * @param s const uint8_t * string

442 * @param i string offset, must be i<length

443 * @param length string length

444 * @param n number of code points to skip

445 * @see U8_FWD_N_UNSAFE

446 * @stable ICU 2.4

447 */

448 #define U8_FWD_N(s, i, length, n) { \

449 int32_t __N=(n); \

450 while(__N>0 && (i)<(length)) { \

451 U8_FWD_1(s, i, length); \

452 --__N; \

453 } \

454 }

455

456 /**

457 * Adjust a random-access offset to a code point boundary

458 * at the start of a code point.

459 * If the offset points to a UTF-8 trail byte,

460 * then the offset is moved backward to the corresponding lead byte.

461 * Otherwise, it is not modified.

462 * "Unsafe" macro, assumes well-formed UTF-8.

463 *

464 * @param s const uint8_t * string

465 * @param i string offset

466 * @see U8_SET_CP_START

467 * @stable ICU 2.4

468 */

469 #define U8_SET_CP_START_UNSAFE(s, i) { \

470 while(U8_IS_TRAIL((s)[i])) { --(i); } \

471 }

472

473 /**

474 * Adjust a random-access offset to a code point boundary

475 * at the start of a code point.

476 * If the offset points to a UTF-8 trail byte,

477 * then the offset is moved backward to the corresponding lead byte.

478 * Otherwise, it is not modified.

479 * "Safe" macro, checks for illegal sequences and for string boundaries.

480 *

481 * @param s const uint8_t * string

482 * @param start starting string offset (usually 0)

483 * @param i string offset, must be start<=i

484 * @see U8_SET_CP_START_UNSAFE

485 * @stable ICU 2.4

486 */

487 #define U8_SET_CP_START(s, start, i) { \

488 if(U8_IS_TRAIL((s)[(i)])) { \

489 (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \

490 } \

491 }

492

493 /* definitions with backward iteration -------------------------------------- */

494

495 /**

496 * Move the string offset from one code point boundary to the previous one

497 * and get the code point between them.

498 * (Pre-decrementing backward iteration.)

499 * "Unsafe" macro, assumes well-formed UTF-8.

500 *

501 * The input offset may be the same as the string length.

502 * If the offset is behind a multi-byte sequence, then the macro will read

503 * the whole sequence.

504 * If the offset is behind a lead byte, then that itself

505 * will be returned as the code point.

506 * The result is undefined if the offset is behind an illegal UTF-8 sequence.

507 *

508 * @param s const uint8_t * string

509 * @param i string offset

510 * @param c output UChar32 variable

511 * @see U8_PREV

512 * @stable ICU 2.4

513 */

514 #define U8_PREV_UNSAFE(s, i, c) { \

515 (c)=(uint8_t)(s)[--(i)]; \

516 if(U8_IS_TRAIL(c)) { \

517 uint8_t __b, __count=1, __shift=6; \

518 \

519 /* c is a trail byte */ \

520 (c)&=0x3f; \

521 for(;;) { \

522 __b=(uint8_t)(s)[--(i)]; \

523 if(__b>=0xc0) { \

524 U8_MASK_LEAD_BYTE(__b, __count); \

525 (c)|=(UChar32)__b<<__shift; \

526 break; \

527 } else { \

528 (c)|=(UChar32)(__b&0x3f)<<__shift; \

529 ++__count; \

530 __shift+=6; \

531 } \

532 } \

533 } \

534 }

535

536 /**

537 * Move the string offset from one code point boundary to the previous one

538 * and get the code point between them.

539 * (Pre-decrementing backward iteration.)

540 * "Safe" macro, checks for illegal sequences and for string boundaries.

541 *

542 * The input offset may be the same as the string length.

543 * If the offset is behind a multi-byte sequence, then the macro will read

544 * the whole sequence.

545 * If the offset is behind a lone lead byte (any byte above 0xbf), then c is set

546 * to U_SENTINEL (the error value) rather than to the byte itself.

547 * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.

548 *

549 * @param s const uint8_t * string

550 * @param start starting string offset (usually 0)

551 * @param i string offset, must be start<i

552 * @param c output UChar32 variable, set to <0 in case of an error

553 * @see U8_PREV_UNSAFE

554 * @stable ICU 2.4

555 */

556 #define U8_PREV(s, start, i, c) { \

557 (c)=(uint8_t)(s)[--(i)]; \

558 if((c)>=0x80) { \

559 if((c)<=0xbf) { \

560 (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \

561 } else { \

562 (c)=U_SENTINEL; \

563 } \

564 } \

565 }

566

567 /**

568 * Move the string offset from one code point boundary to the previous one.

569 * (Pre-decrementing backward iteration.)

570 * The input offset may be the same as the string length.

571 * "Unsafe" macro, assumes well-formed UTF-8.

572 *

573 * @param s const uint8_t * string

574 * @param i string offset

575 * @see U8_BACK_1

576 * @stable ICU 2.4

577 */

578 #define U8_BACK_1_UNSAFE(s, i) { \

579 while(U8_IS_TRAIL((s)[--(i)])) {} \

580 }

581

582 /**

583 * Move the string offset from one code point boundary to the previous one.

584 * (Pre-decrementing backward iteration.)

585 * The input offset may be the same as the string length.

586 * "Safe" macro, checks for illegal sequences and for string boundaries.

587 *

588 * @param s const uint8_t * string

589 * @param start starting string offset (usually 0)

590 * @param i string offset, must be start<i

591 * @see U8_BACK_1_UNSAFE

592 * @stable ICU 2.4

593 */

594 #define U8_BACK_1(s, start, i) { \

595 if(U8_IS_TRAIL((s)[--(i)])) { \

596 (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \

597 } \

598 }

599

600 /**

601 * Move the string offset from one code point boundary to the n-th one before it,

602 * i.e., move backward by n code points.

603 * (Pre-decrementing backward iteration.)

604 * The input offset may be the same as the string length.

605 * "Unsafe" macro, assumes well-formed UTF-8.

606 *

607 * @param s const uint8_t * string

608 * @param i string offset

609 * @param n number of code points to skip

610 * @see U8_BACK_N

611 * @stable ICU 2.4

612 */

613 #define U8_BACK_N_UNSAFE(s, i, n) { \

614 int32_t __N=(n); \

615 while(__N>0) { \

616 U8_BACK_1_UNSAFE(s, i); \

617 --__N; \

618 } \

619 }

620

621 /**

622 * Move the string offset from one code point boundary to the n-th one before it,

623 * i.e., move backward by n code points.

624 * (Pre-decrementing backward iteration.)

625 * The input offset may be the same as the string length.

626 * "Safe" macro, checks for illegal sequences and for string boundaries.

627 *

628 * @param s const uint8_t * string

629 * @param start index of the start of the string

630 * @param i string offset, must be start<i

631 * @param n number of code points to skip

632 * @see U8_BACK_N_UNSAFE

633 * @stable ICU 2.4

634 */

635 #define U8_BACK_N(s, start, i, n) { \

636 int32_t __N=(n); \

637 while(__N>0 && (i)>(start)) { \

638 U8_BACK_1(s, start, i); \

639 --__N; \

640 } \

641 }

642

643 /**

644 * Adjust a random-access offset to a code point boundary after a code point.

645 * If the offset is behind a partial multi-byte sequence,

646 * then the offset is incremented to behind the whole sequence.

647 * Otherwise, it is not modified.

648 * The input offset may be the same as the string length.

649 * "Unsafe" macro, assumes well-formed UTF-8.

650 *

651 * @param s const uint8_t * string

652 * @param i string offset

653 * @see U8_SET_CP_LIMIT

654 * @stable ICU 2.4

655 */

656 #define U8_SET_CP_LIMIT_UNSAFE(s, i) { \

657 U8_BACK_1_UNSAFE(s, i); \

658 U8_FWD_1_UNSAFE(s, i); \

659 }

660

661 /**

662 * Adjust a random-access offset to a code point boundary after a code point.

663 * If the offset is behind a partial multi-byte sequence,

664 * then the offset is incremented to behind the whole sequence.

665 * Otherwise, it is not modified.

666 * The input offset may be the same as the string length.

667 * "Safe" macro, checks for illegal sequences and for string boundaries.

668 *

669 * @param s const uint8_t * string

670 * @param start starting string offset (usually 0)

671 * @param i string offset, must be start<=i<=length

672 * @param length string length

673 * @see U8_SET_CP_LIMIT_UNSAFE

674 * @stable ICU 2.4

675 */

676 #define U8_SET_CP_LIMIT(s, start, i, length) { \

677 if((start)<(i) && (i)<(length)) { \

678 U8_BACK_1(s, start, i); \

679 U8_FWD_1(s, i, length); \

680 } \

681 }

682

683 #endif

OLD	NEW

« no previous file with comments | « public/common/unicode/utf32.h ('k') | public/common/unicode/utf_old.h » ('j') | no next file with comments »