OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 1999-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: utf16.h |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 1999sep09 |
| 14 * created by: Markus W. Scherer |
| 15 */ |
| 16 |
/**
 * \file
 * \brief C API: 16-bit Unicode handling macros
 *
 * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
 * utf16.h is included by utf.h after unicode/umachine.h
 * and some common definitions.
 *
 * For more information see utf.h and the ICU User Guide Strings chapter
 * (http://icu-project.org/userguide/strings.html).
 *
 * <em>Usage:</em>
 * ICU coding guidelines for if() statements should be followed when using these macros.
 * Compound statements (curly braces {}) must be used for if-else-while...
 * bodies and all macro statements should be terminated with semicolon.
 */
| 33 |
| 34 #ifndef __UTF16_H__ |
| 35 #define __UTF16_H__ |
| 36 |
| 37 /* utf.h must be included first. */ |
| 38 #ifndef __UTF_H__ |
| 39 # include "unicode/utf.h" |
| 40 #endif |
| 41 |
| 42 /* single-code point definitions -------------------------------------------- */ |
| 43 |
/**
 * Does this code unit alone encode a code point (BMP, not a surrogate)?
 * Defined as the negation of U_IS_SURROGATE(c); the expansion is fully
 * parenthesized so that it can be embedded in larger expressions.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
| 51 |
/**
 * Is this code unit a lead surrogate (U+d800..U+dbff)?
 * Masks off the low 10 bits and compares the rest against 0xd800;
 * the argument is evaluated exactly once.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
| 59 |
/**
 * Is this code unit a trail surrogate (U+dc00..U+dfff)?
 * Masks off the low 10 bits and compares the rest against 0xdc00;
 * the argument is evaluated exactly once.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
| 67 |
/**
 * Is this code unit a surrogate (U+d800..U+dfff)?
 * Forwards to U_IS_SURROGATE(c); the expansion is wrapped in parentheses
 * so that it stays a single primary expression.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE(c) (U_IS_SURROGATE(c))
| 75 |
/**
 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
 * is it a lead surrogate?
 * Only bit 0x400 is tested, so the result is meaningful only for
 * values that are already known to be surrogates.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
| 84 |
/**
 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
 * is it a trail surrogate?
 * Only bit 0x400 is tested, so the result is meaningful only for
 * values that are already known to be surrogates.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
| 93 |
/**
 * Helper constant for U16_GET_SUPPLEMENTARY.
 * It is the value of (lead<<10)+trail for the first surrogate pair
 * (0xd800, 0xdc00) minus 0x10000, i.e. the amount to subtract from
 * (lead<<10)+trail to obtain the supplementary code point.
 * @internal
 */
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
| 99 |
/**
 * Get a supplementary code point value (U+10000..U+10ffff)
 * from its lead and trail surrogates.
 * The result is undefined if the input values are not
 * lead and trail surrogates.
 * Each argument is evaluated exactly once.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * @stable ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
| 113 |
| 114 |
/**
 * Get the lead surrogate (0xd800..0xdbff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The cast expression is enclosed in parentheses so that the macro
 * behaves as a single primary expression (e.g. under sizeof).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
| 123 |
/**
 * Get the trail surrogate (0xdc00..0xdfff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The cast expression is enclosed in parentheses so that the macro
 * behaves as a single primary expression (e.g. under sizeof).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
| 132 |
/**
 * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
 * The value is compared as uint32_t, so anything above 0xffff
 * (including negative inputs) yields 2.
 * @param c 32-bit code point
 * @return 1 or 2
 * @stable ICU 2.4
 */
#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
| 141 |
/**
 * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
 * @return 2
 * @stable ICU 2.4
 */
#define U16_MAX_LENGTH 2
| 148 |
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset may point to either the lead or trail surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the adjacent matching surrogate as well.
 * The result is undefined if the offset points to a single, unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * The arguments s, i and c are evaluated more than once.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_GET
 * @stable ICU 2.4
 */
#define U16_GET_UNSAFE(s, i, c) do { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            /* at the lead unit: combine with the following trail unit */ \
            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
        } else { \
            /* at the trail unit: combine with the preceding lead unit */ \
            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
        } \
    } \
} while(0)
| 176 |
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may point to either the lead or trail surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the adjacent matching surrogate as well.
 * If the offset points to a single, unpaired surrogate, then that itself
 * will be returned as the code point.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * The arguments s, i and c are evaluated more than once.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U16_GET(s, start, i, length, c) do { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t u16_c2_; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            /* pair up only if a trail unit follows inside the string */ \
            if((i)+1<(length) && U16_IS_TRAIL(u16_c2_=(s)[(i)+1])) { \
                (c)=U16_GET_SUPPLEMENTARY((c), u16_c2_); \
            } \
        } else { \
            /* pair up only if a lead unit precedes inside the string */ \
            if((i)>(start) && U16_IS_LEAD(u16_c2_=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(u16_c2_, (c)); \
            } \
        } \
    } \
} while(0)
| 212 |
| 213 /* definitions with forward iteration --------------------------------------- */ |
| 214 |
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset may point to the lead surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset points to a single, unpaired lead surrogate.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue; s, i and c are evaluated more than once.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_NEXT
 * @stable ICU 2.4
 */
#define U16_NEXT_UNSAFE(s, i, c) do { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        /* consume the trail unit without checking it */ \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
    } \
} while(0)
| 240 |
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may point to the lead surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then that itself
 * will be returned as the code point.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue; s, i and c are evaluated more than once.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_NEXT(s, i, length, c) do { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        uint16_t u16_c2_; \
        /* only consume the next unit if it exists and is a trail surrogate */ \
        if((i)<(length) && U16_IS_TRAIL(u16_c2_=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), u16_c2_); \
        } \
    } \
} while(0)
| 271 |
/**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue; s, i and c are evaluated more than once.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset
 * @param c code point to append
 * @see U16_APPEND
 * @stable ICU 2.4
 */
#define U16_APPEND_UNSAFE(s, i, c) do { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        /* supplementary: write lead then trail surrogate */ \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
} while(0)
| 293 |
/**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Safe" macro, checks for a valid code point.
 * If a surrogate pair is written, checks for sufficient space in the string.
 * If the code point is not valid or a trail surrogate does not fit,
 * then isError is set to TRUE.
 *
 * Note that no capacity check is made when a single code unit is written;
 * the caller must guarantee i<capacity.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue; s, i and c are evaluated more than once.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset, must be i<capacity
 * @param capacity size of the string buffer
 * @param c code point to append
 * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
 * @see U16_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U16_APPEND(s, i, capacity, c, isError) do { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
        /* supplementary: write lead then trail surrogate */ \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else /* c>0x10ffff or not enough space */ { \
        (isError)=TRUE; \
    } \
} while(0)
| 321 |
/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue and is evaluated more than once.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_FWD_1
 * @stable ICU 2.4
 */
#define U16_FWD_1_UNSAFE(s, i) do { \
    /* a lead surrogate is assumed to be followed by its trail unit */ \
    if(U16_IS_LEAD((s)[(i)++])) { \
        ++(i); \
    } \
} while(0)
| 337 |
/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue; s and i are evaluated more than once.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @see U16_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_1(s, i, length) do { \
    /* skip a second unit only for a complete lead+trail pair inside the string */ \
    if(U16_IS_LEAD((s)[(i)++]) && (i)<(length) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
} while(0)
| 354 |
/**
 * Advance the string offset from one code point boundary to the n-th next one,
 * i.e., move forward by n code points.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * n is evaluated once (copied into a local int32_t counter); i must be a
 * modifiable lvalue.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_FWD_N
 * @stable ICU 2.4
 */
#define U16_FWD_N_UNSAFE(s, i, n) do { \
    int32_t u16_n_=(n); \
    while(u16_n_>0) { \
        U16_FWD_1_UNSAFE(s, i); \
        --u16_n_; \
    } \
} while(0)
| 374 |
/**
 * Advance the string offset from one code point boundary to the n-th next one,
 * i.e., move forward by n code points.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 * Iteration stops early when the offset reaches length.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * n is evaluated once (copied into a local int32_t counter); i must be a
 * modifiable lvalue; s, i and length are evaluated more than once.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) do { \
    int32_t u16_n_=(n); \
    while(u16_n_>0 && (i)<(length)) { \
        U16_FWD_1(s, i, length); \
        --u16_n_; \
    } \
} while(0)
| 395 |
/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to the trail surrogate of a surrogate pair,
 * then the offset is decremented.
 * Otherwise, it is not modified.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * @stable ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) do { \
    if(U16_IS_TRAIL((s)[i])) { \
        --(i); \
    } \
} while(0)
| 414 |
/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to the trail surrogate of a surrogate pair,
 * then the offset is decremented.
 * Otherwise, it is not modified.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue; s and i are evaluated more than once.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) do { \
    /* step back only onto a real lead surrogate that lies at or after start */ \
    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} while(0)
| 434 |
| 435 /* definitions with backward iteration -------------------------------------- */ |
| 436 |
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate unit
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue; s, i and c are evaluated more than once.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * @stable ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) do { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        /* consume the preceding unit as the lead without checking it */ \
        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    } \
} while(0)
| 463 |
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate unit
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then that itself
 * will be returned as the code point.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue; s, i and c are evaluated more than once.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U16_PREV(s, start, i, c) do { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        uint16_t u16_c2_; \
        /* only consume the preceding unit if it exists and is a lead surrogate */ \
        if((i)>(start) && U16_IS_LEAD(u16_c2_=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(u16_c2_, (c)); \
        } \
    } \
} while(0)
| 495 |
/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue and is evaluated more than once.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * @stable ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) do { \
    /* a trail surrogate is assumed to be preceded by its lead unit */ \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        --(i); \
    } \
} while(0)
| 512 |
/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue; s and i are evaluated more than once.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @see U16_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_1(s, start, i) do { \
    /* step back a second unit only for a complete lead+trail pair after start */ \
    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} while(0)
| 530 |
/**
 * Move the string offset from one code point boundary to the n-th one before it,
 * i.e., move backward by n code points.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * n is evaluated once (copied into a local int32_t counter); i must be a
 * modifiable lvalue.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_BACK_N
 * @stable ICU 2.4
 */
#define U16_BACK_N_UNSAFE(s, i, n) do { \
    int32_t u16_n_=(n); \
    while(u16_n_>0) { \
        U16_BACK_1_UNSAFE(s, i); \
        --u16_n_; \
    } \
} while(0)
| 551 |
/**
 * Move the string offset from one code point boundary to the n-th one before it,
 * i.e., move backward by n code points.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 * Iteration stops early when the offset reaches start.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * n is evaluated once (copied into a local int32_t counter); i must be a
 * modifiable lvalue; s, start and i are evaluated more than once.
 *
 * @param s const UChar * string
 * @param start start of string
 * @param i string offset, must be start<i
 * @param n number of code points to skip
 * @see U16_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_N(s, start, i, n) do { \
    int32_t u16_n_=(n); \
    while(u16_n_>0 && (i)>(start)) { \
        U16_BACK_1(s, start, i); \
        --u16_n_; \
    } \
} while(0)
| 573 |
/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind the lead surrogate of a surrogate pair,
 * then the offset is incremented.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The macro reads (s)[(i)-1] unconditionally, so i must be greater than
 * the index of the first code unit of s.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) do { \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        ++(i); \
    } \
} while(0)
| 592 |
/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind the lead surrogate of a surrogate pair,
 * then the offset is incremented.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * Expands to a single do { } while(0) statement: terminate it with a
 * semicolon. It is then safe as the unbraced body of an if/else.
 * i must be a modifiable lvalue; s and i are evaluated more than once.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, start<=i<=length
 * @param length string length
 * @see U16_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT(s, start, i, length) do { \
    /* advance only when i splits a complete lead+trail pair inside [start, length) */ \
    if((start)<(i) && (i)<(length) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
} while(0)
| 613 |
| 614 #endif |
OLD | NEW |