OLD | NEW |
| (Empty) |
1 /* | |
2 ******************************************************************************* | |
3 * | |
4 * Copyright (C) 1999-2010, International Business Machines | |
5 * Corporation and others. All Rights Reserved. | |
6 * | |
7 ******************************************************************************* | |
8 * file name: utf16.h | |
9 * encoding: US-ASCII | |
10 * tab size: 8 (not used) | |
11 * indentation:4 | |
12 * | |
13 * created on: 1999sep09 | |
14 * created by: Markus W. Scherer | |
15 */ | |
16 | |
/**
 * \file
 * \brief C API: 16-bit Unicode handling macros
 *
 * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
 * utf16.h is included by utf.h after unicode/umachine.h
 * and some common definitions.
 *
 * For more information see utf.h and the ICU User Guide Strings chapter
 * (http://icu-project.org/userguide/strings.html).
 *
 * <em>Usage:</em>
 * ICU coding guidelines for if() statements should be followed when using these macros.
 * Compound statements (curly braces {}) must be used for if-else-while...
 * bodies, and all macro statements should be terminated with a semicolon.
 */
33 | |
34 #ifndef __UTF16_H__ | |
35 #define __UTF16_H__ | |
36 | |
37 /* utf.h must be included first. */ | |
38 #ifndef __UTF_H__ | |
39 # include "unicode/utf.h" | |
40 #endif | |
41 | |
42 /* single-code point definitions -------------------------------------------- */ | |
43 | |
/**
 * Tests whether a single code unit is a complete code point by itself,
 * i.e. whether it is a BMP code unit that is not a surrogate.
 * Expands to the negation of U_IS_SURROGATE(c).
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
51 | |
/**
 * Tests whether a code unit is a lead (high) surrogate, U+d800..U+dbff.
 * All bits above the low 10 are compared against the lead-surrogate prefix.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_LEAD(c) (0xd800==((c)&0xfffffc00))
59 | |
/**
 * Tests whether a code unit is a trail (low) surrogate, U+dc00..U+dfff.
 * All bits above the low 10 are compared against the trail-surrogate prefix.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_TRAIL(c) (0xdc00==((c)&0xfffffc00))
67 | |
/**
 * Tests whether a code unit is any surrogate, lead or trail (U+d800..U+dfff).
 * Thin alias for U_IS_SURROGATE(c).
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE(c) (U_IS_SURROGATE(c))
75 | |
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tests whether it is a lead surrogate.
 * Only bit 0x400 is inspected, so the result is meaningless for
 * non-surrogate input.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (!((c)&0x400))
84 | |
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tests whether it is a trail surrogate.
 * Only bit 0x400 is inspected, so the result is meaningless for
 * non-surrogate input.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (0!=((c)&0x400))
93 | |
/**
 * Helper constant for U16_GET_SUPPLEMENTARY:
 * the value of (lead<<10)+trail for the first surrogate pair
 * (0xd800, 0xdc00), minus the first supplementary code point 0x10000.
 * @internal
 */
#define U16_SURROGATE_OFFSET (0xdc00-0x10000+(0xd800<<10UL))
99 | |
/**
 * Combines a lead and a trail surrogate into the supplementary
 * code point (U+10000..U+10ffff) that they encode.
 * No validation is done: the result is undefined if the inputs are not
 * a lead surrogate followed by a trail surrogate.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * @stable ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    ((UChar32)(trail)+((UChar32)(lead)<<10UL)-U16_SURROGATE_OFFSET)
113 | |
114 | |
/**
 * Computes the lead surrogate (0xd800..0xdbff) of the surrogate pair
 * that encodes a supplementary code point (0x10000..0x10ffff).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) ((UChar)(0xd7c0+((supplementary)>>10)))
123 | |
/**
 * Computes the trail surrogate (0xdc00..0xdfff) of the surrogate pair
 * that encodes a supplementary code point (0x10000..0x10ffff).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) ((UChar)(0xdc00|((supplementary)&0x3ff)))
132 | |
/**
 * Returns the number of 16-bit code units (1 or 2) needed to encode
 * a Unicode code point in UTF-16.
 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
 * @param c 32-bit code point
 * @return 1 or 2
 * @stable ICU 2.4
 */
#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
141 | |
/**
 * Upper bound on the number of 16-bit code units that encode
 * one Unicode code point (U+0000..U+10ffff).
 * @return 2
 * @stable ICU 2.4
 */
#define U16_MAX_LENGTH 2
148 | |
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset may point to either the lead or trail surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the adjacent matching surrogate as well.
 * The result is undefined if the offset points to a single, unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_GET
 * @stable ICU 2.4
 */
#define U16_GET_UNSAFE(s, i, c) do { \
    (c)=(s)[(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            /* at the lead unit: the trail follows */ \
            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
        } else { \
            /* at the trail unit: the lead precedes */ \
            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
        } \
    } \
} while(0)
176 | |
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may point to either the lead or trail surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the adjacent matching surrogate as well.
 * If the offset points to a single, unpaired surrogate, then that itself
 * will be returned as the code point.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U16_GET(s, start, i, length, c) do { \
    (c)=(s)[(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            /* only look ahead if the next unit is inside the string */ \
            if((i)+1<(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            } \
        } else { \
            /* only look back if the previous unit is inside the string */ \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } \
        } \
    } \
} while(0)
212 | |
213 /* definitions with forward iteration --------------------------------------- */ | |
214 | |
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset may point to the lead surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset points to a single, unpaired lead surrogate.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_NEXT
 * @stable ICU 2.4
 */
#define U16_NEXT_UNSAFE(s, i, c) do { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
    } \
} while(0)
240 | |
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may point to the lead surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then that itself
 * will be returned as the code point.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_NEXT(s, i, length, c) do { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        /* consume the next unit only if it is in bounds and a trail surrogate */ \
        if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} while(0)
271 | |
/**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * The code point argument is evaluated exactly once, so an expression
 * with side effects (such as *p++) may be passed for c.
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset
 * @param c code point to append
 * @see U16_APPEND
 * @stable ICU 2.4
 */
#define U16_APPEND_UNSAFE(s, i, c) do { \
    const uint32_t __u16cp=(uint32_t)(c); \
    if(__u16cp<=0xffff) { \
        (s)[(i)++]=(uint16_t)__u16cp; \
    } else { \
        /* supplementary code point: write lead, then trail surrogate */ \
        (s)[(i)++]=(uint16_t)((__u16cp>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)((__u16cp&0x3ff)|0xdc00); \
    } \
} while(0)
293 | |
/**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Safe" macro, checks for a valid code point.
 * If a surrogate pair is written, checks for sufficient space in the string.
 * If the code point is not valid or a trail surrogate does not fit,
 * then isError is set to TRUE and nothing is written.
 * A single code unit (c<=0xffff) is written without a capacity check,
 * which is why i<capacity is a precondition.
 *
 * The code point argument is evaluated exactly once, so an expression
 * with side effects (such as *p++) may be passed for c.
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset, must be i<capacity
 * @param capacity size of the string buffer
 * @param c code point to append
 * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
 * @see U16_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U16_APPEND(s, i, capacity, c, isError) do { \
    const uint32_t __u16cp=(uint32_t)(c); \
    if(__u16cp<=0xffff) { \
        (s)[(i)++]=(uint16_t)__u16cp; \
    } else if(__u16cp<=0x10ffff && (i)+1<(capacity)) { \
        (s)[(i)++]=(uint16_t)((__u16cp>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)((__u16cp&0x3ff)|0xdc00); \
    } else /* c>0x10ffff or not enough space */ { \
        (isError)=TRUE; \
    } \
} while(0)
321 | |
/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_FWD_1
 * @stable ICU 2.4
 */
#define U16_FWD_1_UNSAFE(s, i) do { \
    /* a lead surrogate is assumed to be followed by its trail: skip both */ \
    if(U16_IS_LEAD((s)[(i)++])) { \
        ++(i); \
    } \
} while(0)
337 | |
/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @see U16_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_1(s, i, length) do { \
    /* skip a second unit only for a lead followed, in bounds, by a trail */ \
    if(U16_IS_LEAD((s)[(i)++]) && (i)<(length) && U16_IS_TRAIL((s)[(i)])) { \
        ++(i); \
    } \
} while(0)
354 | |
/**
 * Advance the string offset from one code point boundary to the n-th next one,
 * i.e., move forward by n code points.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 * Nothing happens if n<=0.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_FWD_N
 * @stable ICU 2.4
 */
#define U16_FWD_N_UNSAFE(s, i, n) do { \
    int32_t __N=(n); \
    while(__N>0) { \
        U16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
} while(0)
374 | |
/**
 * Advance the string offset from one code point boundary to the n-th next one,
 * i.e., move forward by n code points.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 * Stops early when the offset reaches length; nothing happens if n<=0.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) do { \
    int32_t __N=(n); \
    while(__N>0 && (i)<(length)) { \
        U16_FWD_1(s, i, length); \
        --__N; \
    } \
} while(0)
395 | |
/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to the trail surrogate of a surrogate pair,
 * then the offset is decremented.
 * Otherwise, it is not modified.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * @stable ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) do { \
    if(U16_IS_TRAIL((s)[(i)])) { \
        --(i); \
    } \
} while(0)
414 | |
/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to the trail surrogate of a surrogate pair,
 * then the offset is decremented.
 * Otherwise, it is not modified.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 * Note that the code unit at offset i is always read.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) do { \
    /* step back only onto a real lead surrogate that lies inside the string */ \
    if(U16_IS_TRAIL((s)[(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} while(0)
434 | |
435 /* definitions with backward iteration -------------------------------------- */ | |
436 | |
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate unit
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * @stable ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) do { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    } \
} while(0)
463 | |
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate unit
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then that itself
 * will be returned as the code point.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U16_PREV(s, start, i, c) do { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        uint16_t __c2; \
        /* consume the previous unit only if it is in bounds and a lead surrogate */ \
        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} while(0)
495 | |
/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * @stable ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) do { \
    /* a trail surrogate is assumed to be preceded by its lead: skip both */ \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        --(i); \
    } \
} while(0)
512 | |
/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @see U16_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_1(s, start, i) do { \
    /* step back a second unit only for a trail preceded, in bounds, by a lead */ \
    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} while(0)
530 | |
/**
 * Move the string offset from one code point boundary to the n-th one before it,
 * i.e., move backward by n code points.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 * Nothing happens if n<=0.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_BACK_N
 * @stable ICU 2.4
 */
#define U16_BACK_N_UNSAFE(s, i, n) do { \
    int32_t __N=(n); \
    while(__N>0) { \
        U16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
} while(0)
551 | |
/**
 * Move the string offset from one code point boundary to the n-th one before it,
 * i.e., move backward by n code points.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 * Stops early when the offset reaches start; nothing happens if n<=0.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param start start of string
 * @param i string offset, must be start<i
 * @param n number of code points to skip
 * @see U16_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_N(s, start, i, n) do { \
    int32_t __N=(n); \
    while(__N>0 && (i)>(start)) { \
        U16_BACK_1(s, start, i); \
        --__N; \
    } \
} while(0)
573 | |
/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind the lead surrogate of a surrogate pair,
 * then the offset is incremented.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 * Note that the code unit at offset i-1 is always read.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) do { \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        ++(i); \
    } \
} while(0)
592 | |
/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind the lead surrogate of a surrogate pair,
 * then the offset is incremented.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 * No code unit is read unless start<i<length.
 *
 * The expansion is a single do { } while(0) statement, so the invocation
 * must be followed by a semicolon and may be used as an unbraced if/else body.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, start<=i<=length
 * @param length string length
 * @see U16_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT(s, start, i, length) do { \
    if((start)<(i) && (i)<(length) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[(i)])) { \
        ++(i); \
    } \
} while(0)
613 | |
614 #endif | |
OLD | NEW |