OLD | NEW |
1 /* | 1 /* |
2 ****************************************************************************** | 2 ****************************************************************************** |
3 * Copyright (C) 1996-2014, International Business Machines Corporation and other
s. | 3 * Copyright (C) 1996-2015, International Business Machines Corporation and other
s. |
4 * All Rights Reserved. | 4 * All Rights Reserved. |
5 ****************************************************************************** | 5 ****************************************************************************** |
6 */ | 6 */ |
7 | 7 |
8 #ifndef UBRK_H | 8 #ifndef UBRK_H |
9 #define UBRK_H | 9 #define UBRK_H |
10 | 10 |
11 #include "unicode/utypes.h" | 11 #include "unicode/utypes.h" |
12 #include "unicode/uloc.h" | 12 #include "unicode/uloc.h" |
13 #include "unicode/utext.h" | 13 #include "unicode/utext.h" |
(...skipping 24 matching lines...) Expand all Loading... |
38 * | 38 * |
39 * The BreakIterator C API defines methods for finding the location | 39 * The BreakIterator C API defines methods for finding the location |
40 * of boundaries in text. A UBreakIterator maintains a | 40 * of boundaries in text. A UBreakIterator maintains a |
41 * current position and scans over text, returning the index of characters | 41 * current position and scans over text, returning the index of characters |
42 * where boundaries occur. | 42 * where boundaries occur. |
43 * <p> | 43 * <p> |
44 * Line boundary analysis determines where a text string can be broken | 44 * Line boundary analysis determines where a text string can be broken |
45 * when line-wrapping. The mechanism correctly handles punctuation and | 45 * when line-wrapping. The mechanism correctly handles punctuation and |
46 * hyphenated words. | 46 * hyphenated words. |
47 * <p> | 47 * <p> |
| 48 * Note: The locale keyword "lb" can be used to modify line break |
| 49 * behavior according to the CSS level 3 line-break options; see |
| 50 * <http://dev.w3.org/csswg/css-text/#line-breaking>. For example: |
| 51 * "ja@lb=strict", "zh@lb=loose". |
| 52 * <p> |
48 * Sentence boundary analysis allows selection with correct | 53 * Sentence boundary analysis allows selection with correct |
49 * interpretation of periods within numbers and abbreviations, and | 54 * interpretation of periods within numbers and abbreviations, and |
50 * trailing punctuation marks such as quotation marks and parentheses. | 55 * trailing punctuation marks such as quotation marks and parentheses. |
51 * <p> | 56 * <p> |
| 57 * Note: The locale keyword "ss" can be used to enable use of |
| 58 * segmentation suppression data (preventing breaks in English after |
| 59 * abbreviations such as "Mr." or "Est.", for example), as follows: |
| 60 * "en@ss=standard". |
| 61 * <p> |
52 * Word boundary analysis is used by search and replace functions, as | 62 * Word boundary analysis is used by search and replace functions, as |
53 * well as within text editing applications that allow the user to | 63 * well as within text editing applications that allow the user to |
54 * select words with a double click. Word selection provides correct | 64 * select words with a double click. Word selection provides correct |
55 * interpretation of punctuation marks within and following | 65 * interpretation of punctuation marks within and following |
56 * words. Characters that are not part of a word, such as symbols or | 66 * words. Characters that are not part of a word, such as symbols or |
57 * punctuation marks, have word-breaks on both sides. | 67 * punctuation marks, have word-breaks on both sides. |
58 * <p> | 68 * <p> |
59 * Character boundary analysis identifies the boundaries of | 69 * Character boundary analysis identifies the boundaries of |
60 * "Extended Grapheme Clusters", which are groupings of codepoints | 70 * "Extended Grapheme Clusters", which are groupings of codepoints |
61 * that should be treated as character-like units for many text operations. | 71 * that should be treated as character-like units for many text operations. |
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
195 /** Tag value for a hard, or mandatory line break */ | 205 /** Tag value for a hard, or mandatory line break */ |
196 } USentenceBreakTag; | 206 } USentenceBreakTag; |
197 | 207 |
198 | 208 |
199 /** | 209 /** |
200 * Open a new UBreakIterator for locating text boundaries for a specified locale
. | 210 * Open a new UBreakIterator for locating text boundaries for a specified locale
. |
201 * A UBreakIterator may be used for detecting character, line, word, | 211 * A UBreakIterator may be used for detecting character, line, word, |
202 * and sentence breaks in text. | 212 * and sentence breaks in text. |
203 * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_W
ORD, | 213 * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_W
ORD, |
204 * UBRK_LINE, UBRK_SENTENCE | 214 * UBRK_LINE, UBRK_SENTENCE |
205 * @param locale The locale specifying the text-breaking conventions. | 215 * @param locale The locale specifying the text-breaking conventions. Note that |
| 216 * locale keys such as "lb" and "ss" may be used to modify text break behavior; |
| 217 * see the general discussion of the BreakIterator C API. |
206 * @param text The text to be iterated over. | 218 * @param text The text to be iterated over. |
207 * @param textLength The number of characters in text, or -1 if null-terminated. | 219 * @param textLength The number of characters in text, or -1 if null-terminated. |
208 * @param status A UErrorCode to receive any errors. | 220 * @param status A UErrorCode to receive any errors. |
209 * @return A UBreakIterator for the specified locale. | 221 * @return A UBreakIterator for the specified locale. |
210 * @see ubrk_openRules | 222 * @see ubrk_openRules |
211 * @stable ICU 2.0 | 223 * @stable ICU 2.0 |
212 */ | 224 */ |
213 U_STABLE UBreakIterator* U_EXPORT2 | 225 U_STABLE UBreakIterator* U_EXPORT2 |
214 ubrk_open(UBreakIteratorType type, | 226 ubrk_open(UBreakIteratorType type, |
215 const char *locale, | 227 const char *locale, |
(...skipping 315 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
531 * @stable ICU 49 | 543 * @stable ICU 49 |
532 */ | 544 */ |
533 U_STABLE void U_EXPORT2 | 545 U_STABLE void U_EXPORT2 |
534 ubrk_refreshUText(UBreakIterator *bi, | 546 ubrk_refreshUText(UBreakIterator *bi, |
535 UText *text, | 547 UText *text, |
536 UErrorCode *status); | 548 UErrorCode *status); |
537 | 549 |
538 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 550 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
539 | 551 |
540 #endif | 552 #endif |
OLD | NEW |