| OLD | NEW |
| 1 /* | 1 /* |
| 2 ****************************************************************************** | 2 ****************************************************************************** |
| 3 * Copyright (C) 1996-2014, International Business Machines Corporation and others. | 3 * Copyright (C) 1996-2015, International Business Machines Corporation and others. |
| 4 * All Rights Reserved. | 4 * All Rights Reserved. |
| 5 ****************************************************************************** | 5 ****************************************************************************** |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef UBRK_H | 8 #ifndef UBRK_H |
| 9 #define UBRK_H | 9 #define UBRK_H |
| 10 | 10 |
| 11 #include "unicode/utypes.h" | 11 #include "unicode/utypes.h" |
| 12 #include "unicode/uloc.h" | 12 #include "unicode/uloc.h" |
| 13 #include "unicode/utext.h" | 13 #include "unicode/utext.h" |
| (...skipping 24 matching lines...) Expand all Loading... |
| 38 * | 38 * |
| 39 * The BreakIterator C API defines methods for finding the location | 39 * The BreakIterator C API defines methods for finding the location |
| 40 * of boundaries in text. A UBreakIterator maintains a | 40 * of boundaries in text. A UBreakIterator maintains a |
| 41 * current position and scans over text, returning the index of characters | 41 * current position and scans over text, returning the index of characters |
| 42 * where boundaries occur. | 42 * where boundaries occur. |
| 43 * <p> | 43 * <p> |
| 44 * Line boundary analysis determines where a text string can be broken | 44 * Line boundary analysis determines where a text string can be broken |
| 45 * when line-wrapping. The mechanism correctly handles punctuation and | 45 * when line-wrapping. The mechanism correctly handles punctuation and |
| 46 * hyphenated words. | 46 * hyphenated words. |
| 47 * <p> | 47 * <p> |
| 48 * Note: The locale keyword "lb" can be used to modify line break |
| 49 * behavior according to the CSS level 3 line-break options, see |
| 50 * <http://dev.w3.org/csswg/css-text/#line-breaking>. For example: |
| 51 * "ja@lb=strict", "zh@lb=loose". |
| 52 * <p> |
| 48 * Sentence boundary analysis allows selection with correct | 53 * Sentence boundary analysis allows selection with correct |
| 49 * interpretation of periods within numbers and abbreviations, and | 54 * interpretation of periods within numbers and abbreviations, and |
| 50 * trailing punctuation marks such as quotation marks and parentheses. | 55 * trailing punctuation marks such as quotation marks and parentheses. |
| 51 * <p> | 56 * <p> |
| 57 * Note: The locale keyword "ss" can be used to enable use of |
| 58 * segmentation suppression data (preventing breaks in English after |
| 59 * abbreviations such as "Mr." or "Est.", for example), as follows: |
| 60 * "en@ss=standard". |
| 61 * <p> |
| 52 * Word boundary analysis is used by search and replace functions, as | 62 * Word boundary analysis is used by search and replace functions, as |
| 53 * well as within text editing applications that allow the user to | 63 * well as within text editing applications that allow the user to |
| 54 * select words with a double click. Word selection provides correct | 64 * select words with a double click. Word selection provides correct |
| 55 * interpretation of punctuation marks within and following | 65 * interpretation of punctuation marks within and following |
| 56 * words. Characters that are not part of a word, such as symbols or | 66 * words. Characters that are not part of a word, such as symbols or |
| 57 * punctuation marks, have word-breaks on both sides. | 67 * punctuation marks, have word-breaks on both sides. |
| 58 * <p> | 68 * <p> |
| 59 * Character boundary analysis identifies the boundaries of | 69 * Character boundary analysis identifies the boundaries of |
| 60 * "Extended Grapheme Clusters", which are groupings of codepoints | 70 * "Extended Grapheme Clusters", which are groupings of codepoints |
| 61 * that should be treated as character-like units for many text operations. | 71 * that should be treated as character-like units for many text operations. |
| (...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 195 /** Tag value for a hard, or mandatory line break */ | 205 /** Tag value for a hard, or mandatory line break */ |
| 196 } USentenceBreakTag; | 206 } USentenceBreakTag; |
| 197 | 207 |
| 198 | 208 |
| 199 /** | 209 /** |
| 200 * Open a new UBreakIterator for locating text boundaries for a specified locale. | 210 * Open a new UBreakIterator for locating text boundaries for a specified locale. |
| 201 * A UBreakIterator may be used for detecting character, line, word, | 211 * A UBreakIterator may be used for detecting character, line, word, |
| 202 * and sentence breaks in text. | 212 * and sentence breaks in text. |
| 203 * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD, | 213 * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD, |
| 204 * UBRK_LINE, UBRK_SENTENCE | 214 * UBRK_LINE, UBRK_SENTENCE |
| 205 * @param locale The locale specifying the text-breaking conventions. | 215 * @param locale The locale specifying the text-breaking conventions. Note that |
| 216 * locale keys such as "lb" and "ss" may be used to modify text break behavior, |
| 217 * see general discussion of BreakIterator C API. |
| 206 * @param text The text to be iterated over. | 218 * @param text The text to be iterated over. |
| 207 * @param textLength The number of characters in text, or -1 if null-terminated. | 219 * @param textLength The number of characters in text, or -1 if null-terminated. |
| 208 * @param status A UErrorCode to receive any errors. | 220 * @param status A UErrorCode to receive any errors. |
| 209 * @return A UBreakIterator for the specified locale. | 221 * @return A UBreakIterator for the specified locale. |
| 210 * @see ubrk_openRules | 222 * @see ubrk_openRules |
| 211 * @stable ICU 2.0 | 223 * @stable ICU 2.0 |
| 212 */ | 224 */ |
| 213 U_STABLE UBreakIterator* U_EXPORT2 | 225 U_STABLE UBreakIterator* U_EXPORT2 |
| 214 ubrk_open(UBreakIteratorType type, | 226 ubrk_open(UBreakIteratorType type, |
| 215 const char *locale, | 227 const char *locale, |
| (...skipping 315 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 531 * @stable ICU 49 | 543 * @stable ICU 49 |
| 532 */ | 544 */ |
| 533 U_STABLE void U_EXPORT2 | 545 U_STABLE void U_EXPORT2 |
| 534 ubrk_refreshUText(UBreakIterator *bi, | 546 ubrk_refreshUText(UBreakIterator *bi, |
| 535 UText *text, | 547 UText *text, |
| 536 UErrorCode *status); | 548 UErrorCode *status); |
| 537 | 549 |
| 538 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 550 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
| 539 | 551 |
| 540 #endif | 552 #endif |
| OLD | NEW |