OLD | NEW |
| (Empty) |
1 /* | |
2 ******************************************************************************* | |
3 * Copyright (C) 2007-2010, International Business Machines Corporation and | |
4 * others. All Rights Reserved. | |
5 ******************************************************************************* | |
6 * | |
7 | |
8 * File PLURFMT.H | |
9 * | |
10 * Modification History:* | |
11 * Date Name Description | |
12 * | |
13 ******************************************************************************** | |
14 */ | |
15 | |
16 #ifndef PLURFMT | |
17 #define PLURFMT | |
18 | |
19 #include "unicode/utypes.h" | |
20 | |
21 /** | |
22 * \file | |
23 * \brief C++ API: PluralFormat object | |
24 */ | |
25 | |
26 #if !UCONFIG_NO_FORMATTING | |
27 | |
28 #include "unicode/numfmt.h" | |
29 #include "unicode/plurrule.h" | |
30 | |
31 U_NAMESPACE_BEGIN | |
32 | |
33 class Hashtable; | |
34 | |
35 /** | |
36 * <p> | |
37 * <code>PluralFormat</code> supports the creation of internationalized | |
38 * messages with plural inflection. It is based on <i>plural | |
39 * selection</i>, i.e. the caller specifies messages for each | |
40 * plural case that can appear in the user's language and the | |
41 * <code>PluralFormat</code> selects the appropriate message based on | |
42 * the number. | |
43 * </p> | |
44 * <h4>The Problem of Plural Forms in Internationalized Messages</h4> | |
45 * <p> | |
46 * Different languages have different ways to inflect | |
47 * plurals. Creating internationalized messages that include plural | |
48 * forms is only feasible when the framework is able to handle plural | |
49 * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code> | |
50 * doesn't handle this well, because it attaches a number interval to | |
51 * each message and selects the message whose interval contains a | |
52 * given number. This can only handle a finite number of | |
53 * intervals. But in some languages, like Polish, one plural case | |
54 * applies to infinitely many intervals (e.g., paucal applies to | |
55 * numbers ending with 2, 3, or 4 except those ending with 12, 13, or | |
56 * 14). Thus <code>ChoiceFormat</code> is not adequate. | |
57 * </p><p> | |
58 * <code>PluralFormat</code> deals with this by breaking the problem | |
59 * into two parts: | |
60 * <ul> | |
61 * <li>It uses <code>PluralRules</code> that can define more complex | |
62 * conditions for a plural case than just a single interval. These plural | |
63 * rules define both what plural cases exist in a language, and to | |
64 * which numbers these cases apply. | |
65 * <li>It provides predefined plural rules for many locales. Thus, the programme
r | |
66 * need not worry about the plural cases of a language. On the flip side, | |
67 * the localizer does not have to specify the plural cases; he can simply | |
68 * use the predefined keywords. The whole plural formatting of messages can | |
69 * be done using localized patterns from resource bundles. For predefined pl
ural | |
70 * rules, see CLDR <i>Language Plural Rules</i> page at | |
71 * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_
rules.html | |
72 * </ul> | |
73 * </p> | |
74 * <h4>Usage of <code>PluralFormat</code></h4> | |
75 * <p> | |
76 * This discussion assumes that you use <code>PluralFormat</code> with | |
77 * a predefined set of plural rules. You can create one using one of | |
78 * the constructors that takes a <code>locale</code> object. To | |
79 * specify the message pattern, you can either pass it to the | |
80 * constructor or set it explicitly using the | |
81 * <code>applyPattern()</code> method. The <code>format()</code> | |
82 * method takes a number object and selects the message of the | |
83 * matching plural case. This message will be returned. | |
84 * </p> | |
85 * <h5>Patterns and Their Interpretation</h5> | |
86 * <p> | |
87 * The pattern text defines the message output for each plural case of the | |
88 * used locale. The pattern is a sequence of | |
89 * <code><i>caseKeyword</i>{<i>message</i>}</code> clauses, separated by white | |
90 * space characters. Each clause assigns the message <code><i>message</i></code> | |
91 * to the plural case identified by <code><i>caseKeyword</i></code>. | |
92 * </p><p> | |
93 * There are 6 predefined case keywords in ICU - 'zero', 'one', 'two', 'few', 'many' and | |
94 * 'other'. You always have to define a message text for the default plural case | |
95 * "<code>other</code>" which is contained in every rule set. If the plural | |
96 * rules of the <code>PluralFormat</code> object do not contain a plural case | |
97 * identified by <code><i>caseKeyword</i></code>, U_DEFAULT_KEYWORD_MISSING | |
98 * will be set to status. | |
99 * If you do not specify a message text for a particular plural case, the | |
100 * message text of the plural case "<code>other</code>" gets assigned to this | |
101 * plural case. If you specify more than one message for the same plural case, | |
102 * U_DUPLICATE_KEYWORD will be set to status. | |
103 * <br> | |
104 * Spaces between <code><i>caseKeyword</i></code> and | |
105 * <code><i>message</i></code> will be ignored; spaces within | |
106 * <code><i>message</i></code> will be preserved. | |
107 * </p><p> | |
108 * The message text for a particular plural case may contain other message | |
109 * format patterns. <code>PluralFormat</code> preserves these so that you | |
110 * can use the strings produced by <code>PluralFormat</code> with other | |
111 * formatters. If you are using <code>PluralFormat</code> inside a | |
112 * <code>MessageFormat</code> pattern, <code>MessageFormat</code> will | |
113 * automatically evaluate the resulting format pattern.<br> | |
114 * Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed | |
115 * in message texts to define a nested format pattern.<br> | |
116 * The pound sign (<code>#</code>) will be interpreted as the number placeholder | |
117 * in the message text, if it is not contained in curly braces (to preserve | |
118 * <code>NumberFormat</code> patterns). <code>PluralFormat</code> will | |
119 * replace each of those pound signs by the number passed to the | |
120 * <code>format()</code> method. It will be formatted using a | |
121 * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you | |
122 * need special number formatting, you have to explicitly specify a | |
123 * <code>NumberFormat</code> for the <code>PluralFormat</code> to use. | |
124 * </p> | |
125 * Example | |
126 * <pre> | |
127 * \code | |
128 * UErrorCode status = U_ZERO_ERROR; | |
129 * MessageFormat* msgFmt = new MessageFormat(UnicodeString("{0, plural, | |
130 * one{{0, number, C''est #,##0.0# fichier}} other {Ce sont # fichiers}} dans
la liste."), | |
131 * Locale("fr"), status); | |
132 * if (U_FAILURE(status)) { | |
133 * return; | |
134 * } | |
135 * Formattable args1[] = {(int32_t)0}; | |
136 * Formattable args2[] = {(int32_t)3}; | |
137 * FieldPosition ignore(FieldPosition::DONT_CARE); | |
138 * UnicodeString result; | |
139 * msgFmt->format(args1, 1, result, ignore, status); | |
140 * cout << result << endl; | |
141 * result.remove(); | |
142 * msgFmt->format(args2, 1, result, ignore, status); | |
143 * cout << result << endl; | |
144 * \endcode | |
145 * </pre> | |
146 * Produces the output:<br> | |
147 * <code>C'est 0,0 fichier dans la liste.</code><br> | |
148 * <code>Ce sont 3 fichiers dans la liste.</code> | |
149 * <p> | |
150 * <strong>Note:</strong><br> | |
151 * Currently <code>PluralFormat</code> | |
152 * does not make use of quotes like <code>MessageFormat</code>. | |
153 * If you use plural format strings with <code>MessageFormat</code> and want | |
154 * to use a quote sign <code>'</code>, you have to write <code>''</code>. | |
155 * <code>MessageFormat</code> unquotes this pattern and passes the unquoted | |
156 * pattern to <code>PluralFormat</code>. It's a bit trickier if you use | |
157 * nested formats that do quoting. In the example above, we wanted to insert | |
158 * <code>'</code> in the number format pattern. Since | |
159 * <code>NumberFormat</code> supports quotes, we had to insert | |
160 * <code>''</code>. But since <code>MessageFormat</code> unquotes the | |
161 * pattern before it gets passed to <code>PluralFormat</code>, we have to | |
162 * double these quotes, i.e. write <code>''''</code>. | |
163 * </p> | |
164 * <h4>Defining Custom Plural Rules</h4> | |
165 * <p>If you need to use <code>PluralFormat</code> with custom rules, you can | |
166 * create a <code>PluralRules</code> object and pass it to | |
167 * <code>PluralFormat</code>'s constructor. If you also specify a locale in this | |
168 * constructor, this locale will be used to format the number in the message | |
169 * texts. | |
170 * </p><p> | |
171 * For more information about <code>PluralRules</code>, see | |
172 * {@link PluralRules}. | |
173 * </p> | |
174 * | |
175 * ported from Java | |
176 * @stable ICU 4.0 | |
177 */ | |
178 | |
179 class U_I18N_API PluralFormat : public Format { | |
180 public: | |
181 | |
182 /** | |
183 * Creates a new <code>PluralFormat</code> for the default locale. | |
184 * This locale will be used to get the set of plural rules and for standard | |
185 * number formatting. | |
186 * @param status output param set to success/failure code on exit, which | |
187 * must not indicate a failure before the function call. | |
188 * @stable ICU 4.0 | |
189 */ | |
190 PluralFormat(UErrorCode& status); | |
191 | |
192 /** | |
193 * Creates a new <code>PluralFormat</code> for a given locale. | |
194 * @param locale the <code>PluralFormat</code> will be configured with | |
195 * rules for this locale. This locale will also be used for | |
196 * standard number formatting. | |
197 * @param status output param set to success/failure code on exit, which | |
198 * must not indicate a failure before the function call. | |
199 * @stable ICU 4.0 | |
200 */ | |
201 PluralFormat(const Locale& locale, UErrorCode& status); | |
202 | |
203 /** | |
204 * Creates a new <code>PluralFormat</code> for a given set of rules. | |
205 * The standard number formatting will be done using the default locale. | |
206 * @param rules defines the behavior of the <code>PluralFormat</code> | |
207 * object. | |
208 * @param status output param set to success/failure code on exit, which | |
209 * must not indicate a failure before the function call. | |
210 * @stable ICU 4.0 | |
211 */ | |
212 PluralFormat(const PluralRules& rules, UErrorCode& status); | |
213 | |
214 /** | |
215 * Creates a new <code>PluralFormat</code> for a given set of rules. | |
216 * The standard number formatting will be done using the given locale. | |
217 * @param locale the default number formatting will be done using this | |
218 * locale. | |
219 * @param rules defines the behavior of the <code>PluralFormat</code> | |
220 * object. | |
221 * @param status output param set to success/failure code on exit, which | |
222 * must not indicate a failure before the function call. | |
223 * @stable ICU 4.0 | |
224 */ | |
225 PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& sta
tus); | |
226 | |
227 /** | |
228 * Creates a new <code>PluralFormat</code> for a given pattern string. | |
229 * The default locale will be used to get the set of plural rules and for | |
230 * standard number formatting. | |
231 * @param pattern the pattern for this <code>PluralFormat</code>. | |
232 * errors are returned to status if the pattern is invalid. | |
233 * @param status output param set to success/failure code on exit, which | |
234 * must not indicate a failure before the function call. | |
235 * @stable ICU 4.0 | |
236 */ | |
237 PluralFormat(const UnicodeString& pattern, UErrorCode& status); | |
238 | |
239 /** | |
240 * Creates a new <code>PluralFormat</code> for a given pattern string and | |
241 * locale. | |
242 * The locale will be used to get the set of plural rules and for | |
243 * standard number formatting. | |
244 * @param locale the <code>PluralFormat</code> will be configured with | |
245 * rules for this locale. This locale will also be used for | |
246 * standard number formatting. | |
247 * @param pattern the pattern for this <code>PluralFormat</code>. | |
248 * errors are returned to status if the pattern is invalid. | |
249 * @param status output param set to success/failure code on exit, which | |
250 * must not indicate a failure before the function call. | |
251 * @stable ICU 4.0 | |
252 */ | |
253 PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode&
status); | |
254 | |
255 /** | |
256 * Creates a new <code>PluralFormat</code> for a given set of rules and a | |
257 * pattern. | |
258 * @param rules defines the behavior of the <code>PluralFormat</code> | |
259 * object. | |
260 * @param pattern the pattern for this <code>PluralFormat</code>. | |
261 * errors are returned to status if the pattern is invalid. | |
262 * @param status output param set to success/failure code on exit, which | |
263 * must not indicate a failure before the function call. | |
264 * @stable ICU 4.0 | |
265 */ | |
266 PluralFormat(const PluralRules& rules, | |
267 const UnicodeString& pattern, | |
268 UErrorCode& status); | |
269 | |
270 /** | |
271 * Creates a new <code>PluralFormat</code> for a given set of rules, a | |
272 * pattern and a locale. | |
273 * @param locale the default number formatting will be done using this | |
274 * locale; the plural rules are taken from | |
275 * <code>rules</code>. | |
276 * @param rules defines the behavior of the <code>PluralFormat</code> | |
277 * object. | |
278 * @param pattern the pattern for this <code>PluralFormat</code>. | |
279 * errors are returned to status if the pattern is invalid. | |
280 * @param status output param set to success/failure code on exit, which | |
281 * must not indicate a failure before the function call. | |
282 * @stable ICU 4.0 | |
283 */ | |
284 PluralFormat(const Locale& locale, | |
285 const PluralRules& rules, | |
286 const UnicodeString& pattern, | |
287 UErrorCode& status); | |
288 | |
289 /** | |
290 * Copy constructor. | |
291 * @stable ICU 4.0 | |
292 */ | |
293 PluralFormat(const PluralFormat& other); | |
294 | |
295 /** | |
296 * Destructor. | |
297 * @stable ICU 4.0 | |
298 */ | |
299 virtual ~PluralFormat(); | |
300 | |
301 /** | |
302 * Sets the pattern used by this plural format. | |
303 * The method parses the pattern and creates a map of format strings | |
304 * for the plural rules. | |
305 * Patterns and their interpretation are specified in the class description. | |
306 * | |
307 * @param pattern the pattern for this plural format | |
308 * errors are returned to status if the pattern is invalid. | |
309 * @param status output param set to success/failure code on exit, which | |
310 * must not indicate a failure before the function call. | |
311 * @stable ICU 4.0 | |
312 */ | |
313 void applyPattern(const UnicodeString& pattern, UErrorCode& status); | |
314 | |
315 | |
316 using Format::format; | |
317 | |
318 /** | |
319 * Formats a plural message for a given number. | |
320 * | |
321 * @param number a number for which the plural message should be formatted. | |
322 * If no pattern has been applied to this | |
323 * <code>PluralFormat</code> object yet, the formatted number | |
324 * will be returned. | |
325 * @param status output param set to success/failure code on exit, which | |
326 * must not indicate a failure before the function call. | |
327 * @return the string containing the formatted plural message. | |
328 * @stable ICU 4.0 | |
329 */ | |
330 UnicodeString format(int32_t number, UErrorCode& status) const; | |
331 | |
332 /** | |
333 * Formats a plural message for a given number. | |
334 * | |
335 * @param number a number for which the plural message should be formatted. | |
336 * If no pattern has been applied to this | |
337 * PluralFormat object yet, the formatted number | |
338 * will be returned. | |
339 * @param status output param set to success or failure code on exit, which | |
340 * must not indicate a failure before the function call. | |
341 * @return the string containing the formatted plural message. | |
342 * @stable ICU 4.0 | |
343 */ | |
344 UnicodeString format(double number, UErrorCode& status) const; | |
345 | |
346 /** | |
347 * Formats a plural message for a given number. | |
348 * | |
349 * @param number a number for which the plural message should be formatted. | |
350 * If no pattern has been applied to this | |
351 * <code>PluralFormat</code> object yet, the formatted numbe
r | |
352 * will be returned. | |
353 * @param appendTo output parameter to receive result. | |
354 * result is appended to existing contents. | |
355 * @param pos On input: an alignment field, if desired. | |
356 * On output: the offsets of the alignment field. | |
357 * @param status output param set to success/failure code on exit, which | |
358 * must not indicate a failure before the function call. | |
359 * @return the string containing the formatted plural message. | |
360 * @stable ICU 4.0 | |
361 */ | |
362 UnicodeString& format(int32_t number, | |
363 UnicodeString& appendTo, | |
364 FieldPosition& pos, | |
365 UErrorCode& status) const; | |
366 | |
367 /** | |
368 * Formats a plural message for a given number. | |
369 * | |
370 * @param number a number for which the plural message should be formatted. | |
371 * If no pattern has been applied to this | |
372 * PluralFormat object yet, the formatted number | |
373 * will be returned. | |
374 * @param appendTo output parameter to receive result. | |
375 * result is appended to existing contents. | |
376 * @param pos On input: an alignment field, if desired. | |
377 * On output: the offsets of the alignment field. | |
378 * @param status output param set to success/failure code on exit, which | |
379 * must not indicate a failure before the function call. | |
380 * @return the string containing the formatted plural message. | |
381 * @stable ICU 4.0 | |
382 */ | |
383 UnicodeString& format(double number, | |
384 UnicodeString& appendTo, | |
385 FieldPosition& pos, | |
386 UErrorCode& status) const; | |
387 | |
388 /** | |
389 * Sets the locale used by this <code>PluralFormat</code> object. | |
390 * Note: Calling this method resets this <code>PluralFormat</code> object, | |
391 * i.e., a pattern that was applied previously will be removed, | |
392 * and the NumberFormat is set to the default number format for | |
393 * the locale. The resulting format behaves the same as one | |
394 * constructed from {@link #PluralFormat(const Locale& locale, UErrorCod
e& status)}. | |
395 * @param locale the <code>locale</code> to use to configure the formatter. | |
396 * @param status output param set to success/failure code on exit, which | |
397 * must not indicate a failure before the function call. | |
398 * @stable ICU 4.0 | |
399 */ | |
400 void setLocale(const Locale& locale, UErrorCode& status); | |
401 | |
402 /** | |
403 * Sets the number format used by this formatter. You only need to | |
404 * call this if you want a different number format than the default | |
405 * formatter for the locale. | |
406 * @param format the number format to use. | |
407 * @param status output param set to success/failure code on exit, which | |
408 * must not indicate a failure before the function call. | |
409 * @stable ICU 4.0 | |
410 */ | |
411 void setNumberFormat(const NumberFormat* format, UErrorCode& status); | |
412 | |
413 /** | |
414 * Assignment operator | |
415 * | |
416 * @param other the PluralFormat object to copy from. | |
417 * @stable ICU 4.0 | |
418 */ | |
419 PluralFormat& operator=(const PluralFormat& other); | |
420 | |
421 /** | |
422 * Return true if another object is semantically equal to this one. | |
423 * | |
424 * @param other the PluralFormat object to be compared with. | |
425 * @return true if other is semantically equal to this. | |
426 * @stable ICU 4.0 | |
427 */ | |
428 virtual UBool operator==(const Format& other) const; | |
429 | |
430 /** | |
431 * Return true if another object is semantically unequal to this one. | |
432 * | |
433 * @param other the PluralFormat object to be compared with. | |
434 * @return true if other is semantically unequal to this. | |
435 * @stable ICU 4.0 | |
436 */ | |
437 virtual UBool operator!=(const Format& other) const; | |
438 | |
439 /** | |
440 * Clones this Format object polymorphically. The caller owns the | |
441 * result and should delete it when done. | |
442 * @stable ICU 4.0 | |
443 */ | |
444 virtual Format* clone(void) const; | |
445 | |
446 /** | |
447 * Redeclared Format method. | |
448 * | |
449 * @param obj The object to be formatted into a string. | |
450 * @param appendTo output parameter to receive result. | |
451 * Result is appended to existing contents. | |
452 * @param pos On input: an alignment field, if desired. | |
453 * On output: the offsets of the alignment field. | |
454 * @param status output param filled with success/failure status. | |
455 * @return Reference to 'appendTo' parameter. | |
456 * @stable ICU 4.0 | |
457 */ | |
458 UnicodeString& format(const Formattable& obj, | |
459 UnicodeString& appendTo, | |
460 FieldPosition& pos, | |
461 UErrorCode& status) const; | |
462 | |
463 /** | |
464 * Returns the pattern from applyPattern() or constructor(). | |
465 * | |
466 * @param appendTo output parameter to receive result. | |
467 * Result is appended to existing contents. | |
468 * @return the UnicodeString with inserted pattern. | |
469 * @stable ICU 4.0 | |
470 */ | |
471 UnicodeString& toPattern(UnicodeString& appendTo); | |
472 | |
473 /** | |
474 * This method is not yet supported by <code>PluralFormat</code>. | |
475 * <P> | |
476 * Before calling, set parse_pos.index to the offset you want to start | |
477 * parsing at in the source. After calling, parse_pos.index is the end of | |
478 * the text you parsed. If error occurs, index is unchanged. | |
479 * <P> | |
480 * When parsing, leading whitespace is discarded (with a successful parse), | |
481 * while trailing whitespace is left as is. | |
482 * <P> | |
483 * See Format::parseObject() for more. | |
484 * | |
485 * @param source The string to be parsed into an object. | |
486 * @param result Formattable to be set to the parse result. | |
487 * If parse fails, return contents are undefined. | |
488 * @param parse_pos The position to start parsing at. Upon return | |
489 * this param is set to the position after the | |
490 * last character successfully parsed. If the | |
491 * source is not parsed successfully, this param | |
492 * will remain unchanged. | |
493 * @stable ICU 4.0 | |
494 */ | |
495 virtual void parseObject(const UnicodeString& source, | |
496 Formattable& result, | |
497 ParsePosition& parse_pos) const; | |
498 | |
499 /** | |
500 * ICU "poor man's RTTI", returns a UClassID for this class. | |
501 * | |
502 * @stable ICU 4.0 | |
503 * | |
504 */ | |
505 static UClassID U_EXPORT2 getStaticClassID(void); | |
506 | |
507 /** | |
508 * ICU "poor man's RTTI", returns a UClassID for the actual class. | |
509 * | |
510 * @stable ICU 4.0 | |
511 */ | |
512 virtual UClassID getDynamicClassID() const; | |
513 | |
514 private: | |
515 typedef enum fmtToken { | |
516 none, | |
517 tLetter, | |
518 tNumber, | |
519 tSpace, | |
520 tNumberSign, | |
521 tLeftBrace, | |
522 tRightBrace | |
523 }fmtToken; | |
524 | |
525 Locale locale; | |
526 PluralRules* pluralRules; | |
527 UnicodeString pattern; | |
528 Hashtable *fParsedValuesHash; | |
529 NumberFormat* numberFormat; | |
530 NumberFormat* replacedNumberFormat; | |
531 | |
532 PluralFormat(); // default constructor not implemented | |
533 void init(const PluralRules* rules, const Locale& curlocale, UErrorCode& sta
tus); | |
534 UBool inRange(UChar ch, fmtToken& type); | |
535 UBool checkSufficientDefinition(); | |
536 void parsingFailure(); | |
537 UnicodeString insertFormattedNumber(double number, | |
538 UnicodeString& message, | |
539 UnicodeString& appendTo, | |
540 FieldPosition& pos) const; | |
541 void copyHashtable(Hashtable *other, UErrorCode& status); | |
542 }; | |
543 | |
544 U_NAMESPACE_END | |
545 | |
546 #endif /* #if !UCONFIG_NO_FORMATTING */ | |
547 | |
548 #endif // PLURFMT | |
549 //eof | |
OLD | NEW |