OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of intl; | 5 part of intl; |
6 | 6 |
7 /** | 7 /** |
8 * Bidi stands for Bi-directional text. | 8 * Bidi stands for Bi-directional text. |
9 * According to http://en.wikipedia.org/wiki/Bi-directional_text: | 9 * According to http://en.wikipedia.org/wiki/Bi-directional_text: |
10 * Bi-directional text is text containing text in both text directionalities, | 10 * Bi-directional text is text containing text in both text directionalities, |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
93 * elements, style elements, dir attribute, `>` in quoted attribute values, | 93 * elements, style elements, dir attribute, `>` in quoted attribute values, |
94 * etc. But it does handle well enough the most common use cases. | 94 * etc. But it does handle well enough the most common use cases. |
95 * Since the worst that can happen as a result of these shortcomings is that | 95 * Since the worst that can happen as a result of these shortcomings is that |
96 * the wrong directionality will be estimated, we have not invested in | 96 * the wrong directionality will be estimated, we have not invested in |
97 * improving this. | 97 * improving this. |
98 */ | 98 */ |
99 static String stripHtmlIfNeeded(String text) { | 99 static String stripHtmlIfNeeded(String text) { |
100 // The regular expression is simplified for an HTML tag (opening or | 100 // The regular expression is simplified for an HTML tag (opening or |
101 // closing) or an HTML escape. We might want to skip over such expressions | 101 // closing) or an HTML escape. We might want to skip over such expressions |
102 // when estimating the text directionality. | 102 // when estimating the text directionality. |
103 return text.replaceAll(new RegExp(r'<[^>]*>|&[^;]+;'), ' '); | 103 return text.replaceAll(const RegExp(r'<[^>]*>|&[^;]+;'), ' '); |
104 } | 104 } |
105 | 105 |
106 /** | 106 /** |
107 * Determines if the first character in [text] with strong directionality is | 107 * Determines if the first character in [text] with strong directionality is |
108 * LTR. If [isHtml] is true, the text is HTML or HTML-escaped. | 108 * LTR. If [isHtml] is true, the text is HTML or HTML-escaped. |
109 */ | 109 */ |
110 static bool startsWithLtr(String text, [isHtml=false]) { | 110 static bool startsWithLtr(String text, [isHtml=false]) { |
111 return new RegExp('^[^$_RTL_CHARS]*[$_LTR_CHARS]').hasMatch( | 111 return const RegExp('^[^$_RTL_CHARS]*[$_LTR_CHARS]').hasMatch( |
112 isHtml? stripHtmlIfNeeded(text) : text); | 112 isHtml? stripHtmlIfNeeded(text) : text); |
113 } | 113 } |
114 | 114 |
115 /** | 115 /** |
116 * Determines if the first character in [text] with strong directionality is | 116 * Determines if the first character in [text] with strong directionality is |
117 * RTL. If [isHtml] is true, the text is HTML or HTML-escaped. | 117 * RTL. If [isHtml] is true, the text is HTML or HTML-escaped. |
118 */ | 118 */ |
119 static bool startsWithRtl(String text, [isHtml=false]) { | 119 static bool startsWithRtl(String text, [isHtml=false]) { |
120 return new RegExp('^[^$_LTR_CHARS]*[$_RTL_CHARS]').hasMatch( | 120 return const RegExp('^[^$_LTR_CHARS]*[$_RTL_CHARS]').hasMatch( |
121 isHtml? stripHtmlIfNeeded(text) : text); | 121 isHtml? stripHtmlIfNeeded(text) : text); |
122 } | 122 } |
123 | 123 |
124 /** | 124 /** |
125 * Determines if the exit directionality (i.e., the last strongly-directional | 125 * Determines if the exit directionality (i.e., the last strongly-directional |
126 * character in [text]) is LTR. If [isHtml] is true, the text is HTML or | 126 * character in [text]) is LTR. If [isHtml] is true, the text is HTML or |
127 * HTML-escaped. | 127 * HTML-escaped. |
128 */ | 128 */ |
129 static bool endsWithLtr(String text, [isHtml=false]) { | 129 static bool endsWithLtr(String text, [isHtml=false]) { |
130 return new RegExp('[$_LTR_CHARS][^$_RTL_CHARS]*\$').hasMatch( | 130 return const RegExp('[$_LTR_CHARS][^$_RTL_CHARS]*\$').hasMatch( |
131 isHtml? stripHtmlIfNeeded(text) : text); | 131 isHtml? stripHtmlIfNeeded(text) : text); |
132 } | 132 } |
133 | 133 |
134 /** | 134 /** |
135 * Determines if the exit directionality (i.e., the last strongly-directional | 135 * Determines if the exit directionality (i.e., the last strongly-directional |
136 * character in [text]) is RTL. If [isHtml] is true, the text is HTML or | 136 * character in [text]) is RTL. If [isHtml] is true, the text is HTML or |
137 * HTML-escaped. | 137 * HTML-escaped. |
138 */ | 138 */ |
139 static bool endsWithRtl(String text, [isHtml=false]) { | 139 static bool endsWithRtl(String text, [isHtml=false]) { |
140 return new RegExp('[$_RTL_CHARS][^$_LTR_CHARS]*\$').hasMatch( | 140 return const RegExp('[$_RTL_CHARS][^$_LTR_CHARS]*\$').hasMatch( |
141 isHtml? stripHtmlIfNeeded(text) : text); | 141 isHtml? stripHtmlIfNeeded(text) : text); |
142 } | 142 } |
143 | 143 |
144 /** | 144 /** |
145 * Determines if the given [text] has any LTR characters in it. | 145 * Determines if the given [text] has any LTR characters in it. |
146 * If [isHtml] is true, the text is HTML or HTML-escaped. | 146 * If [isHtml] is true, the text is HTML or HTML-escaped. |
147 */ | 147 */ |
148 static bool hasAnyLtr(String text, [isHtml=false]) { | 148 static bool hasAnyLtr(String text, [isHtml=false]) { |
149 return new RegExp(r'[' '$_LTR_CHARS' r']').hasMatch( | 149 return const RegExp(r'[' '$_LTR_CHARS' r']').hasMatch( |
150 isHtml? stripHtmlIfNeeded(text) : text); | 150 isHtml? stripHtmlIfNeeded(text) : text); |
151 } | 151 } |
152 | 152 |
153 /** | 153 /** |
154 * Determines if the given [text] has any RTL characters in it. | 154 * Determines if the given [text] has any RTL characters in it. |
155 * If [isHtml] is true, the text is HTML or HTML-escaped. | 155 * If [isHtml] is true, the text is HTML or HTML-escaped. |
156 */ | 156 */ |
157 static bool hasAnyRtl(String text, [isHtml=false]) { | 157 static bool hasAnyRtl(String text, [isHtml=false]) { |
158 return new RegExp(r'[' '$_RTL_CHARS' r']').hasMatch( | 158 return const RegExp(r'[' '$_RTL_CHARS' r']').hasMatch( |
159 isHtml? stripHtmlIfNeeded(text) : text); | 159 isHtml? stripHtmlIfNeeded(text) : text); |
160 } | 160 } |
161 | 161 |
162 /** | 162 /** |
163 * Check if a BCP 47 / III [languageString] indicates an RTL language. | 163 * Check if a BCP 47 / III [languageString] indicates an RTL language. |
164 * | 164 * |
165 * i.e. either: | 165 * i.e. either: |
166 * - a language code explicitly specifying one of the right-to-left scripts, | 166 * - a language code explicitly specifying one of the right-to-left scripts, |
167 * e.g. "az-Arab", or | 167 * e.g. "az-Arab", or |
168 * - a language code specifying one of the languages normally written in a | 168 * - a language code specifying one of the languages normally written in a |
169 * right-to-left script, e.g. "fa" (Farsi), except ones explicitly | 169 * right-to-left script, e.g. "fa" (Farsi), except ones explicitly |
170 * specifying Latin or Cyrillic script (which are the usual LTR | 170 * specifying Latin or Cyrillic script (which are the usual LTR |
171 * alternatives). | 171 * alternatives). |
172 * | 172 * |
173 * The list of right-to-left scripts appears in the 100-199 range in | 173 * The list of right-to-left scripts appears in the 100-199 range in |
174 * http://www.unicode.org/iso15924/iso15924-num.html, of which Arabic and | 174 * http://www.unicode.org/iso15924/iso15924-num.html, of which Arabic and |
175 * Hebrew are by far the most widely used. We also recognize Thaana, N'Ko, and | 175 * Hebrew are by far the most widely used. We also recognize Thaana, N'Ko, and |
176 * Tifinagh, which also have significant modern usage. The rest (Syriac, | 176 * Tifinagh, which also have significant modern usage. The rest (Syriac, |
177 * Samaritan, Mandaic, etc.) seem to have extremely limited or no modern usage | 177 * Samaritan, Mandaic, etc.) seem to have extremely limited or no modern usage |
178 * and are not recognized. | 178 * and are not recognized. |
179 * The languages usually written in a right-to-left script are taken as those | 179 * The languages usually written in a right-to-left script are taken as those |
180 * with Suppress-Script: Hebr|Arab|Thaa|Nkoo|Tfng in | 180 * with Suppress-Script: Hebr|Arab|Thaa|Nkoo|Tfng in |
181 * http://www.iana.org/assignments/language-subtag-registry, | 181 * http://www.iana.org/assignments/language-subtag-registry, |
182 * as well as Sindhi (sd) and Uyghur (ug). | 182 * as well as Sindhi (sd) and Uyghur (ug). |
183 * The presence of other subtags of the language code, e.g. regions like EG | 183 * The presence of other subtags of the language code, e.g. regions like EG |
184 * (Egypt), is ignored. | 184 * (Egypt), is ignored. |
185 */ | 185 */ |
186 static bool isRtlLanguage(String languageString) { | 186 static bool isRtlLanguage(String languageString) { |
187 return new RegExp(r'^(ar|dv|he|iw|fa|nqo|ps|sd|ug|ur|yi|.*[-_]' | 187 return const RegExp(r'^(ar|dv|he|iw|fa|nqo|ps|sd|ug|ur|yi|.*[-_]' |
188 r'(Arab|Hebr|Thaa|Nkoo|Tfng))(?!.*[-_](Latn|Cyrl)($|-|_))' | 188 r'(Arab|Hebr|Thaa|Nkoo|Tfng))(?!.*[-_](Latn|Cyrl)($|-|_))' |
189 r'($|-|_)', ignoreCase : true).hasMatch(languageString); | 189 r'($|-|_)', ignoreCase : true).hasMatch(languageString); |
190 } | 190 } |
191 | 191 |
192 /** | 192 /** |
193 * Enforce the [html] snippet in RTL directionality regardless of overall | 193 * Enforce the [html] snippet in RTL directionality regardless of overall |
194 * context. If the html piece was enclosed by a tag, the direction will be | 194 * context. If the html piece was enclosed by a tag, the direction will be |
195 * applied to existing tag, otherwise a span tag will be added as wrapper. | 195 * applied to existing tag, otherwise a span tag will be added as wrapper. |
196 * For this reason, if the html snippet starts with a tag, this tag must enclose | 196 * For this reason, if the html snippet starts with a tag, this tag must enclose |
197 * the whole piece. If the tag already has a direction specified, this new one | 197 * the whole piece. If the tag already has a direction specified, this new one |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
235 * context. If the html piece was enclosed by a tag, the direction will be | 235 * context. If the html piece was enclosed by a tag, the direction will be |
236 * applied to existing tag, otherwise a span tag will be added as wrapper. | 236 * applied to existing tag, otherwise a span tag will be added as wrapper. |
237 * For this reason, if the html snippet starts with a tag, this tag must enclose | 237 * For this reason, if the html snippet starts with a tag, this tag must enclose |
238 * the whole piece. If the tag already has a direction specified, this new one | 238 * the whole piece. If the tag already has a direction specified, this new one |
239 * will override existing one in behavior (tested on FF and IE). | 239 * will override existing one in behavior (tested on FF and IE). |
240 */ | 240 */ |
241 static String _enforceInHtmlHelper(String html, String direction) { | 241 static String _enforceInHtmlHelper(String html, String direction) { |
242 if (html.startsWith('<')) { | 242 if (html.startsWith('<')) { |
243 StringBuffer buffer = new StringBuffer(); | 243 StringBuffer buffer = new StringBuffer(); |
244 var startIndex = 0; | 244 var startIndex = 0; |
245 Match match = new RegExp('<\\w+').firstMatch(html); | 245 Match match = const RegExp('<\\w+').firstMatch(html); |
246 if (match != null) { | 246 if (match != null) { |
247 buffer.add(html.substring( | 247 buffer.add(html.substring( |
248 startIndex, match.end)).add(' dir=$direction'); | 248 startIndex, match.end)).add(' dir=$direction'); |
249 startIndex = match.end; | 249 startIndex = match.end; |
250 } | 250 } |
251 return buffer.add(html.substring(startIndex)).toString(); | 251 return buffer.add(html.substring(startIndex)).toString(); |
252 } | 252 } |
253 // '\n' is important for FF so that it won't incorrectly merge span groups. | 253 // '\n' is important for FF so that it won't incorrectly merge span groups. |
254 return '\n<span dir=$direction>$html</span>'; | 254 return '\n<span dir=$direction>$html</span>'; |
255 } | 255 } |
256 | 256 |
257 /** | 257 /** |
258 * Apply bracket guard to [str] using html span tag. This is to address the | 258 * Apply bracket guard to [str] using html span tag. This is to address the |
259 * problem of messy bracket display that frequently happens in RTL layout. | 259 * problem of messy bracket display that frequently happens in RTL layout. |
260 * If [isRtlContext] is true, then we explicitly want to wrap in a span of RTL | 260 * If [isRtlContext] is true, then we explicitly want to wrap in a span of RTL |
261 * directionality, regardless of the estimated directionality. | 261 * directionality, regardless of the estimated directionality. |
262 */ | 262 */ |
263 static String guardBracketInHtml(String str, [bool isRtlContext]) { | 263 static String guardBracketInHtml(String str, [bool isRtlContext]) { |
264 var useRtl = isRtlContext == null ? hasAnyRtl(str) : isRtlContext; | 264 var useRtl = isRtlContext == null ? hasAnyRtl(str) : isRtlContext; |
265 RegExp matchingBrackets = | 265 RegExp matchingBrackets = |
266 new RegExp(r'(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(<.*?(>)+)'); | 266 const RegExp(r'(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(<.*?(>)+)'); |
267 return _guardBracketHelper(str, matchingBrackets, | 267 return _guardBracketHelper(str, matchingBrackets, |
268 '<span dir=${useRtl? "rtl" : "ltr"}>', '</span>'); | 268 '<span dir=${useRtl? "rtl" : "ltr"}>', '</span>'); |
269 } | 269 } |
270 | 270 |
271 /** | 271 /** |
272 * Apply bracket guard to [str] using LRM and RLM. This is to address the | 272 * Apply bracket guard to [str] using LRM and RLM. This is to address the |
273 * problem of messy bracket display that frequently happens in RTL layout. | 273 * problem of messy bracket display that frequently happens in RTL layout. |
274 * This version works for both plain text and html, but in some cases is not | 274 * This version works for both plain text and html, but in some cases is not |
275 * as good as guardBracketInHtml. | 275 * as good as guardBracketInHtml. |
276 * If [isRtlContext] is true, then we explicitly want to wrap in a span of RTL | 276 * If [isRtlContext] is true, then we explicitly want to wrap in a span of RTL |
277 * directionality, regardless of the estimated directionality. | 277 * directionality, regardless of the estimated directionality. |
278 */ | 278 */ |
279 static String guardBracketInText(String str, [bool isRtlContext]) { | 279 static String guardBracketInText(String str, [bool isRtlContext]) { |
280 var useRtl = isRtlContext == null ? hasAnyRtl(str) : isRtlContext; | 280 var useRtl = isRtlContext == null ? hasAnyRtl(str) : isRtlContext; |
281 var mark = useRtl ? RLM : LRM; | 281 var mark = useRtl ? RLM : LRM; |
282 return _guardBracketHelper(str, | 282 return _guardBracketHelper(str, |
283 new RegExp(r'(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(<.*?>+)'), mark, mark); | 283 const RegExp(r'(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(<.*?>+)'), mark, mark); |
284 } | 284 } |
285 | 285 |
286 /** | 286 /** |
287 * (Mostly) reimplements the $& functionality of "replace" in JavaScript. | 287 * (Mostly) reimplements the $& functionality of "replace" in JavaScript. |
288 * Given a [str] and the [regexp] to match with, optionally supply a string to | 288 * Given a [str] and the [regexp] to match with, optionally supply a string to |
289 * be inserted [before] the match and/or [after]. For example, | 289 * be inserted [before] the match and/or [after]. For example, |
290 * `_guardBracketHelper('firetruck', new RegExp('truck'), 'hydrant', '!')` | 290 * `_guardBracketHelper('firetruck', const RegExp('truck'), 'hydrant', '!')` |
291 * would return 'firehydrant!'. | 291 * would return 'firehydrant!'. |
292 */ | 292 */ |
293 // TODO(efortuna): Get rid of this once this is implemented in Dart. | 293 // TODO(efortuna): Get rid of this once this is implemented in Dart. |
294 // See Issue 2979. | 294 // See Issue 2979. |
295 static String _guardBracketHelper(String str, RegExp regexp, [String before, | 295 static String _guardBracketHelper(String str, RegExp regexp, [String before, |
296 String after]) { | 296 String after]) { |
297 StringBuffer buffer = new StringBuffer(); | 297 StringBuffer buffer = new StringBuffer(); |
298 var startIndex = 0; | 298 var startIndex = 0; |
299 Iterable matches = regexp.allMatches(str); | 299 Iterable matches = regexp.allMatches(str); |
300 for (Match match in matches) { | 300 for (Match match in matches) { |
(...skipping 17 matching lines...) Expand all Loading... |
318 * Numbers and URLs are counted as weakly LTR. | 318 * Numbers and URLs are counted as weakly LTR. |
319 */ | 319 */ |
320 static TextDirection estimateDirectionOfText(String text, | 320 static TextDirection estimateDirectionOfText(String text, |
321 {bool isHtml: false}) { | 321 {bool isHtml: false}) { |
322 text = isHtml? stripHtmlIfNeeded(text) : text; | 322 text = isHtml? stripHtmlIfNeeded(text) : text; |
323 var rtlCount = 0; | 323 var rtlCount = 0; |
324 var total = 0; | 324 var total = 0; |
325 var hasWeaklyLtr = false; | 325 var hasWeaklyLtr = false; |
326 // Split a string into 'words' for directionality estimation based on | 326 // Split a string into 'words' for directionality estimation based on |
327 // relative word counts. | 327 // relative word counts. |
328 for (String token in text.split(new RegExp(r'\s+'))) { | 328 for (String token in text.split(const RegExp(r'\s+'))) { |
329 if (startsWithRtl(token)) { | 329 if (startsWithRtl(token)) { |
330 rtlCount++; | 330 rtlCount++; |
331 total++; | 331 total++; |
332 } else if (new RegExp(r'^http://').hasMatch(token)) { | 332 } else if (const RegExp(r'^http://').hasMatch(token)) { |
333 // Check whether the token looks like something that must always be LTR even in | 333 // Check whether the token looks like something that must always be LTR even in |
334 // RTL text, such as a URL. | 334 // RTL text, such as a URL. |
335 hasWeaklyLtr = true; | 335 hasWeaklyLtr = true; |
336 } else if (hasAnyLtr(token)) { | 336 } else if (hasAnyLtr(token)) { |
337 total++; | 337 total++; |
338 } else if (new RegExp(r'\d').hasMatch(token)) { | 338 } else if (const RegExp(r'\d').hasMatch(token)) { |
339 // Check whether the token contains any numerals. | 339 // Check whether the token contains any numerals. |
340 hasWeaklyLtr = true; | 340 hasWeaklyLtr = true; |
341 } | 341 } |
342 } | 342 } |
343 | 343 |
344 if (total == 0) { | 344 if (total == 0) { |
345 return hasWeaklyLtr ? TextDirection.LTR : TextDirection.UNKNOWN; | 345 return hasWeaklyLtr ? TextDirection.LTR : TextDirection.UNKNOWN; |
346 } else if (rtlCount > _RTL_DETECTION_THRESHOLD * total) { | 346 } else if (rtlCount > _RTL_DETECTION_THRESHOLD * total) { |
347 return TextDirection.RTL; | 347 return TextDirection.RTL; |
348 } else { | 348 } else { |
(...skipping 23 matching lines...) Expand all Loading... |
372 */ | 372 */ |
373 static String normalizeHebrewQuote(String str) { | 373 static String normalizeHebrewQuote(String str) { |
374 StringBuffer buf = new StringBuffer(); | 374 StringBuffer buf = new StringBuffer(); |
375 if (str.length > 0) { | 375 if (str.length > 0) { |
376 buf.add(str.substring(0, 1)); | 376 buf.add(str.substring(0, 1)); |
377 } | 377 } |
378 // Start at 1 because we're looking for the patterns [\u0591-\u05f2]" or | 378 // Start at 1 because we're looking for the patterns [\u0591-\u05f2]" or |
379 // [\u0591-\u05f2]'. | 379 // [\u0591-\u05f2]'. |
380 for (int i = 1; i < str.length; i++) { | 380 for (int i = 1; i < str.length; i++) { |
381 if (str.substring(i, i+1) == '"' | 381 if (str.substring(i, i+1) == '"' |
382 && new RegExp('[\u0591-\u05f2]').hasMatch(str.substring(i-1, i))) { | 382 && const RegExp('[\u0591-\u05f2]').hasMatch(str.substring(i-1, i))) { |
383 buf.add('\u05f4'); | 383 buf.add('\u05f4'); |
384 } else if (str.substring(i, i+1) == "'" | 384 } else if (str.substring(i, i+1) == "'" |
385 && new RegExp('[\u0591-\u05f2]').hasMatch(str.substring(i-1, i))) { | 385 && const RegExp('[\u0591-\u05f2]').hasMatch(str.substring(i-1, i))) { |
386 buf.add('\u05f3'); | 386 buf.add('\u05f3'); |
387 } else { | 387 } else { |
388 buf.add(str.substring(i, i+1)); | 388 buf.add(str.substring(i, i+1)); |
389 } | 389 } |
390 } | 390 } |
391 return buf.toString(); | 391 return buf.toString(); |
392 } | 392 } |
393 | 393 |
394 /** | 394 /** |
395 * Check the estimated directionality of [str], return true if the piece of | 395 * Check the estimated directionality of [str], return true if the piece of |
396 * text should be laid out in RTL direction. If [isHtml] is true, the string | 396 * text should be laid out in RTL direction. If [isHtml] is true, the string |
397 * is HTML or HTML-escaped. | 397 * is HTML or HTML-escaped. |
398 */ | 398 */ |
399 static bool detectRtlDirectionality(String str, {bool isHtml: false}) { | 399 static bool detectRtlDirectionality(String str, {bool isHtml: false}) { |
400 return estimateDirectionOfText(str, isHtml: isHtml) == TextDirection.RTL; | 400 return estimateDirectionOfText(str, isHtml: isHtml) == TextDirection.RTL; |
401 } | 401 } |
402 } | 402 } |
OLD | NEW |