Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1159)

Side by Side Diff: pkg/intl/lib/bidi_utils.dart

Issue 11312203: "Reverting 14829-14832" (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « pkg/http/lib/src/utils.dart ('k') | pkg/intl/lib/date_format.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of intl; 5 part of intl;
6 6
7 /** 7 /**
8 * Bidi stands for Bi-directional text. 8 * Bidi stands for Bi-directional text.
9 * According to http://en.wikipedia.org/wiki/Bi-directional_text: 9 * According to http://en.wikipedia.org/wiki/Bi-directional_text:
10 * Bi-directional text is text containing text in both text directionalities, 10 * Bi-directional text is text containing text in both text directionalities,
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
93 * elements, style elements, dir attribute,`>` in quoted attribute values, 93 * elements, style elements, dir attribute,`>` in quoted attribute values,
94 * etc. But it does handle well enough the most common use cases. 94 * etc. But it does handle well enough the most common use cases.
95 * Since the worst that can happen as a result of these shortcomings is that 95 * Since the worst that can happen as a result of these shortcomings is that
96 * the wrong directionality will be estimated, we have not invested in 96 * the wrong directionality will be estimated, we have not invested in
97 * improving this. 97 * improving this.
98 */ 98 */
99 static String stripHtmlIfNeeded(String text) { 99 static String stripHtmlIfNeeded(String text) {
100 // The regular expression is simplified for an HTML tag (opening or 100 // The regular expression is simplified for an HTML tag (opening or
101 // closing) or an HTML escape. We might want to skip over such expressions 101 // closing) or an HTML escape. We might want to skip over such expressions
102 // when estimating the text directionality. 102 // when estimating the text directionality.
103 return text.replaceAll(new RegExp(r'<[^>]*>|&[^;]+;'), ' '); 103 return text.replaceAll(const RegExp(r'<[^>]*>|&[^;]+;'), ' ');
104 } 104 }
105 105
106 /** 106 /**
107 * Determines if the first character in [text] with strong directionality is 107 * Determines if the first character in [text] with strong directionality is
108 * LTR. If [isHtml] is true, the text is HTML or HTML-escaped. 108 * LTR. If [isHtml] is true, the text is HTML or HTML-escaped.
109 */ 109 */
110 static bool startsWithLtr(String text, [isHtml=false]) { 110 static bool startsWithLtr(String text, [isHtml=false]) {
111 return new RegExp('^[^$_RTL_CHARS]*[$_LTR_CHARS]').hasMatch( 111 return const RegExp('^[^$_RTL_CHARS]*[$_LTR_CHARS]').hasMatch(
112 isHtml? stripHtmlIfNeeded(text) : text); 112 isHtml? stripHtmlIfNeeded(text) : text);
113 } 113 }
114 114
115 /** 115 /**
116 * Determines if the first character in [text] with strong directionality is 116 * Determines if the first character in [text] with strong directionality is
117 * RTL. If [isHtml] is true, the text is HTML or HTML-escaped. 117 * RTL. If [isHtml] is true, the text is HTML or HTML-escaped.
118 */ 118 */
119 static bool startsWithRtl(String text, [isHtml=false]) { 119 static bool startsWithRtl(String text, [isHtml=false]) {
120 return new RegExp('^[^$_LTR_CHARS]*[$_RTL_CHARS]').hasMatch( 120 return const RegExp('^[^$_LTR_CHARS]*[$_RTL_CHARS]').hasMatch(
121 isHtml? stripHtmlIfNeeded(text) : text); 121 isHtml? stripHtmlIfNeeded(text) : text);
122 } 122 }
123 123
124 /** 124 /**
125 * Determines if the exit directionality (i.e., the last strongly-directional 125 * Determines if the exit directionality (i.e., the last strongly-directional
126 * character in [text]) is LTR. If [isHtml] is true, the text is HTML or 126 * character in [text]) is LTR. If [isHtml] is true, the text is HTML or
127 * HTML-escaped. 127 * HTML-escaped.
128 */ 128 */
129 static bool endsWithLtr(String text, [isHtml=false]) { 129 static bool endsWithLtr(String text, [isHtml=false]) {
130 return new RegExp('[$_LTR_CHARS][^$_RTL_CHARS]*\$').hasMatch( 130 return const RegExp('[$_LTR_CHARS][^$_RTL_CHARS]*\$').hasMatch(
131 isHtml? stripHtmlIfNeeded(text) : text); 131 isHtml? stripHtmlIfNeeded(text) : text);
132 } 132 }
133 133
134 /** 134 /**
135 * Determines if the exit directionality (i.e., the last strongly-directional 135 * Determines if the exit directionality (i.e., the last strongly-directional
136 * character in [text]) is RTL. If [isHtml] is true, the text is HTML or 136 * character in [text]) is RTL. If [isHtml] is true, the text is HTML or
137 * HTML-escaped. 137 * HTML-escaped.
138 */ 138 */
139 static bool endsWithRtl(String text, [isHtml=false]) { 139 static bool endsWithRtl(String text, [isHtml=false]) {
140 return new RegExp('[$_RTL_CHARS][^$_LTR_CHARS]*\$').hasMatch( 140 return const RegExp('[$_RTL_CHARS][^$_LTR_CHARS]*\$').hasMatch(
141 isHtml? stripHtmlIfNeeded(text) : text); 141 isHtml? stripHtmlIfNeeded(text) : text);
142 } 142 }
143 143
144 /** 144 /**
145 * Determines if the given [text] has any LTR characters in it. 145 * Determines if the given [text] has any LTR characters in it.
146 * If [isHtml] is true, the text is HTML or HTML-escaped. 146 * If [isHtml] is true, the text is HTML or HTML-escaped.
147 */ 147 */
148 static bool hasAnyLtr(String text, [isHtml=false]) { 148 static bool hasAnyLtr(String text, [isHtml=false]) {
149 return new RegExp(r'[' '$_LTR_CHARS' r']').hasMatch( 149 return const RegExp(r'[' '$_LTR_CHARS' r']').hasMatch(
150 isHtml? stripHtmlIfNeeded(text) : text); 150 isHtml? stripHtmlIfNeeded(text) : text);
151 } 151 }
152 152
153 /** 153 /**
154 * Determines if the given [text] has any RTL characters in it. 154 * Determines if the given [text] has any RTL characters in it.
155 * If [isHtml] is true, the text is HTML or HTML-escaped. 155 * If [isHtml] is true, the text is HTML or HTML-escaped.
156 */ 156 */
157 static bool hasAnyRtl(String text, [isHtml=false]) { 157 static bool hasAnyRtl(String text, [isHtml=false]) {
158 return new RegExp(r'[' '$_RTL_CHARS' r']').hasMatch( 158 return const RegExp(r'[' '$_RTL_CHARS' r']').hasMatch(
159 isHtml? stripHtmlIfNeeded(text) : text); 159 isHtml? stripHtmlIfNeeded(text) : text);
160 } 160 }
161 161
162 /** 162 /**
163 * Check if a BCP 47 / III [languageString] indicates an RTL language. 163 * Check if a BCP 47 / III [languageString] indicates an RTL language.
164 * 164 *
165 * i.e. either: 165 * i.e. either:
166 * - a language code explicitly specifying one of the right-to-left scripts, 166 * - a language code explicitly specifying one of the right-to-left scripts,
167 * e.g. "az-Arab", or 167 * e.g. "az-Arab", or
168 * - a language code specifying one of the languages normally written in a 168 * - a language code specifying one of the languages normally written in a
169 * right-to-left script, e.g. "fa" (Farsi), except ones explicitly 169 * right-to-left script, e.g. "fa" (Farsi), except ones explicitly
170 * specifying Latin or Cyrillic script (which are the usual LTR 170 * specifying Latin or Cyrillic script (which are the usual LTR
171 * alternatives). 171 * alternatives).
172 * 172 *
173 * The list of right-to-left scripts appears in the 100-199 range in 173 * The list of right-to-left scripts appears in the 100-199 range in
174 * http://www.unicode.org/iso15924/iso15924-num.html, of which Arabic and 174 * http://www.unicode.org/iso15924/iso15924-num.html, of which Arabic and
175 * Hebrew are by far the most widely used. We also recognize Thaana, N'Ko, and 175 * Hebrew are by far the most widely used. We also recognize Thaana, N'Ko, and
176 * Tifinagh, which also have significant modern usage. The rest (Syriac, 176 * Tifinagh, which also have significant modern usage. The rest (Syriac,
177 * Samaritan, Mandaic, etc.) seem to have extremely limited or no modern usage 177 * Samaritan, Mandaic, etc.) seem to have extremely limited or no modern usage
178 * and are not recognized. 178 * and are not recognized.
179 * The languages usually written in a right-to-left script are taken as those 179 * The languages usually written in a right-to-left script are taken as those
180 * with Suppress-Script: Hebr|Arab|Thaa|Nkoo|Tfng in 180 * with Suppress-Script: Hebr|Arab|Thaa|Nkoo|Tfng in
181 * http://www.iana.org/assignments/language-subtag-registry, 181 * http://www.iana.org/assignments/language-subtag-registry,
182 * as well as Sindhi (sd) and Uyghur (ug). 182 * as well as Sindhi (sd) and Uyghur (ug).
183 * The presence of other subtags of the language code, e.g. regions like EG 183 * The presence of other subtags of the language code, e.g. regions like EG
184 * (Egypt), is ignored. 184 * (Egypt), is ignored.
185 */ 185 */
186 static bool isRtlLanguage(String languageString) { 186 static bool isRtlLanguage(String languageString) {
187 return new RegExp(r'^(ar|dv|he|iw|fa|nqo|ps|sd|ug|ur|yi|.*[-_]' 187 return const RegExp(r'^(ar|dv|he|iw|fa|nqo|ps|sd|ug|ur|yi|.*[-_]'
188 r'(Arab|Hebr|Thaa|Nkoo|Tfng))(?!.*[-_](Latn|Cyrl)($|-|_))' 188 r'(Arab|Hebr|Thaa|Nkoo|Tfng))(?!.*[-_](Latn|Cyrl)($|-|_))'
189 r'($|-|_)', ignoreCase : true).hasMatch(languageString); 189 r'($|-|_)', ignoreCase : true).hasMatch(languageString);
190 } 190 }
191 191
192 /** 192 /**
193 * Enforce the [html] snippet in RTL directionality regardless of overall 193 * Enforce the [html] snippet in RTL directionality regardless of overall
194 * context. If the html piece was enclosed by a tag, the direction will be 194 * context. If the html piece was enclosed by a tag, the direction will be
195 * applied to existing tag, otherwise a span tag will be added as wrapper. 195 * applied to existing tag, otherwise a span tag will be added as wrapper.
196 * For this reason, if the html snippet starts with a tag, this tag must enclose 196 * For this reason, if the html snippet starts with a tag, this tag must enclose
197 * the whole piece. If the tag already has a direction specified, this new one 197 * the whole piece. If the tag already has a direction specified, this new one
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
235 * context. If the html piece was enclosed by a tag, the direction will be 235 * context. If the html piece was enclosed by a tag, the direction will be
236 * applied to existing tag, otherwise a span tag will be added as wrapper. 236 * applied to existing tag, otherwise a span tag will be added as wrapper.
237 * For this reason, if the html snippet starts with a tag, this tag must enclose 237 * For this reason, if the html snippet starts with a tag, this tag must enclose
238 * the whole piece. If the tag already has a direction specified, this new one 238 * the whole piece. If the tag already has a direction specified, this new one
239 * will override existing one in behavior (tested on FF and IE). 239 * will override existing one in behavior (tested on FF and IE).
240 */ 240 */
241 static String _enforceInHtmlHelper(String html, String direction) { 241 static String _enforceInHtmlHelper(String html, String direction) {
242 if (html.startsWith('<')) { 242 if (html.startsWith('<')) {
243 StringBuffer buffer = new StringBuffer(); 243 StringBuffer buffer = new StringBuffer();
244 var startIndex = 0; 244 var startIndex = 0;
245 Match match = new RegExp('<\\w+').firstMatch(html); 245 Match match = const RegExp('<\\w+').firstMatch(html);
246 if (match != null) { 246 if (match != null) {
247 buffer.add(html.substring( 247 buffer.add(html.substring(
248 startIndex, match.end)).add(' dir=$direction'); 248 startIndex, match.end)).add(' dir=$direction');
249 startIndex = match.end; 249 startIndex = match.end;
250 } 250 }
251 return buffer.add(html.substring(startIndex)).toString(); 251 return buffer.add(html.substring(startIndex)).toString();
252 } 252 }
253 // '\n' is important for FF so that it won't incorrectly merge span groups. 253 // '\n' is important for FF so that it won't incorrectly merge span groups.
254 return '\n<span dir=$direction>$html</span>'; 254 return '\n<span dir=$direction>$html</span>';
255 } 255 }
256 256
257 /** 257 /**
258 * Apply bracket guard to [str] using html span tag. This is to address the 258 * Apply bracket guard to [str] using html span tag. This is to address the
259 * problem of messy bracket display that frequently happens in RTL layout. 259 * problem of messy bracket display that frequently happens in RTL layout.
260 * If [isRtlContext] is true, then we explicitly want to wrap in a span of RTL 260 * If [isRtlContext] is true, then we explicitly want to wrap in a span of RTL
261 * directionality, regardless of the estimated directionality. 261 * directionality, regardless of the estimated directionality.
262 */ 262 */
263 static String guardBracketInHtml(String str, [bool isRtlContext]) { 263 static String guardBracketInHtml(String str, [bool isRtlContext]) {
264 var useRtl = isRtlContext == null ? hasAnyRtl(str) : isRtlContext; 264 var useRtl = isRtlContext == null ? hasAnyRtl(str) : isRtlContext;
265 RegExp matchingBrackets = 265 RegExp matchingBrackets =
266 new RegExp(r'(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(&lt;.*?(&gt;)+)'); 266 const RegExp(r'(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(&lt;.*?(&gt;)+)');
267 return _guardBracketHelper(str, matchingBrackets, 267 return _guardBracketHelper(str, matchingBrackets,
268 '<span dir=${useRtl? "rtl" : "ltr"}>', '</span>'); 268 '<span dir=${useRtl? "rtl" : "ltr"}>', '</span>');
269 } 269 }
270 270
271 /** 271 /**
272 * Apply bracket guard to [str] using LRM and RLM. This is to address the 272 * Apply bracket guard to [str] using LRM and RLM. This is to address the
273 * problem of messy bracket display that frequently happens in RTL layout. 273 * problem of messy bracket display that frequently happens in RTL layout.
274 * This version works for both plain text and html, but in some cases is not 274 * This version works for both plain text and html, but in some cases is not
275 * as good as guardBracketInHtml. 275 * as good as guardBracketInHtml.
276 * If [isRtlContext] is true, then we explicitly want to wrap in a span of RTL 276 * If [isRtlContext] is true, then we explicitly want to wrap in a span of RTL
277 * directionality, regardless of the estimated directionality. 277 * directionality, regardless of the estimated directionality.
278 */ 278 */
279 static String guardBracketInText(String str, [bool isRtlContext]) { 279 static String guardBracketInText(String str, [bool isRtlContext]) {
280 var useRtl = isRtlContext == null ? hasAnyRtl(str) : isRtlContext; 280 var useRtl = isRtlContext == null ? hasAnyRtl(str) : isRtlContext;
281 var mark = useRtl ? RLM : LRM; 281 var mark = useRtl ? RLM : LRM;
282 return _guardBracketHelper(str, 282 return _guardBracketHelper(str,
283 new RegExp(r'(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(<.*?>+)'), mark, mark); 283 const RegExp(r'(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(<.*?>+)'), mark, mark);
284 } 284 }
285 285
286 /** 286 /**
287 * (Mostly) reimplements the $& functionality of "replace" in JavaScript. 287 * (Mostly) reimplements the $& functionality of "replace" in JavaScript.
288 * Given a [str] and the [regexp] to match with, optionally supply a string to 288 * Given a [str] and the [regexp] to match with, optionally supply a string to
289 * be inserted [before] the match and/or [after]. For example, 289 * be inserted [before] the match and/or [after]. For example,
290 * `_guardBracketHelper('firetruck', new RegExp('truck'), 'hydrant', '!')` 290 * `_guardBracketHelper('firetruck', const RegExp('truck'), 'hydrant', '!')`
291 * would return 'firehydrant!'. 291 * would return 'firehydrant!'.
292 */ 292 */
293 // TODO(efortuna): Get rid of this once this is implemented in Dart. 293 // TODO(efortuna): Get rid of this once this is implemented in Dart.
294 // See Issue 2979. 294 // See Issue 2979.
295 static String _guardBracketHelper(String str, RegExp regexp, [String before, 295 static String _guardBracketHelper(String str, RegExp regexp, [String before,
296 String after]) { 296 String after]) {
297 StringBuffer buffer = new StringBuffer(); 297 StringBuffer buffer = new StringBuffer();
298 var startIndex = 0; 298 var startIndex = 0;
299 Iterable matches = regexp.allMatches(str); 299 Iterable matches = regexp.allMatches(str);
300 for (Match match in matches) { 300 for (Match match in matches) {
(...skipping 17 matching lines...) Expand all
318 * Numbers and URLs are counted as weakly LTR. 318 * Numbers and URLs are counted as weakly LTR.
319 */ 319 */
320 static TextDirection estimateDirectionOfText(String text, 320 static TextDirection estimateDirectionOfText(String text,
321 {bool isHtml: false}) { 321 {bool isHtml: false}) {
322 text = isHtml? stripHtmlIfNeeded(text) : text; 322 text = isHtml? stripHtmlIfNeeded(text) : text;
323 var rtlCount = 0; 323 var rtlCount = 0;
324 var total = 0; 324 var total = 0;
325 var hasWeaklyLtr = false; 325 var hasWeaklyLtr = false;
326 // Split a string into 'words' for directionality estimation based on 326 // Split a string into 'words' for directionality estimation based on
327 // relative word counts. 327 // relative word counts.
328 for (String token in text.split(new RegExp(r'\s+'))) { 328 for (String token in text.split(const RegExp(r'\s+'))) {
329 if (startsWithRtl(token)) { 329 if (startsWithRtl(token)) {
330 rtlCount++; 330 rtlCount++;
331 total++; 331 total++;
332 } else if (new RegExp(r'^http://').hasMatch(token)) { 332 } else if (const RegExp(r'^http://').hasMatch(token)) {
333 // Checked if token looks like something that must always be LTR even in 333 // Checked if token looks like something that must always be LTR even in
334 // RTL text, such as a URL. 334 // RTL text, such as a URL.
335 hasWeaklyLtr = true; 335 hasWeaklyLtr = true;
336 } else if (hasAnyLtr(token)) { 336 } else if (hasAnyLtr(token)) {
337 total++; 337 total++;
338 } else if (new RegExp(r'\d').hasMatch(token)) { 338 } else if (const RegExp(r'\d').hasMatch(token)) {
339 // Checked if token contains any numerals. 339 // Checked if token contains any numerals.
340 hasWeaklyLtr = true; 340 hasWeaklyLtr = true;
341 } 341 }
342 } 342 }
343 343
344 if (total == 0) { 344 if (total == 0) {
345 return hasWeaklyLtr ? TextDirection.LTR : TextDirection.UNKNOWN; 345 return hasWeaklyLtr ? TextDirection.LTR : TextDirection.UNKNOWN;
346 } else if (rtlCount > _RTL_DETECTION_THRESHOLD * total) { 346 } else if (rtlCount > _RTL_DETECTION_THRESHOLD * total) {
347 return TextDirection.RTL; 347 return TextDirection.RTL;
348 } else { 348 } else {
(...skipping 23 matching lines...) Expand all
372 */ 372 */
373 static String normalizeHebrewQuote(String str) { 373 static String normalizeHebrewQuote(String str) {
374 StringBuffer buf = new StringBuffer(); 374 StringBuffer buf = new StringBuffer();
375 if (str.length > 0) { 375 if (str.length > 0) {
376 buf.add(str.substring(0, 1)); 376 buf.add(str.substring(0, 1));
377 } 377 }
378 // Start at 1 because we're looking for the patterns [\u0591-\u05f2])" or 378 // Start at 1 because we're looking for the patterns [\u0591-\u05f2])" or
379 // [\u0591-\u05f2]'. 379 // [\u0591-\u05f2]'.
380 for (int i = 1; i < str.length; i++) { 380 for (int i = 1; i < str.length; i++) {
381 if (str.substring(i, i+1) == '"' 381 if (str.substring(i, i+1) == '"'
382 && new RegExp('[\u0591-\u05f2]').hasMatch(str.substring(i-1, i))) { 382 && const RegExp('[\u0591-\u05f2]').hasMatch(str.substring(i-1, i))) {
383 buf.add('\u05f4'); 383 buf.add('\u05f4');
384 } else if (str.substring(i, i+1) == "'" 384 } else if (str.substring(i, i+1) == "'"
385 && new RegExp('[\u0591-\u05f2]').hasMatch(str.substring(i-1, i))) { 385 && const RegExp('[\u0591-\u05f2]').hasMatch(str.substring(i-1, i))) {
386 buf.add('\u05f3'); 386 buf.add('\u05f3');
387 } else { 387 } else {
388 buf.add(str.substring(i, i+1)); 388 buf.add(str.substring(i, i+1));
389 } 389 }
390 } 390 }
391 return buf.toString(); 391 return buf.toString();
392 } 392 }
393 393
394 /** 394 /**
395 * Check the estimated directionality of [str], return true if the piece of 395 * Check the estimated directionality of [str], return true if the piece of
396 * text should be laid out in RTL direction. If [isHtml] is true, the string 396 * text should be laid out in RTL direction. If [isHtml] is true, the string
397 * is HTML or HTML-escaped. 397 * is HTML or HTML-escaped.
398 */ 398 */
399 static bool detectRtlDirectionality(String str, {bool isHtml: false}) { 399 static bool detectRtlDirectionality(String str, {bool isHtml: false}) {
400 return estimateDirectionOfText(str, isHtml: isHtml) == TextDirection.RTL; 400 return estimateDirectionOfText(str, isHtml: isHtml) == TextDirection.RTL;
401 } 401 }
402 } 402 }
OLDNEW
« no previous file with comments | « pkg/http/lib/src/utils.dart ('k') | pkg/intl/lib/date_format.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698