pkg/intl/lib/bidi_utils.dart - Issue 11312203: "Reverting 14829-14832"

Side by Side Diff: pkg/intl/lib/bidi_utils.dart

Issue 11312203: "Reverting 14829-14832" (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 part of intl;	5 part of intl;

6	6

7 /**	7 /**

8 * Bidi stands for Bi-directional text.	8 * Bidi stands for Bi-directional text.

9 * According to http://en.wikipedia.org/wiki/Bi-directional_text:	9 * According to http://en.wikipedia.org/wiki/Bi-directional_text:

10 * Bi-directional text is text containing text in both text directionalities,	10 * Bi-directional text is text containing text in both text directionalities,

(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
93 * elements, style elements, dir attribute,`>` in quoted attribute values,	93 * elements, style elements, dir attribute,`>` in quoted attribute values,

94 * etc. But it does handle well enough the most common use cases.	94 * etc. But it does handle well enough the most common use cases.

95 * Since the worst that can happen as a result of these shortcomings is that	95 * Since the worst that can happen as a result of these shortcomings is that

96 * the wrong directionality will be estimated, we have not invested in	96 * the wrong directionality will be estimated, we have not invested in

97 * improving this.	97 * improving this.

98 */	98 */

99 static String stripHtmlIfNeeded(String text) {	99 static String stripHtmlIfNeeded(String text) {

100 // The regular expression is simplified for an HTML tag (opening or	100 // The regular expression is simplified for an HTML tag (opening or

101 // closing) or an HTML escape. We might want to skip over such expressions	101 // closing) or an HTML escape. We might want to skip over such expressions

102 // when estimating the text directionality.	102 // when estimating the text directionality.

103 return text.replaceAll(new RegExp(r'<[^>]*>|&[^;]+;'), ' ');	103 return text.replaceAll(const RegExp(r'<[^>]*>|&[^;]+;'), ' ');

104 }	104 }

105	105

106 /**	106 /**

107 * Determines if the first character in [text] with strong directionality is	107 * Determines if the first character in [text] with strong directionality is

108 * LTR. If [isHtml] is true, the text is HTML or HTML-escaped.	108 * LTR. If [isHtml] is true, the text is HTML or HTML-escaped.

109 */	109 */

110 static bool startsWithLtr(String text, [isHtml=false]) {	110 static bool startsWithLtr(String text, [isHtml=false]) {

111 return new RegExp('^[^$_RTL_CHARS]*[$_LTR_CHARS]').hasMatch(	111 return const RegExp('^[^$_RTL_CHARS]*[$_LTR_CHARS]').hasMatch(

112 isHtml? stripHtmlIfNeeded(text) : text);	112 isHtml? stripHtmlIfNeeded(text) : text);

113 }	113 }

114	114

115 /**	115 /**

116 * Determines if the first character in [text] with strong directionality is	116 * Determines if the first character in [text] with strong directionality is

117 * RTL. If [isHtml] is true, the text is HTML or HTML-escaped.	117 * RTL. If [isHtml] is true, the text is HTML or HTML-escaped.

118 */	118 */

119 static bool startsWithRtl(String text, [isHtml=false]) {	119 static bool startsWithRtl(String text, [isHtml=false]) {

120 return new RegExp('^[^$_LTR_CHARS]*[$_RTL_CHARS]').hasMatch(	120 return const RegExp('^[^$_LTR_CHARS]*[$_RTL_CHARS]').hasMatch(

121 isHtml? stripHtmlIfNeeded(text) : text);	121 isHtml? stripHtmlIfNeeded(text) : text);

122 }	122 }

123	123

124 /**	124 /**

125 * Determines if the exit directionality (ie, the last strongly-directional	125 * Determines if the exit directionality (ie, the last strongly-directional

126 * character in [text] is LTR. If [isHtml] is true, the text is HTML or	126 * character in [text] is LTR. If [isHtml] is true, the text is HTML or

127 * HTML-escaped.	127 * HTML-escaped.

128 */	128 */

129 static bool endsWithLtr(String text, [isHtml=false]) {	129 static bool endsWithLtr(String text, [isHtml=false]) {

130 return new RegExp('[$_LTR_CHARS][^$_RTL_CHARS]*\$').hasMatch(	130 return const RegExp('[$_LTR_CHARS][^$_RTL_CHARS]*\$').hasMatch(

131 isHtml? stripHtmlIfNeeded(text) : text);	131 isHtml? stripHtmlIfNeeded(text) : text);

132 }	132 }

133	133

134 /**	134 /**

135 * Determines if the exit directionality (ie, the last strongly-directional	135 * Determines if the exit directionality (ie, the last strongly-directional

136 * character in [text] is RTL. If [isHtml] is true, the text is HTML or	136 * character in [text] is RTL. If [isHtml] is true, the text is HTML or

137 * HTML-escaped.	137 * HTML-escaped.

138 */	138 */

139 static bool endsWithRtl(String text, [isHtml=false]) {	139 static bool endsWithRtl(String text, [isHtml=false]) {

140 return new RegExp('[$_RTL_CHARS][^$_LTR_CHARS]*\$').hasMatch(	140 return const RegExp('[$_RTL_CHARS][^$_LTR_CHARS]*\$').hasMatch(

141 isHtml? stripHtmlIfNeeded(text) : text);	141 isHtml? stripHtmlIfNeeded(text) : text);

142 }	142 }

143	143

144 /**	144 /**

145 * Determines if the given [text] has any LTR characters in it.	145 * Determines if the given [text] has any LTR characters in it.

146 * If [isHtml] is true, the text is HTML or HTML-escaped.	146 * If [isHtml] is true, the text is HTML or HTML-escaped.

147 */	147 */

148 static bool hasAnyLtr(String text, [isHtml=false]) {	148 static bool hasAnyLtr(String text, [isHtml=false]) {

149 return new RegExp(r'[' '$_LTR_CHARS' r']').hasMatch(	149 return const RegExp(r'[' '$_LTR_CHARS' r']').hasMatch(

150 isHtml? stripHtmlIfNeeded(text) : text);	150 isHtml? stripHtmlIfNeeded(text) : text);

151 }	151 }

152	152

153 /**	153 /**

154 * Determines if the given [text] has any RTL characters in it.	154 * Determines if the given [text] has any RTL characters in it.

155 * If [isHtml] is true, the text is HTML or HTML-escaped.	155 * If [isHtml] is true, the text is HTML or HTML-escaped.

156 */	156 */

157 static bool hasAnyRtl(String text, [isHtml=false]) {	157 static bool hasAnyRtl(String text, [isHtml=false]) {

158 return new RegExp(r'[' '$_RTL_CHARS' r']').hasMatch(	158 return const RegExp(r'[' '$_RTL_CHARS' r']').hasMatch(

159 isHtml? stripHtmlIfNeeded(text) : text);	159 isHtml? stripHtmlIfNeeded(text) : text);

160 }	160 }

161	161

162 /**	162 /**

163 * Check if a BCP 47 / III [languageString] indicates an RTL language.	163 * Check if a BCP 47 / III [languageString] indicates an RTL language.

164 *	164 *

165 * i.e. either:	165 * i.e. either:

166 * - a language code explicitly specifying one of the right-to-left scripts,	166 * - a language code explicitly specifying one of the right-to-left scripts,

167 * e.g. "az-Arab", or	167 * e.g. "az-Arab", or

168 * - a language code specifying one of the languages normally written in a	168 * - a language code specifying one of the languages normally written in a

169 * right-to-left script, e.g. "fa" (Farsi), except ones explicitly	169 * right-to-left script, e.g. "fa" (Farsi), except ones explicitly

170 * specifying Latin or Cyrillic script (which are the usual LTR	170 * specifying Latin or Cyrillic script (which are the usual LTR

171 * alternatives).	171 * alternatives).

172 *	172 *

173 * The list of right-to-left scripts appears in the 100-199 range in	173 * The list of right-to-left scripts appears in the 100-199 range in

174 * http://www.unicode.org/iso15924/iso15924-num.html, of which Arabic and	174 * http://www.unicode.org/iso15924/iso15924-num.html, of which Arabic and

175 * Hebrew are by far the most widely used. We also recognize Thaana, N'Ko, and	175 * Hebrew are by far the most widely used. We also recognize Thaana, N'Ko, and

176 * Tifinagh, which also have significant modern usage. The rest (Syriac,	176 * Tifinagh, which also have significant modern usage. The rest (Syriac,

177 * Samaritan, Mandaic, etc.) seem to have extremely limited or no modern usage	177 * Samaritan, Mandaic, etc.) seem to have extremely limited or no modern usage

178 * and are not recognized.	178 * and are not recognized.

179 * The languages usually written in a right-to-left script are taken as those	179 * The languages usually written in a right-to-left script are taken as those

180 * with Suppress-Script: Hebr|Arab|Thaa|Nkoo|Tfng in	180 * with Suppress-Script: Hebr|Arab|Thaa|Nkoo|Tfng in

181 * http://www.iana.org/assignments/language-subtag-registry,	181 * http://www.iana.org/assignments/language-subtag-registry,

182 * as well as Sindhi (sd) and Uyghur (ug).	182 * as well as Sindhi (sd) and Uyghur (ug).

183 * The presence of other subtags of the language code, e.g. regions like EG	183 * The presence of other subtags of the language code, e.g. regions like EG

184 * (Egypt), is ignored.	184 * (Egypt), is ignored.

185 */	185 */

186 static bool isRtlLanguage(String languageString) {	186 static bool isRtlLanguage(String languageString) {

187 return new RegExp(r'^(ar|dv|he|iw|fa|nqo|ps|sd|ug|ur|yi|.*[-_]'	187 return const RegExp(r'^(ar|dv|he|iw|fa|nqo|ps|sd|ug|ur|yi|.*[-_]'

188 r'(Arab|Hebr|Thaa|Nkoo|Tfng))(?!.*[-_](Latn|Cyrl)($|-|_))'	188 r'(Arab|Hebr|Thaa|Nkoo|Tfng))(?!.*[-_](Latn|Cyrl)($|-|_))'

189 r'($|-|_)', ignoreCase : true).hasMatch(languageString);	189 r'($|-|_)', ignoreCase : true).hasMatch(languageString);

190 }	190 }

191	191

192 /**	192 /**

193 * Enforce the [html] snippet in RTL directionality regardless of overall	193 * Enforce the [html] snippet in RTL directionality regardless of overall

194 * context. If the html piece was enclosed by a tag, the direction will be	194 * context. If the html piece was enclosed by a tag, the direction will be

195 * applied to existing tag, otherwise a span tag will be added as wrapper.	195 * applied to existing tag, otherwise a span tag will be added as wrapper.

196 * For this reason, if html snippet starts with a tag, this tag must enclose	196 * For this reason, if html snippet starts with a tag, this tag must enclose

197 * the whole piece. If the tag already has a direction specified, this new one	197 * the whole piece. If the tag already has a direction specified, this new one

(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
235 * context. If the html piece was enclosed by a tag, the direction will be	235 * context. If the html piece was enclosed by a tag, the direction will be

236 * applied to existing tag, otherwise a span tag will be added as wrapper.	236 * applied to existing tag, otherwise a span tag will be added as wrapper.

237 * For this reason, if html snippet starts with a tag, this tag must enclose	237 * For this reason, if html snippet starts with a tag, this tag must enclose

238 * the whole piece. If the tag already has a direction specified, this new one	238 * the whole piece. If the tag already has a direction specified, this new one

239 * will override existing one in behavior (tested on FF and IE).	239 * will override existing one in behavior (tested on FF and IE).

240 */	240 */

241 static String _enforceInHtmlHelper(String html, String direction) {	241 static String _enforceInHtmlHelper(String html, String direction) {

242 if (html.startsWith('<')) {	242 if (html.startsWith('<')) {

243 StringBuffer buffer = new StringBuffer();	243 StringBuffer buffer = new StringBuffer();

244 var startIndex = 0;	244 var startIndex = 0;

245 Match match = new RegExp('<\\w+').firstMatch(html);	245 Match match = const RegExp('<\\w+').firstMatch(html);

246 if (match != null) {	246 if (match != null) {

247 buffer.add(html.substring(	247 buffer.add(html.substring(

248 startIndex, match.end)).add(' dir=$direction');	248 startIndex, match.end)).add(' dir=$direction');

249 startIndex = match.end;	249 startIndex = match.end;

250 }	250 }

251 return buffer.add(html.substring(startIndex)).toString();	251 return buffer.add(html.substring(startIndex)).toString();

252 }	252 }

253 // '\n' is important for FF so that it won't incorrectly merge span groups.	253 // '\n' is important for FF so that it won't incorrectly merge span groups.

254 return '\n<span dir=$direction>$html</span>';	254 return '\n<span dir=$direction>$html</span>';

255 }	255 }

256	256

257 /**	257 /**

258 * Apply bracket guard to [str] using html span tag. This is to address the	258 * Apply bracket guard to [str] using html span tag. This is to address the

259 * problem of messy bracket display that frequently happens in RTL layout.	259 * problem of messy bracket display that frequently happens in RTL layout.

260 * If [isRtlContext] is true, then we explicitly want to wrap in a span of RTL	260 * If [isRtlContext] is true, then we explicitly want to wrap in a span of RTL

261 * directionality, regardless of the estimated directionality.	261 * directionality, regardless of the estimated directionality.

262 */	262 */

263 static String guardBracketInHtml(String str, [bool isRtlContext]) {	263 static String guardBracketInHtml(String str, [bool isRtlContext]) {

264 var useRtl = isRtlContext == null ? hasAnyRtl(str) : isRtlContext;	264 var useRtl = isRtlContext == null ? hasAnyRtl(str) : isRtlContext;

265 RegExp matchingBrackets =	265 RegExp matchingBrackets =

266 new RegExp(r'(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(<.*?(>)+)');	266 const RegExp(r'(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(<.*?(>)+)');

267 return _guardBracketHelper(str, matchingBrackets,	267 return _guardBracketHelper(str, matchingBrackets,

268 '<span dir=${useRtl? "rtl" : "ltr"}>', '</span>');	268 '<span dir=${useRtl? "rtl" : "ltr"}>', '</span>');

269 }	269 }

270	270

271 /**	271 /**

272 * Apply bracket guard to [str] using LRM and RLM. This is to address the	272 * Apply bracket guard to [str] using LRM and RLM. This is to address the

273 * problem of messy bracket display that frequently happens in RTL layout.	273 * problem of messy bracket display that frequently happens in RTL layout.

274 * This version works for both plain text and html, but in some cases is not	274 * This version works for both plain text and html, but in some cases is not

275 * as good as guardBracketInHtml.	275 * as good as guardBracketInHtml.

276 * If [isRtlContext] is true, then we explicitly want to wrap in a span of RTL	276 * If [isRtlContext] is true, then we explicitly want to wrap in a span of RTL

277 * directionality, regardless of the estimated directionality.	277 * directionality, regardless of the estimated directionality.

278 */	278 */

279 static String guardBracketInText(String str, [bool isRtlContext]) {	279 static String guardBracketInText(String str, [bool isRtlContext]) {

280 var useRtl = isRtlContext == null ? hasAnyRtl(str) : isRtlContext;	280 var useRtl = isRtlContext == null ? hasAnyRtl(str) : isRtlContext;

281 var mark = useRtl ? RLM : LRM;	281 var mark = useRtl ? RLM : LRM;

282 return _guardBracketHelper(str,	282 return _guardBracketHelper(str,

283 new RegExp(r'(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(<.*?>+)'), mark, mark);	283 const RegExp(r'(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(<.*?>+)'), mark, mark);

284 }	284 }

285	285

286 /**	286 /**

287 * (Mostly) reimplements the $& functionality of "replace" in JavaScript.	287 * (Mostly) reimplements the $& functionality of "replace" in JavaScript.

288 * Given a [str] and the [regexp] to match with, optionally supply a string to	288 * Given a [str] and the [regexp] to match with, optionally supply a string to

289 * be inserted [before] the match and/or [after]. For example,	289 * be inserted [before] the match and/or [after]. For example,

290 * `_guardBracketHelper('firetruck', new RegExp('truck'), 'hydrant', '!')`	290 * `_guardBracketHelper('firetruck', const RegExp('truck'), 'hydrant', '!')`

291 * would return 'firehydrant!'.	291 * would return 'firehydrant!'.

292 */	292 */

293 // TODO(efortuna): Get rid of this once this is implemented in Dart.	293 // TODO(efortuna): Get rid of this once this is implemented in Dart.

294 // See Issue 2979.	294 // See Issue 2979.

295 static String _guardBracketHelper(String str, RegExp regexp, [String before,	295 static String _guardBracketHelper(String str, RegExp regexp, [String before,

296 String after]) {	296 String after]) {

297 StringBuffer buffer = new StringBuffer();	297 StringBuffer buffer = new StringBuffer();

298 var startIndex = 0;	298 var startIndex = 0;

299 Iterable matches = regexp.allMatches(str);	299 Iterable matches = regexp.allMatches(str);

300 for (Match match in matches) {	300 for (Match match in matches) {

(...skipping 17 matching lines...) Expand all Loading...
318 * Numbers and URLs are counted as weakly LTR.	318 * Numbers and URLs are counted as weakly LTR.

319 */	319 */

320 static TextDirection estimateDirectionOfText(String text,	320 static TextDirection estimateDirectionOfText(String text,

321 {bool isHtml: false}) {	321 {bool isHtml: false}) {

322 text = isHtml? stripHtmlIfNeeded(text) : text;	322 text = isHtml? stripHtmlIfNeeded(text) : text;

323 var rtlCount = 0;	323 var rtlCount = 0;

324 var total = 0;	324 var total = 0;

325 var hasWeaklyLtr = false;	325 var hasWeaklyLtr = false;

326 // Split a string into 'words' for directionality estimation based on	326 // Split a string into 'words' for directionality estimation based on

327 // relative word counts.	327 // relative word counts.

328 for (String token in text.split(new RegExp(r'\s+'))) {	328 for (String token in text.split(const RegExp(r'\s+'))) {

329 if (startsWithRtl(token)) {	329 if (startsWithRtl(token)) {

330 rtlCount++;	330 rtlCount++;

331 total++;	331 total++;

332 } else if (new RegExp(r'^http://').hasMatch(token)) {	332 } else if (const RegExp(r'^http://').hasMatch(token)) {

333 // Checked if token looks like something that must always be LTR even in	333 // Checked if token looks like something that must always be LTR even in

334 // RTL text, such as a URL.	334 // RTL text, such as a URL.

335 hasWeaklyLtr = true;	335 hasWeaklyLtr = true;

336 } else if (hasAnyLtr(token)) {	336 } else if (hasAnyLtr(token)) {

337 total++;	337 total++;

338 } else if (new RegExp(r'\d').hasMatch(token)) {	338 } else if (const RegExp(r'\d').hasMatch(token)) {

339 // Checked if token contains any numerals.	339 // Checked if token contains any numerals.

340 hasWeaklyLtr = true;	340 hasWeaklyLtr = true;

341 }	341 }

342 }	342 }

343	343

344 if (total == 0) {	344 if (total == 0) {

345 return hasWeaklyLtr ? TextDirection.LTR : TextDirection.UNKNOWN;	345 return hasWeaklyLtr ? TextDirection.LTR : TextDirection.UNKNOWN;

346 } else if (rtlCount > _RTL_DETECTION_THRESHOLD * total) {	346 } else if (rtlCount > _RTL_DETECTION_THRESHOLD * total) {

347 return TextDirection.RTL;	347 return TextDirection.RTL;

348 } else {	348 } else {

(...skipping 23 matching lines...) Expand all Loading...
372 */	372 */

373 static String normalizeHebrewQuote(String str) {	373 static String normalizeHebrewQuote(String str) {

374 StringBuffer buf = new StringBuffer();	374 StringBuffer buf = new StringBuffer();

375 if (str.length > 0) {	375 if (str.length > 0) {

376 buf.add(str.substring(0, 1));	376 buf.add(str.substring(0, 1));

377 }	377 }

378 // Start at 1 because we're looking for the patterns [\u0591-\u05f2])" or	378 // Start at 1 because we're looking for the patterns [\u0591-\u05f2])" or

379 // [\u0591-\u05f2]'.	379 // [\u0591-\u05f2]'.

380 for (int i = 1; i < str.length; i++) {	380 for (int i = 1; i < str.length; i++) {

381 if (str.substring(i, i+1) == '"'	381 if (str.substring(i, i+1) == '"'

382 && new RegExp('[\u0591-\u05f2]').hasMatch(str.substring(i-1, i))) {	382 && const RegExp('[\u0591-\u05f2]').hasMatch(str.substring(i-1, i))) {

383 buf.add('\u05f4');	383 buf.add('\u05f4');

384 } else if (str.substring(i, i+1) == "'"	384 } else if (str.substring(i, i+1) == "'"

385 && new RegExp('[\u0591-\u05f2]').hasMatch(str.substring(i-1, i))) {	385 && const RegExp('[\u0591-\u05f2]').hasMatch(str.substring(i-1, i))) {

386 buf.add('\u05f3');	386 buf.add('\u05f3');

387 } else {	387 } else {

388 buf.add(str.substring(i, i+1));	388 buf.add(str.substring(i, i+1));

389 }	389 }

390 }	390 }

391 return buf.toString();	391 return buf.toString();

392 }	392 }

393	393

394 /**	394 /**

395 * Check the estimated directionality of [str], return true if the piece of	395 * Check the estimated directionality of [str], return true if the piece of

396 * text should be laid out in RTL direction. If [isHtml] is true, the string	396 * text should be laid out in RTL direction. If [isHtml] is true, the string

397 * is HTML or HTML-escaped.	397 * is HTML or HTML-escaped.

398 */	398 */

399 static bool detectRtlDirectionality(String str, {bool isHtml: false}) {	399 static bool detectRtlDirectionality(String str, {bool isHtml: false}) {

400 return estimateDirectionOfText(str, isHtml: isHtml) == TextDirection.RTL;	400 return estimateDirectionOfText(str, isHtml: isHtml) == TextDirection.RTL;

401 }	401 }

402 }	402 }

OLD	NEW

« no previous file with comments | « pkg/http/lib/src/utils.dart ('k') | pkg/intl/lib/date_format.dart » ('j') | no next file with comments »