Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(22)

Side by Side Diff: src/string.js

Issue 5959002: Improve regexp split, replace and test. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/regexp.js ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
152 if (%_ArgumentsLength() === 0) return 0; 152 if (%_ArgumentsLength() === 0) return 0;
153 return %StringLocaleCompare(TO_STRING_INLINE(this), 153 return %StringLocaleCompare(TO_STRING_INLINE(this),
154 TO_STRING_INLINE(other)); 154 TO_STRING_INLINE(other));
155 } 155 }
156 156
157 157
158 // ECMA-262 section 15.5.4.10 158 // ECMA-262 section 15.5.4.10
159 function StringMatch(regexp) { 159 function StringMatch(regexp) {
160 var subject = TO_STRING_INLINE(this); 160 var subject = TO_STRING_INLINE(this);
161 if (IS_REGEXP(regexp)) { 161 if (IS_REGEXP(regexp)) {
162 if (!regexp.global) return regexp.exec(subject); 162 if (!regexp.global) return RegExpExecNoTests(regexp, subject, 0);
163 %_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]); 163 %_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]);
164 // lastMatchInfo is defined in regexp.js. 164 // lastMatchInfo is defined in regexp.js.
165 return %StringMatch(subject, regexp, lastMatchInfo); 165 return %StringMatch(subject, regexp, lastMatchInfo);
166 } 166 }
167 // Non-regexp argument. 167 // Non-regexp argument.
168 regexp = new $RegExp(regexp); 168 regexp = new $RegExp(regexp);
169 return RegExpExecNoTests(regexp, subject, 0); 169 return RegExpExecNoTests(regexp, subject, 0);
170 } 170 }
171 171
172 172
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
238 // suffix 238 // suffix
239 builder.addSpecialSlice(end, subject.length); 239 builder.addSpecialSlice(end, subject.length);
240 240
241 return builder.generate(); 241 return builder.generate();
242 } 242 }
243 243
244 244
245 // Expand the $-expressions in the string and return a new string with 245 // Expand the $-expressions in the string and return a new string with
246 // the result. 246 // the result.
247 function ExpandReplacement(string, subject, matchInfo, builder) { 247 function ExpandReplacement(string, subject, matchInfo, builder) {
248 var length = string.length;
249 var builder_elements = builder.elements;
248 var next = %StringIndexOf(string, '$', 0); 250 var next = %StringIndexOf(string, '$', 0);
249 if (next < 0) { 251 if (next < 0) {
250 builder.add(string); 252 if (length > 0) builder_elements.push(string);
251 return; 253 return;
252 } 254 }
253 255
254 // Compute the number of captures; see ECMA-262, 15.5.4.11, p. 102. 256 // Compute the number of captures; see ECMA-262, 15.5.4.11, p. 102.
255 var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; // Includes the match. 257 var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; // Includes the match.
256 258
257 if (next > 0) builder.add(SubString(string, 0, next)); 259 if (next > 0) builder_elements.push(SubString(string, 0, next));
258 var length = string.length;
259 260
260 while (true) { 261 while (true) {
261 var expansion = '$'; 262 var expansion = '$';
262 var position = next + 1; 263 var position = next + 1;
263 if (position < length) { 264 if (position < length) {
264 var peek = %_StringCharCodeAt(string, position); 265 var peek = %_StringCharCodeAt(string, position);
265 if (peek == 36) { // $$ 266 if (peek == 36) { // $$
266 ++position; 267 ++position;
267 builder.add('$'); 268 builder_elements.push('$');
268 } else if (peek == 38) { // $& - match 269 } else if (peek == 38) { // $& - match
269 ++position; 270 ++position;
270 builder.addSpecialSlice(matchInfo[CAPTURE0], 271 builder.addSpecialSlice(matchInfo[CAPTURE0],
271 matchInfo[CAPTURE1]); 272 matchInfo[CAPTURE1]);
272 } else if (peek == 96) { // $` - prefix 273 } else if (peek == 96) { // $` - prefix
273 ++position; 274 ++position;
274 builder.addSpecialSlice(0, matchInfo[CAPTURE0]); 275 builder.addSpecialSlice(0, matchInfo[CAPTURE0]);
275 } else if (peek == 39) { // $' - suffix 276 } else if (peek == 39) { // $' - suffix
276 ++position; 277 ++position;
277 builder.addSpecialSlice(matchInfo[CAPTURE1], subject.length); 278 builder.addSpecialSlice(matchInfo[CAPTURE1], subject.length);
(...skipping 16 matching lines...) Expand all
294 } 295 }
295 } 296 }
296 } 297 }
297 if (0 < n && n < m) { 298 if (0 < n && n < m) {
298 addCaptureString(builder, matchInfo, n); 299 addCaptureString(builder, matchInfo, n);
299 } else { 300 } else {
300 // Because of the captures range check in the parsing of two 301 // Because of the captures range check in the parsing of two
301 // digit capture references, we can only enter here when a 302 // digit capture references, we can only enter here when a
302 // single digit capture reference is outside the range of 303 // single digit capture reference is outside the range of
303 // captures. 304 // captures.
304 builder.add('$'); 305 builder_elements.push('$');
305 --position; 306 --position;
306 } 307 }
307 } else { 308 } else {
308 builder.add('$'); 309 builder_elements.push('$');
309 } 310 }
310 } else { 311 } else {
311 builder.add('$'); 312 builder_elements.push('$');
312 } 313 }
313 314
314 // Go to the next $ in the string. 315 // Go to the next $ in the string.
315 next = %StringIndexOf(string, '$', position); 316 next = %StringIndexOf(string, '$', position);
316 317
317 // Return if there are no more $ characters in the string. If we 318 // Return if there are no more $ characters in the string. If we
318 // haven't reached the end, we need to append the suffix. 319 // haven't reached the end, we need to append the suffix.
319 if (next < 0) { 320 if (next < 0) {
320 if (position < length) { 321 if (position < length) {
321 builder.add(SubString(string, position, length)); 322 builder_elements.push(SubString(string, position, length));
322 } 323 }
323 return; 324 return;
324 } 325 }
325 326
326 // Append substring between the previous and the next $ character. 327 // Append substring between the previous and the next $ character.
327 builder.add(SubString(string, position, next)); 328 if (next > position) {
329 builder_elements.push(SubString(string, position, next));
330 }
328 } 331 }
329 }; 332 };
330 333
331 334
332 // Compute the string of a given regular expression capture. 335 // Compute the string of a given regular expression capture.
333 function CaptureString(string, lastCaptureInfo, index) { 336 function CaptureString(string, lastCaptureInfo, index) {
334 // Scale the index. 337 // Scale the index.
335 var scaled = index << 1; 338 var scaled = index << 1;
336 // Compute start and end. 339 // Compute start and end.
337 var start = lastCaptureInfo[CAPTURE(scaled)]; 340 var start = lastCaptureInfo[CAPTURE(scaled)];
(...skipping 214 matching lines...) Expand 10 before | Expand all | Expand 10 after
552 555
553 if (length === 0) { 556 if (length === 0) {
554 if (DoRegExpExec(separator, subject, 0, 0) != null) { 557 if (DoRegExpExec(separator, subject, 0, 0) != null) {
555 return []; 558 return [];
556 } 559 }
557 return [subject]; 560 return [subject];
558 } 561 }
559 562
560 var currentIndex = 0; 563 var currentIndex = 0;
561 var startIndex = 0; 564 var startIndex = 0;
565 var startMatch = 0;
562 var result = []; 566 var result = [];
563 567
564 outer_loop: 568 outer_loop:
565 while (true) { 569 while (true) {
566 570
567 if (startIndex === length) { 571 if (startIndex === length) {
568 result.push(subject.slice(currentIndex, length)); 572 result.push(SubString(subject, currentIndex, length));
569 break; 573 break;
570 } 574 }
571 575
572 var matchInfo = splitMatch(separator, subject, currentIndex, startIndex); 576 var matchInfo = DoRegExpExec(separator, subject, startIndex);
573 577 if (matchInfo == null || length === (startMatch = matchInfo[CAPTURE0])) {
574 if (IS_NULL(matchInfo)) { 578 result.push(SubString(subject, currentIndex, length));
575 result.push(subject.slice(currentIndex, length));
576 break; 579 break;
577 } 580 }
578
579 var endIndex = matchInfo[CAPTURE1]; 581 var endIndex = matchInfo[CAPTURE1];
580 582
581 // We ignore a zero-length match at the currentIndex. 583 // We ignore a zero-length match at the currentIndex.
582 if (startIndex === endIndex && endIndex === currentIndex) { 584 if (startIndex === endIndex && endIndex === currentIndex) {
583 startIndex++; 585 startIndex++;
584 continue; 586 continue;
585 } 587 }
586 588
587 result.push(SubString(subject, currentIndex, matchInfo[CAPTURE0])); 589 if (currentIndex + 1 == startMatch) {
590 result.push(%_StringCharAt(subject, currentIndex));
591 } else {
592 result.push(%_SubString(subject, currentIndex, startMatch));
593 }
594
588 if (result.length === limit) break; 595 if (result.length === limit) break;
589 596
590 var matchinfo_len = NUMBER_OF_CAPTURES(matchInfo) + REGEXP_FIRST_CAPTURE; 597 var matchinfo_len = NUMBER_OF_CAPTURES(matchInfo) + REGEXP_FIRST_CAPTURE;
591 for (var i = REGEXP_FIRST_CAPTURE + 2; i < matchinfo_len; ) { 598 for (var i = REGEXP_FIRST_CAPTURE + 2; i < matchinfo_len; ) {
592 var start = matchInfo[i++]; 599 var start = matchInfo[i++];
593 var end = matchInfo[i++]; 600 var end = matchInfo[i++];
594 if (end != -1) { 601 if (end != -1) {
595 if (start + 1 == end) { 602 if (start + 1 == end) {
596 result.push(%_StringCharAt(subject, start)); 603 result.push(%_StringCharAt(subject, start));
597 } else { 604 } else {
598 result.push(%_SubString(subject, start, end)); 605 result.push(%_SubString(subject, start, end));
599 } 606 }
600 } else { 607 } else {
601 result.push(void 0); 608 result.push(void 0);
602 } 609 }
603 if (result.length === limit) break outer_loop; 610 if (result.length === limit) break outer_loop;
604 } 611 }
605 612
606 startIndex = currentIndex = endIndex; 613 startIndex = currentIndex = endIndex;
607 } 614 }
608 return result; 615 return result;
609 } 616 }
610 617
611 618
612 // ECMA-262 section 15.5.4.14
613 // Helper function used by split. This version returns the matchInfo
614 // instead of allocating a new array with basically the same information.
615 function splitMatch(separator, subject, current_index, start_index) {
616 var matchInfo = DoRegExpExec(separator, subject, start_index);
617 if (matchInfo == null) return null;
618 // Section 15.5.4.14 paragraph two says that we do not allow zero length
619 // matches at the end of the string.
620 if (matchInfo[CAPTURE0] === subject.length) return null;
621 return matchInfo;
622 }
623
624
625 // ECMA-262 section 15.5.4.15 619 // ECMA-262 section 15.5.4.15
626 function StringSubstring(start, end) { 620 function StringSubstring(start, end) {
627 var s = TO_STRING_INLINE(this); 621 var s = TO_STRING_INLINE(this);
628 var s_len = s.length; 622 var s_len = s.length;
629 623
630 var start_i = TO_INTEGER(start); 624 var start_i = TO_INTEGER(start);
631 if (start_i < 0) { 625 if (start_i < 0) {
632 start_i = 0; 626 start_i = 0;
633 } else if (start_i > s_len) { 627 } else if (start_i > s_len) {
634 start_i = s_len; 628 start_i = s_len;
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after
837 this.elements = %_Arguments(1); 831 this.elements = %_Arguments(1);
838 } else { 832 } else {
839 this.elements = new $Array(); 833 this.elements = new $Array();
840 } 834 }
841 this.special_string = str; 835 this.special_string = str;
842 } 836 }
843 837
844 838
845 ReplaceResultBuilder.prototype.add = function(str) { 839 ReplaceResultBuilder.prototype.add = function(str) {
846 str = TO_STRING_INLINE(str); 840 str = TO_STRING_INLINE(str);
847 if (str.length > 0) { 841 if (str.length > 0) this.elements.push(str);
848 var elements = this.elements;
849 elements[elements.length] = str;
850 }
851 } 842 }
852 843
853 844
854 ReplaceResultBuilder.prototype.addSpecialSlice = function(start, end) { 845 ReplaceResultBuilder.prototype.addSpecialSlice = function(start, end) {
855 var len = end - start; 846 var len = end - start;
856 if (start < 0 || len <= 0) return; 847 if (start < 0 || len <= 0) return;
857 var elements = this.elements;
858 if (start < 0x80000 && len < 0x800) { 848 if (start < 0x80000 && len < 0x800) {
859 elements[elements.length] = (start << 11) | len; 849 this.elements.push((start << 11) | len);
860 } else { 850 } else {
861 // 0 < len <= String::kMaxLength and Smi::kMaxValue >= String::kMaxLength, 851 // 0 < len <= String::kMaxLength and Smi::kMaxValue >= String::kMaxLength,
862 // so -len is a smi. 852 // so -len is a smi.
863 elements[elements.length] = -len; 853 var elements = this.elements;
864 elements[elements.length] = start; 854 elements.push(-len);
855 elements.push(start);
865 } 856 }
866 } 857 }
867 858
868 859
869 ReplaceResultBuilder.prototype.generate = function() { 860 ReplaceResultBuilder.prototype.generate = function() {
870 var elements = this.elements; 861 var elements = this.elements;
871 return %StringBuilderConcat(elements, elements.length, this.special_string); 862 return %StringBuilderConcat(elements, elements.length, this.special_string);
872 } 863 }
873 864
874 865
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
920 "italics", StringItalics, 911 "italics", StringItalics,
921 "small", StringSmall, 912 "small", StringSmall,
922 "strike", StringStrike, 913 "strike", StringStrike,
923 "sub", StringSub, 914 "sub", StringSub,
924 "sup", StringSup 915 "sup", StringSup
925 )); 916 ));
926 } 917 }
927 918
928 919
929 SetupString(); 920 SetupString();
OLDNEW
« no previous file with comments | « src/regexp.js ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698