Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(304)

Side by Side Diff: src/string.js

Issue 5959002: Improve regexp split, replace and test. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« src/regexp.js ('K') | « src/regexp.js ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
152 if (%_ArgumentsLength() === 0) return 0; 152 if (%_ArgumentsLength() === 0) return 0;
153 return %StringLocaleCompare(TO_STRING_INLINE(this), 153 return %StringLocaleCompare(TO_STRING_INLINE(this),
154 TO_STRING_INLINE(other)); 154 TO_STRING_INLINE(other));
155 } 155 }
156 156
157 157
158 // ECMA-262 section 15.5.4.10 158 // ECMA-262 section 15.5.4.10
159 function StringMatch(regexp) { 159 function StringMatch(regexp) {
160 var subject = TO_STRING_INLINE(this); 160 var subject = TO_STRING_INLINE(this);
161 if (IS_REGEXP(regexp)) { 161 if (IS_REGEXP(regexp)) {
162 if (!regexp.global) return regexp.exec(subject); 162 if (!regexp.global) return RegExpExecNoTests(regexp, subject, 0);
163 %_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]); 163 %_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]);
164 // lastMatchInfo is defined in regexp.js. 164 // lastMatchInfo is defined in regexp.js.
165 return %StringMatch(subject, regexp, lastMatchInfo); 165 return %StringMatch(subject, regexp, lastMatchInfo);
166 } 166 }
167 // Non-regexp argument. 167 // Non-regexp argument.
168 regexp = new $RegExp(regexp); 168 regexp = new $RegExp(regexp);
169 return RegExpExecNoTests(regexp, subject, 0); 169 return RegExpExecNoTests(regexp, subject, 0);
170 } 170 }
171 171
172 172
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
240 240
241 return builder.generate(); 241 return builder.generate();
242 } 242 }
243 243
244 244
245 // Expand the $-expressions in the string and return a new string with 245 // Expand the $-expressions in the string and return a new string with
246 // the result. 246 // the result.
247 function ExpandReplacement(string, subject, matchInfo, builder) { 247 function ExpandReplacement(string, subject, matchInfo, builder) {
248 var next = %StringIndexOf(string, '$', 0); 248 var next = %StringIndexOf(string, '$', 0);
249 if (next < 0) { 249 if (next < 0) {
250 builder.add(string); 250 builder.addString(string);
251 return; 251 return;
252 } 252 }
253 253
254 // Compute the number of captures; see ECMA-262, 15.5.4.11, p. 102. 254 // Compute the number of captures; see ECMA-262, 15.5.4.11, p. 102.
255 var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; // Includes the match. 255 var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; // Includes the match.
256 256
257 if (next > 0) builder.add(SubString(string, 0, next)); 257 if (next > 0) builder.elements.push(SubString(string, 0, next));
Lasse Reichstein 2010/12/17 10:38:30 Consider extracting elements from builder, so you
sandholm 2010/12/17 11:50:24 Done.
258 var length = string.length; 258 var length = string.length;
259 259
260 while (true) { 260 while (true) {
261 var expansion = '$'; 261 var expansion = '$';
262 var position = next + 1; 262 var position = next + 1;
263 if (position < length) { 263 if (position < length) {
264 var peek = %_StringCharCodeAt(string, position); 264 var peek = %_StringCharCodeAt(string, position);
265 if (peek == 36) { // $$ 265 if (peek == 36) { // $$
266 ++position; 266 ++position;
267 builder.add('$'); 267 builder.elements.push('$');
268 } else if (peek == 38) { // $& - match 268 } else if (peek == 38) { // $& - match
269 ++position; 269 ++position;
270 builder.addSpecialSlice(matchInfo[CAPTURE0], 270 builder.addSpecialSlice(matchInfo[CAPTURE0],
271 matchInfo[CAPTURE1]); 271 matchInfo[CAPTURE1]);
272 } else if (peek == 96) { // $` - prefix 272 } else if (peek == 96) { // $` - prefix
273 ++position; 273 ++position;
274 builder.addSpecialSlice(0, matchInfo[CAPTURE0]); 274 builder.addSpecialSlice(0, matchInfo[CAPTURE0]);
275 } else if (peek == 39) { // $' - suffix 275 } else if (peek == 39) { // $' - suffix
276 ++position; 276 ++position;
277 builder.addSpecialSlice(matchInfo[CAPTURE1], subject.length); 277 builder.addSpecialSlice(matchInfo[CAPTURE1], subject.length);
(...skipping 16 matching lines...) Expand all
294 } 294 }
295 } 295 }
296 } 296 }
297 if (0 < n && n < m) { 297 if (0 < n && n < m) {
298 addCaptureString(builder, matchInfo, n); 298 addCaptureString(builder, matchInfo, n);
299 } else { 299 } else {
300 // Because of the captures range check in the parsing of two 300 // Because of the captures range check in the parsing of two
301 // digit capture references, we can only enter here when a 301 // digit capture references, we can only enter here when a
302 // single digit capture reference is outside the range of 302 // single digit capture reference is outside the range of
303 // captures. 303 // captures.
304 builder.add('$'); 304 builder.elements.push('$');
305 --position; 305 --position;
306 } 306 }
307 } else { 307 } else {
308 builder.add('$'); 308 builder.elements.push('$');
309 } 309 }
310 } else { 310 } else {
311 builder.add('$'); 311 builder.elements.push('$');
312 } 312 }
313 313
314 // Go to the next $ in the string. 314 // Go to the next $ in the string.
315 next = %StringIndexOf(string, '$', position); 315 next = %StringIndexOf(string, '$', position);
316 316
317 // Return if there are no more $ characters in the string. If we 317 // Return if there are no more $ characters in the string. If we
318 // haven't reached the end, we need to append the suffix. 318 // haven't reached the end, we need to append the suffix.
319 if (next < 0) { 319 if (next < 0) {
320 if (position < length) { 320 if (position < length) {
321 builder.add(SubString(string, position, length)); 321 builder.elements.push(SubString(string, position, length));
322 } 322 }
323 return; 323 return;
324 } 324 }
325 325
326 // Append substring between the previous and the next $ character. 326 // Append substring between the previous and the next $ character.
327 builder.add(SubString(string, position, next)); 327 builder.addString(SubString(string, position, next));
328 } 328 }
329 }; 329 };
330 330
331 331
332 // Compute the string of a given regular expression capture. 332 // Compute the string of a given regular expression capture.
333 function CaptureString(string, lastCaptureInfo, index) { 333 function CaptureString(string, lastCaptureInfo, index) {
334 // Scale the index. 334 // Scale the index.
335 var scaled = index << 1; 335 var scaled = index << 1;
336 // Compute start and end. 336 // Compute start and end.
337 var start = lastCaptureInfo[CAPTURE(scaled)]; 337 var start = lastCaptureInfo[CAPTURE(scaled)];
(...skipping 214 matching lines...) Expand 10 before | Expand all | Expand 10 after
552 552
553 if (length === 0) { 553 if (length === 0) {
554 if (DoRegExpExec(separator, subject, 0, 0) != null) { 554 if (DoRegExpExec(separator, subject, 0, 0) != null) {
555 return []; 555 return [];
556 } 556 }
557 return [subject]; 557 return [subject];
558 } 558 }
559 559
560 var currentIndex = 0; 560 var currentIndex = 0;
561 var startIndex = 0; 561 var startIndex = 0;
562 var startMatch = 0;
562 var result = []; 563 var result = [];
563 564
564 outer_loop: 565 outer_loop:
565 while (true) { 566 while (true) {
566 567
567 if (startIndex === length) { 568 if (startIndex === length) {
568 result.push(subject.slice(currentIndex, length)); 569 result.push(SubString(subject, currentIndex, length));
569 break; 570 break;
570 } 571 }
571 572
572 var matchInfo = splitMatch(separator, subject, currentIndex, startIndex); 573 var matchInfo = DoRegExpExec(separator, subject, startIndex);
573 574 if (IS_NULL_OR_UNDEFINED(matchInfo)
Lasse Reichstein 2010/12/17 10:38:30 Why accept undefined?
sandholm 2010/12/17 11:50:24 I am just applying the same semantics as the inlin
574 if (IS_NULL(matchInfo)) { 575 || length === (startMatch = matchInfo[CAPTURE0])) {
575 result.push(subject.slice(currentIndex, length)); 576 result.push(SubString(subject, currentIndex, length));
576 break; 577 break;
577 } 578 }
578
579 var endIndex = matchInfo[CAPTURE1]; 579 var endIndex = matchInfo[CAPTURE1];
580 580
581 // We ignore a zero-length match at the currentIndex. 581 // We ignore a zero-length match at the currentIndex.
582 if (startIndex === endIndex && endIndex === currentIndex) { 582 if (startIndex === endIndex && endIndex === currentIndex) {
583 startIndex++; 583 startIndex++;
584 continue; 584 continue;
585 } 585 }
586 586
587 result.push(SubString(subject, currentIndex, matchInfo[CAPTURE0])); 587 if (currentIndex + 1 == startMatch) {
Lasse Reichstein 2010/12/17 10:38:30 Does this really pay off? It seems to be simply in
sandholm 2010/12/17 11:50:24 It is faster. I generally only inline call sites w
588 result.push(%_StringCharAt(subject, currentIndex));
589 } else {
590 result.push(%_SubString(subject, currentIndex, startMatch));
591 }
592
588 if (result.length === limit) break; 593 if (result.length === limit) break;
589 594
590 var matchinfo_len = NUMBER_OF_CAPTURES(matchInfo) + REGEXP_FIRST_CAPTURE; 595 var matchinfo_len = NUMBER_OF_CAPTURES(matchInfo) + REGEXP_FIRST_CAPTURE;
591 for (var i = REGEXP_FIRST_CAPTURE + 2; i < matchinfo_len; ) { 596 for (var i = REGEXP_FIRST_CAPTURE + 2; i < matchinfo_len; ) {
592 var start = matchInfo[i++]; 597 var start = matchInfo[i++];
593 var end = matchInfo[i++]; 598 var end = matchInfo[i++];
594 if (end != -1) { 599 if (end != -1) {
595 if (start + 1 == end) { 600 if (start + 1 == end) {
596 result.push(%_StringCharAt(subject, start)); 601 result.push(%_StringCharAt(subject, start));
597 } else { 602 } else {
598 result.push(%_SubString(subject, start, end)); 603 result.push(%_SubString(subject, start, end));
599 } 604 }
600 } else { 605 } else {
601 result.push(void 0); 606 result.push(void 0);
602 } 607 }
603 if (result.length === limit) break outer_loop; 608 if (result.length === limit) break outer_loop;
604 } 609 }
605 610
606 startIndex = currentIndex = endIndex; 611 startIndex = currentIndex = endIndex;
607 } 612 }
608 return result; 613 return result;
609 } 614 }
610 615
611 616
612 // ECMA-262 section 15.5.4.14
613 // Helper function used by split. This version returns the matchInfo
614 // instead of allocating a new array with basically the same information.
615 function splitMatch(separator, subject, current_index, start_index) {
616 var matchInfo = DoRegExpExec(separator, subject, start_index);
617 if (matchInfo == null) return null;
618 // Section 15.5.4.14 paragraph two says that we do not allow zero length
619 // matches at the end of the string.
620 if (matchInfo[CAPTURE0] === subject.length) return null;
621 return matchInfo;
622 }
623
624
625 // ECMA-262 section 15.5.4.15 617 // ECMA-262 section 15.5.4.15
626 function StringSubstring(start, end) { 618 function StringSubstring(start, end) {
627 var s = TO_STRING_INLINE(this); 619 var s = TO_STRING_INLINE(this);
628 var s_len = s.length; 620 var s_len = s.length;
629 621
630 var start_i = TO_INTEGER(start); 622 var start_i = TO_INTEGER(start);
631 if (start_i < 0) { 623 if (start_i < 0) {
632 start_i = 0; 624 start_i = 0;
633 } else if (start_i > s_len) { 625 } else if (start_i > s_len) {
634 start_i = s_len; 626 start_i = s_len;
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after
837 this.elements = %_Arguments(1); 829 this.elements = %_Arguments(1);
838 } else { 830 } else {
839 this.elements = new $Array(); 831 this.elements = new $Array();
840 } 832 }
841 this.special_string = str; 833 this.special_string = str;
842 } 834 }
843 835
844 836
845 ReplaceResultBuilder.prototype.add = function(str) { 837 ReplaceResultBuilder.prototype.add = function(str) {
846 str = TO_STRING_INLINE(str); 838 str = TO_STRING_INLINE(str);
847 if (str.length > 0) { 839 if (str.length > 0) this.elements.push(str);
848 var elements = this.elements; 840 }
849 elements[elements.length] = str; 841
850 } 842
843 ReplaceResultBuilder.prototype.addString = function(str) {
844 if (str.length > 0) this.elements.push(str);
851 } 845 }
852 846
853 847
854 ReplaceResultBuilder.prototype.addSpecialSlice = function(start, end) { 848 ReplaceResultBuilder.prototype.addSpecialSlice = function(start, end) {
855 var len = end - start; 849 var len = end - start;
856 if (start < 0 || len <= 0) return; 850 if (start < 0 || len <= 0) return;
857 var elements = this.elements;
858 if (start < 0x80000 && len < 0x800) { 851 if (start < 0x80000 && len < 0x800) {
859 elements[elements.length] = (start << 11) | len; 852 this.elements.push((start << 11) | len);
860 } else { 853 } else {
861 // 0 < len <= String::kMaxLength and Smi::kMaxValue >= String::kMaxLength, 854 // 0 < len <= String::kMaxLength and Smi::kMaxValue >= String::kMaxLength,
862 // so -len is a smi. 855 // so -len is a smi.
863 elements[elements.length] = -len; 856 var elements = this.elements;
864 elements[elements.length] = start; 857 elements.push(-len);
858 elements.push(start);
865 } 859 }
866 } 860 }
867 861
868 862
869 ReplaceResultBuilder.prototype.generate = function() { 863 ReplaceResultBuilder.prototype.generate = function() {
870 var elements = this.elements; 864 var elements = this.elements;
871 return %StringBuilderConcat(elements, elements.length, this.special_string); 865 return %StringBuilderConcat(elements, elements.length, this.special_string);
872 } 866 }
873 867
874 868
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
920 "italics", StringItalics, 914 "italics", StringItalics,
921 "small", StringSmall, 915 "small", StringSmall,
922 "strike", StringStrike, 916 "strike", StringStrike,
923 "sub", StringSub, 917 "sub", StringSub,
924 "sup", StringSup 918 "sup", StringSup
925 )); 919 ));
926 } 920 }
927 921
928 922
929 SetupString(); 923 SetupString();
OLDNEW
« src/regexp.js ('K') | « src/regexp.js ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698