Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(65)

Side by Side Diff: src/js/regexp.js

Issue 2401643002: [regexp] Port split (Closed)
Patch Set: Address comments Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/builtins/builtins-regexp.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 (function(global, utils) { 5 (function(global, utils) {
6 6
7 'use strict'; 7 'use strict';
8 8
9 %CheckIsBootstrapping(); 9 %CheckIsBootstrapping();
10 10
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
115 if (!IS_RECEIVER(result) && !IS_NULL(result)) { 115 if (!IS_RECEIVER(result) && !IS_NULL(result)) {
116 throw %make_type_error(kInvalidRegExpExecResult); 116 throw %make_type_error(kInvalidRegExpExecResult);
117 } 117 }
118 return result; 118 return result;
119 } 119 }
120 return %_Call(RegExpExecJS, regexp, string); 120 return %_Call(RegExpExecJS, regexp, string);
121 } 121 }
122 %SetForceInlineFlag(RegExpSubclassExec); 122 %SetForceInlineFlag(RegExpSubclassExec);
123 123
124 124
125 function AtSurrogatePair(subject, index) {
126 if (index + 1 >= subject.length) return false;
127 var first = %_StringCharCodeAt(subject, index);
128 if (first < 0xD800 || first > 0xDBFF) return false;
129 var second = %_StringCharCodeAt(subject, index + 1);
130 return second >= 0xDC00 && second <= 0xDFFF;
131 }
132
133
134 // Fast path implementation of RegExp.prototype[Symbol.split], which
135 // does not properly call the underlying exec or @@species methods.
136 function RegExpSplit(string, limit) {
137 if (!IS_REGEXP(this)) {
138 throw %make_type_error(kIncompatibleMethodReceiver,
139 "RegExp.prototype.@@split", this);
140 }
141 var separator = this;
142 var subject = TO_STRING(string);
143
144 limit = (IS_UNDEFINED(limit)) ? kMaxUint32 : TO_UINT32(limit);
145 var length = subject.length;
146
147 if (limit === 0) return [];
148
149 if (length === 0) {
150 if (DoRegExpExec(separator, subject, 0, 0) !== null) return [];
151 return [subject];
152 }
153
154 var currentIndex = 0;
155 var startIndex = 0;
156 var startMatch = 0;
157 var result = new InternalArray();
158
159 outer_loop:
160 while (true) {
161 if (startIndex === length) {
162 result[result.length] = %_SubString(subject, currentIndex, length);
163 break;
164 }
165
166 var matchInfo = DoRegExpExec(separator, subject, startIndex);
167 if (matchInfo === null || length === (startMatch = matchInfo[CAPTURE0])) {
168 result[result.length] = %_SubString(subject, currentIndex, length);
169 break;
170 }
171 var endIndex = matchInfo[CAPTURE1];
172
173 // We ignore a zero-length match at the currentIndex.
174 if (startIndex === endIndex && endIndex === currentIndex) {
175 if (REGEXP_UNICODE(this) && AtSurrogatePair(subject, startIndex)) {
176 startIndex += 2;
177 } else {
178 startIndex++;
179 }
180 continue;
181 }
182
183 result[result.length] = %_SubString(subject, currentIndex, startMatch);
184
185 if (result.length === limit) break;
186
187 var matchinfo_len = NUMBER_OF_CAPTURES(matchInfo) + REGEXP_FIRST_CAPTURE;
188 for (var i = REGEXP_FIRST_CAPTURE + 2; i < matchinfo_len; ) {
189 var start = matchInfo[i++];
190 var end = matchInfo[i++];
191 if (end != -1) {
192 result[result.length] = %_SubString(subject, start, end);
193 } else {
194 result[result.length] = UNDEFINED;
195 }
196 if (result.length === limit) break outer_loop;
197 }
198
199 startIndex = currentIndex = endIndex;
200 }
201
202 var array_result = [];
203 %MoveArrayContents(result, array_result);
204 return array_result;
205 }
206
207
208 // ES#sec-regexp.prototype-@@split
209 // RegExp.prototype [ @@split ] ( string, limit )
210 function RegExpSubclassSplit(string, limit) {
211 if (!IS_RECEIVER(this)) {
212 throw %make_type_error(kIncompatibleMethodReceiver,
213 "RegExp.prototype.@@split", this);
214 }
215 string = TO_STRING(string);
216 var constructor = SpeciesConstructor(this, GlobalRegExp);
217 var flags = TO_STRING(this.flags);
218
219 // TODO(adamk): this fast path is wrong as we don't ensure that 'exec'
220 // is actually a data property on RegExp.prototype.
221 if (IS_REGEXP(this) && constructor === GlobalRegExp) {
222 var exec = this.exec;
223 if (exec === RegExpExecJS) {
224 return %_Call(RegExpSplit, this, string, limit);
225 }
226 }
227
228 var unicode = %StringIndexOf(flags, 'u', 0) >= 0;
229 var sticky = %StringIndexOf(flags, 'y', 0) >= 0;
230 var newFlags = sticky ? flags : flags + "y";
231 var splitter = new constructor(this, newFlags);
232 var array = new GlobalArray();
233 var arrayIndex = 0;
234 var lim = (IS_UNDEFINED(limit)) ? kMaxUint32 : TO_UINT32(limit);
235 var size = string.length;
236 var prevStringIndex = 0;
237 if (lim === 0) return array;
238 var result;
239 if (size === 0) {
240 result = RegExpSubclassExec(splitter, string);
241 if (IS_NULL(result)) %AddElement(array, 0, string);
242 return array;
243 }
244 var stringIndex = prevStringIndex;
245 while (stringIndex < size) {
246 splitter.lastIndex = stringIndex;
247 result = RegExpSubclassExec(splitter, string);
248 if (IS_NULL(result)) {
249 stringIndex += AdvanceStringIndex(string, stringIndex, unicode);
250 } else {
251 var end = MinSimple(TO_LENGTH(splitter.lastIndex), size);
252 if (end === prevStringIndex) {
253 stringIndex += AdvanceStringIndex(string, stringIndex, unicode);
254 } else {
255 %AddElement(
256 array, arrayIndex,
257 %_SubString(string, prevStringIndex, stringIndex));
258 arrayIndex++;
259 if (arrayIndex === lim) return array;
260 prevStringIndex = end;
261 var numberOfCaptures = MaxSimple(TO_LENGTH(result.length), 0);
262 for (var i = 1; i < numberOfCaptures; i++) {
263 %AddElement(array, arrayIndex, result[i]);
264 arrayIndex++;
265 if (arrayIndex === lim) return array;
266 }
267 stringIndex = prevStringIndex;
268 }
269 }
270 }
271 %AddElement(array, arrayIndex,
272 %_SubString(string, prevStringIndex, size));
273 return array;
274 }
275 %FunctionRemovePrototype(RegExpSubclassSplit);
276
277
278 // Legacy implementation of RegExp.prototype[Symbol.replace] which 125 // Legacy implementation of RegExp.prototype[Symbol.replace] which
279 // doesn't properly call the underlying exec method. 126 // doesn't properly call the underlying exec method.
280 127
281 // TODO(lrn): This array will survive indefinitely if replace is never 128 // TODO(lrn): This array will survive indefinitely if replace is never
282 // called again. However, it will be empty, since the contents are cleared 129 // called again. However, it will be empty, since the contents are cleared
283 // in the finally block. 130 // in the finally block.
284 var reusableReplaceArray = new InternalArray(4); 131 var reusableReplaceArray = new InternalArray(4);
285 132
286 // Helper function for replacing regular expressions with the result of a 133 // Helper function for replacing regular expressions with the result of a
287 // function application in String.prototype.replace. 134 // function application in String.prototype.replace.
(...skipping 379 matching lines...) Expand 10 before | Expand all | Expand 10 after
667 return accumulatedResult + %_SubString(string, nextSourcePosition, length); 514 return accumulatedResult + %_SubString(string, nextSourcePosition, length);
668 } 515 }
669 %FunctionRemovePrototype(RegExpSubclassReplace); 516 %FunctionRemovePrototype(RegExpSubclassReplace);
670 517
671 518
672 519
673 // ------------------------------------------------------------------- 520 // -------------------------------------------------------------------
674 521
675 utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [ 522 utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [
676 replaceSymbol, RegExpSubclassReplace, 523 replaceSymbol, RegExpSubclassReplace,
677 splitSymbol, RegExpSubclassSplit,
678 ]); 524 ]);
679 525
680 %InstallToContext(["regexp_last_match_info", RegExpLastMatchInfo]); 526 %InstallToContext(["regexp_last_match_info", RegExpLastMatchInfo]);
681 527
682 // ------------------------------------------------------------------- 528 // -------------------------------------------------------------------
683 // Internal 529 // Internal
684 530
685 var InternalRegExpMatchInfo = { 531 var InternalRegExpMatchInfo = {
686 REGEXP_NUMBER_OF_CAPTURES: 2, 532 REGEXP_NUMBER_OF_CAPTURES: 2,
687 REGEXP_LAST_SUBJECT: "", 533 REGEXP_LAST_SUBJECT: "",
(...skipping 22 matching lines...) Expand all
710 to.GetSubstitution = GetSubstitution; 556 to.GetSubstitution = GetSubstitution;
711 to.InternalRegExpMatch = InternalRegExpMatch; 557 to.InternalRegExpMatch = InternalRegExpMatch;
712 to.InternalRegExpReplace = InternalRegExpReplace; 558 to.InternalRegExpReplace = InternalRegExpReplace;
713 to.IsRegExp = IsRegExp; 559 to.IsRegExp = IsRegExp;
714 to.RegExpExec = DoRegExpExec; 560 to.RegExpExec = DoRegExpExec;
715 to.RegExpInitialize = RegExpInitialize; 561 to.RegExpInitialize = RegExpInitialize;
716 to.RegExpLastMatchInfo = RegExpLastMatchInfo; 562 to.RegExpLastMatchInfo = RegExpLastMatchInfo;
717 }); 563 });
718 564
719 }) 565 })
OLDNEW
« no previous file with comments | « src/builtins/builtins-regexp.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698