Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(237)

Side by Side Diff: src/jsregexp.cc

Issue 53047: Implement string.match in C++. (Closed)
Patch Set: Created 11 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 187 matching lines...) Expand 10 before | Expand all | Expand 10 after
198 ASSERT(!result.is_null() || Top::has_pending_exception()); 198 ASSERT(!result.is_null() || Top::has_pending_exception());
199 return result; 199 return result;
200 } 200 }
201 default: 201 default:
202 UNREACHABLE(); 202 UNREACHABLE();
203 return Handle<Object>::null(); 203 return Handle<Object>::null();
204 } 204 }
205 } 205 }
206 206
207 207
208 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
209 Handle<String> subject,
210 Handle<JSArray> last_match_info) {
211 switch (regexp->TypeTag()) {
212 case JSRegExp::ATOM:
213 return AtomExecGlobal(regexp, subject, last_match_info);
214 case JSRegExp::IRREGEXP: {
215 Handle<Object> result =
216 IrregexpExecGlobal(regexp, subject, last_match_info);
217 ASSERT(!result.is_null() || Top::has_pending_exception());
218 return result;
219 }
220 default:
221 UNREACHABLE();
222 return Handle<Object>::null();
223 }
224 }
225
226
227 // RegExp Atom implementation: Simple string search using indexOf. 208 // RegExp Atom implementation: Simple string search using indexOf.
228 209
229 210
230 void RegExpImpl::AtomCompile(Handle<JSRegExp> re, 211 void RegExpImpl::AtomCompile(Handle<JSRegExp> re,
231 Handle<String> pattern, 212 Handle<String> pattern,
232 JSRegExp::Flags flags, 213 JSRegExp::Flags flags,
233 Handle<String> match_pattern) { 214 Handle<String> match_pattern) {
234 Factory::SetRegExpAtomData(re, 215 Factory::SetRegExpAtomData(re,
235 JSRegExp::ATOM, 216 JSRegExp::ATOM,
236 pattern, 217 pattern,
(...skipping 29 matching lines...) Expand all
266 247
267 { 248 {
268 NoHandleAllocation no_handles; 249 NoHandleAllocation no_handles;
269 FixedArray* array = last_match_info->elements(); 250 FixedArray* array = last_match_info->elements();
270 SetAtomLastCapture(array, *subject, value, value + needle->length()); 251 SetAtomLastCapture(array, *subject, value, value + needle->length());
271 } 252 }
272 return last_match_info; 253 return last_match_info;
273 } 254 }
274 255
275 256
276 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
277 Handle<String> subject,
278 Handle<JSArray> last_match_info) {
279 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
280 ASSERT(last_match_info->HasFastElements());
281 Handle<JSArray> result = Factory::NewJSArray(1);
282 int index = 0;
283 int match_count = 0;
284 int subject_length = subject->length();
285 int needle_length = needle->length();
286 int last_value = -1;
287 while (true) {
288 HandleScope scope;
289 int value = -1;
290 if (index + needle_length <= subject_length) {
291 value = Runtime::StringMatch(subject, needle, index);
292 }
293 if (value == -1) {
294 if (last_value != -1) {
295 Handle<FixedArray> array(last_match_info->elements());
296 SetAtomLastCapture(*array,
297 *subject,
298 last_value,
299 last_value + needle->length());
300 }
301 break;
302 }
303
304 int end = value + needle_length;
305
306 // Create an array that looks like the static last_match_info array
307 // that is attached to the global RegExp object. We will be returning
308 // an array of these.
309 Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2);
310 SetLastCaptureCount(*array, 2);
311 // Ignore subject and input fields.
312 SetCapture(*array, 0, value);
313 SetCapture(*array, 1, end);
314 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
315 SetElement(result, match_count, pair);
316 match_count++;
317 index = end;
318 if (needle_length == 0) index++;
319 last_value = value;
320 }
321 return result;
322 }
323
324
325 // Irregexp implementation. 257 // Irregexp implementation.
326 258
327 259
328 // Ensures that the regexp object contains a compiled version of the 260 // Ensures that the regexp object contains a compiled version of the
329 // source for either ASCII or non-ASCII strings. 261 // source for either ASCII or non-ASCII strings.
330 // If the compiled version doesn't already exist, it is compiled 262 // If the compiled version doesn't already exist, it is compiled
331 // from the source pattern. 263 // from the source pattern.
332 // If compilation fails, an exception is thrown and this function 264 // If compilation fails, an exception is thrown and this function
333 // returns false. 265 // returns false.
334 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, 266 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) {
335 bool is_ascii) {
336 int index; 267 int index;
337 if (is_ascii) { 268 if (is_ascii) {
338 index = JSRegExp::kIrregexpASCIICodeIndex; 269 index = JSRegExp::kIrregexpASCIICodeIndex;
339 } else { 270 } else {
340 index = JSRegExp::kIrregexpUC16CodeIndex; 271 index = JSRegExp::kIrregexpUC16CodeIndex;
341 } 272 }
342 Object* entry = re->DataAt(index); 273 Object* entry = re->DataAt(index);
343 if (!entry->IsTheHole()) { 274 if (!entry->IsTheHole()) {
344 // A value has already been compiled. 275 // A value has already been compiled.
345 if (entry->IsJSObject()) { 276 if (entry->IsJSObject()) {
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after
453 int capture_count) { 384 int capture_count) {
454 // Initialize compiled code entries to null. 385 // Initialize compiled code entries to null.
455 Factory::SetRegExpIrregexpData(re, 386 Factory::SetRegExpIrregexpData(re,
456 JSRegExp::IRREGEXP, 387 JSRegExp::IRREGEXP,
457 pattern, 388 pattern,
458 flags, 389 flags,
459 capture_count); 390 capture_count);
460 } 391 }
461 392
462 393
463 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, 394 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
464 Handle<String> subject, 395 Handle<String> subject,
465 int index, 396 int previous_index,
466 Handle<JSArray> last_match_info) { 397 Handle<JSArray> last_match_info) {
467 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); 398 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
468 399
469 // Prepare space for the return values. 400 // Prepare space for the return values.
470 int number_of_capture_registers = 401 int number_of_capture_registers =
471 (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; 402 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
472 OffsetsVector offsets(number_of_capture_registers); 403 OffsetsVector offsets(number_of_capture_registers);
473 404
474 int previous_index = index;
475
476 #ifdef DEBUG 405 #ifdef DEBUG
477 if (FLAG_trace_regexp_bytecodes) { 406 if (FLAG_trace_regexp_bytecodes) {
478 String* pattern = regexp->Pattern(); 407 String* pattern = jsregexp->Pattern();
479 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 408 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
480 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 409 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
481 } 410 }
482 #endif 411 #endif
483 412
484 if (!subject->IsFlat()) { 413 if (!subject->IsFlat()) {
485 FlattenString(subject); 414 FlattenString(subject);
486 } 415 }
487 416
488 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); 417 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
489 418
490 return IrregexpExecOnce(regexp, 419 int* offsets_vector = offsets.vector();
491 number_of_capture_registers, 420 int offsets_vector_length = offsets.length();
492 last_match_info, 421 bool rc;
493 subject,
494 previous_index,
495 offsets.vector(),
496 offsets.length());
497 }
498 422
499 423 // Dispatch to the correct RegExp implementation.
500 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
501 Handle<String> subject,
502 Handle<JSArray> last_match_info) {
503 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
504
505 // Prepare space for the return values.
506 int number_of_capture_registers =
507 (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
508 OffsetsVector offsets(number_of_capture_registers);
509
510 int previous_index = 0;
511
512 Handle<JSArray> result = Factory::NewJSArray(0);
513 int result_length = 0;
514 Handle<Object> matches;
515
516 if (!subject->IsFlat()) {
517 FlattenString(subject);
518 }
519
520 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
521
522 while (true) {
523 if (previous_index > subject->length() || previous_index < 0) {
524 // Per ECMA-262 15.10.6.2, if the previous index is greater than the
525 // string length, there is no match.
526 return result;
527 } else {
528 #ifdef DEBUG
529 if (FLAG_trace_regexp_bytecodes) {
530 String* pattern = regexp->Pattern();
531 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
532 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
533 }
534 #endif
535 HandleScope scope;
536 matches = IrregexpExecOnce(regexp,
537 number_of_capture_registers,
538 last_match_info,
539 subject,
540 previous_index,
541 offsets.vector(),
542 offsets.length());
543
544 if (matches.is_null()) {
545 ASSERT(Top::has_pending_exception());
546 return matches;
547 }
548
549 if (matches->IsJSArray()) {
550 // Create an array that looks like the static last_match_info array
551 // that is attached to the global RegExp object. We will be returning
552 // an array of these.
553 int match_length = kFirstCapture + number_of_capture_registers;
554 Handle<JSArray> latest_match =
555 Factory::NewJSArray(match_length);
556
557 AssertNoAllocation no_allocation;
558 FixedArray* match_array = JSArray::cast(*matches)->elements();
559 match_array->CopyTo(0,
560 latest_match->elements(),
561 0,
562 match_length);
563 SetElement(result, result_length, latest_match);
564 result_length++;
565 previous_index = GetCapture(match_array, 1);
566 if (GetCapture(match_array, 0) == previous_index) {
567 previous_index++;
568 }
569 } else {
570 ASSERT(matches->IsNull());
571 return result;
572 }
573 }
574 }
575 }
576
577
578 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> jsregexp,
579 int number_of_capture_registers,
580 Handle<JSArray> last_match_info,
581 Handle<String> subject,
582 int previous_index,
583 int* offsets_vector,
584 int offsets_vector_length) {
585 ASSERT(subject->IsFlat());
586 bool rc;
587 424
588 Handle<String> original_subject = subject; 425 Handle<String> original_subject = subject;
589 Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data())); 426 Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
590 if (UseNativeRegexp()) { 427 if (UseNativeRegexp()) {
591 #ifdef ARM 428 #ifdef ARM
592 UNREACHABLE(); 429 UNREACHABLE();
593 #else 430 #else
594 RegExpMacroAssemblerIA32::Result res; 431 RegExpMacroAssemblerIA32::Result res;
595 do { 432 do {
596 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); 433 bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
(...skipping 27 matching lines...) Expand all
624 offsets_vector[i] = -1; 461 offsets_vector[i] = -1;
625 } 462 }
626 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); 463 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii));
627 464
628 rc = IrregexpInterpreter::Match(byte_codes, 465 rc = IrregexpInterpreter::Match(byte_codes,
629 subject, 466 subject,
630 offsets_vector, 467 offsets_vector,
631 previous_index); 468 previous_index);
632 } 469 }
633 470
471 // Handle results from RegExp implementation.
472
634 if (!rc) { 473 if (!rc) {
635 return Factory::null_value(); 474 return Factory::null_value();
636 } 475 }
637 476
638 FixedArray* array = last_match_info->elements(); 477 FixedArray* array = last_match_info->elements();
639 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); 478 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
640 // The captures come in (start, end+1) pairs. 479 // The captures come in (start, end+1) pairs.
641 SetLastCaptureCount(array, number_of_capture_registers); 480 SetLastCaptureCount(array, number_of_capture_registers);
642 SetLastSubject(array, *original_subject); 481 SetLastSubject(array, *original_subject);
643 SetLastInput(array, *original_subject); 482 SetLastInput(array, *original_subject);
(...skipping 3972 matching lines...) Expand 10 before | Expand all | Expand 10 after
4616 EmbeddedVector<byte, 1024> codes; 4455 EmbeddedVector<byte, 1024> codes;
4617 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4456 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4618 return compiler.Assemble(&macro_assembler, 4457 return compiler.Assemble(&macro_assembler,
4619 node, 4458 node,
4620 data->capture_count, 4459 data->capture_count,
4621 pattern); 4460 pattern);
4622 } 4461 }
4623 4462
4624 4463
4625 }} // namespace v8::internal 4464 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/platform-win32.cc » ('j') | src/string.js » ('J')

Powered by Google App Engine
This is Rietveld 408576698