Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(31)

Side by Side Diff: src/jsregexp.cc

Issue 13618: * Delayed compilation of irregexps until use-time, and specialize on char type. (Closed)
Patch Set: Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
194 Handle<String> error_text, 194 Handle<String> error_text,
195 const char* message) { 195 const char* message) {
196 Handle<JSArray> array = Factory::NewJSArray(2); 196 Handle<JSArray> array = Factory::NewJSArray(2);
197 SetElement(array, 0, pattern); 197 SetElement(array, 0, pattern);
198 SetElement(array, 1, error_text); 198 SetElement(array, 1, error_text);
199 Handle<Object> regexp_err = Factory::NewSyntaxError(message, array); 199 Handle<Object> regexp_err = Factory::NewSyntaxError(message, array);
200 Top::Throw(*regexp_err); 200 Top::Throw(*regexp_err);
201 } 201 }
202 202
203 203
204 // Generic RegExp methods. Dispatches to implementation specific methods.
205
206
207 class OffsetsVector {
208 public:
209 inline OffsetsVector(int num_registers)
210 : offsets_vector_length_(num_registers) {
211 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
212 vector_ = NewArray<int>(offsets_vector_length_);
213 } else {
214 vector_ = static_offsets_vector_;
215 }
216 }
217
218
219 inline ~OffsetsVector() {
220 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
221 DeleteArray(vector_);
222 vector_ = NULL;
223 }
224 }
225
226
227 inline int* vector() {
228 return vector_;
229 }
230
231
232 inline int length() {
233 return offsets_vector_length_;
234 }
235
236 private:
237 int* vector_;
238 int offsets_vector_length_;
239 static const int kStaticOffsetsVectorSize = 50;
240 static int static_offsets_vector_[kStaticOffsetsVectorSize];
241 };
242
243
244 int OffsetsVector::static_offsets_vector_[
245 OffsetsVector::kStaticOffsetsVectorSize];
246
247
204 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, 248 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
205 Handle<String> pattern, 249 Handle<String> pattern,
206 Handle<String> flag_str) { 250 Handle<String> flag_str) {
207 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); 251 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
208 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); 252 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);
209 bool in_cache = !cached.is_null(); 253 bool in_cache = !cached.is_null();
210 LOG(RegExpCompileEvent(re, in_cache)); 254 LOG(RegExpCompileEvent(re, in_cache));
211 255
212 Handle<Object> result; 256 Handle<Object> result;
213 if (in_cache) { 257 if (in_cache) {
214 re->set_data(*cached); 258 re->set_data(*cached);
215 result = re; 259 result = re;
216 } else { 260 } else {
217 FlattenString(pattern); 261 FlattenString(pattern);
218 ZoneScope zone_scope(DELETE_ON_EXIT); 262 ZoneScope zone_scope(DELETE_ON_EXIT);
219 RegExpParseResult parse_result; 263 RegExpParseResult parse_result;
220 FlatStringReader reader(pattern); 264 FlatStringReader reader(pattern);
221 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { 265 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
222 // Throw an exception if we fail to parse the pattern. 266 // Throw an exception if we fail to parse the pattern.
223 ThrowRegExpException(re, 267 ThrowRegExpException(re,
224 pattern, 268 pattern,
225 parse_result.error, 269 parse_result.error,
226 "malformed_regexp"); 270 "malformed_regexp");
227 return Handle<Object>(); 271 return Handle<Object>::null();
228 } 272 }
229 RegExpAtom* atom = parse_result.tree->AsAtom(); 273 RegExpAtom* atom = parse_result.tree->AsAtom();
230 if (atom != NULL && !flags.is_ignore_case()) { 274 if (atom != NULL && !flags.is_ignore_case()) {
231 if (parse_result.has_character_escapes) { 275 if (parse_result.has_character_escapes) {
232 Vector<const uc16> atom_pattern = atom->data(); 276 Vector<const uc16> atom_pattern = atom->data();
233 Handle<String> atom_string = 277 Handle<String> atom_string =
234 Factory::NewStringFromTwoByte(atom_pattern); 278 Factory::NewStringFromTwoByte(atom_pattern);
235 result = AtomCompile(re, pattern, flags, atom_string); 279 result = AtomCompile(re, pattern, flags, atom_string);
236 } else { 280 } else {
237 result = AtomCompile(re, pattern, flags, pattern); 281 result = AtomCompile(re, pattern, flags, pattern);
238 } 282 }
239 } else { 283 } else {
240 RegExpNode* node = NULL; 284 if (FLAG_irregexp) {
241 Handle<FixedArray> irregexp_data = 285 result = IrregexpPrepare(re, pattern, flags);
242 RegExpEngine::Compile(&parse_result, 286 } else {
243 &node,
244 flags.is_ignore_case(),
245 flags.is_multiline(),
246 pattern);
247 if (irregexp_data.is_null()) {
248 if (FLAG_disable_jscre) {
249 UNIMPLEMENTED();
250 }
251 result = JscrePrepare(re, pattern, flags); 287 result = JscrePrepare(re, pattern, flags);
252 } else {
253 result = IrregexpPrepare(re, pattern, flags, irregexp_data);
254 } 288 }
255 } 289 }
256 Object* data = re->data(); 290 Object* data = re->data();
257 if (data->IsFixedArray()) { 291 if (data->IsFixedArray()) {
258 // If compilation succeeded then the data is set on the regexp 292 // If compilation succeeded then the data is set on the regexp
259 // and we can store it in the cache. 293 // and we can store it in the cache.
260 Handle<FixedArray> data(FixedArray::cast(re->data())); 294 Handle<FixedArray> data(FixedArray::cast(re->data()));
261 CompilationCache::PutRegExp(pattern, flags, data); 295 CompilationCache::PutRegExp(pattern, flags, data);
262 } 296 }
263 } 297 }
264 298
265 return result; 299 return result;
266 } 300 }
267 301
268 302
269 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, 303 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
270 Handle<String> subject, 304 Handle<String> subject,
271 Handle<Object> index) { 305 Handle<Object> index) {
272 switch (regexp->TypeTag()) { 306 switch (regexp->TypeTag()) {
307 case JSRegExp::ATOM:
308 return AtomExec(regexp, subject, index);
309 case JSRegExp::IRREGEXP: {
310 Handle<Object> result = IrregexpExec(regexp, subject, index);
311 if (!result.is_null()) {
312 return result;
313 }
314 // We couldn't handle the regexp using Irregexp, so fall back
315 // on JSCRE. We rejoice at the thought of the day when this is
Erik Corry 2008/12/08 12:47:51 spolling.
316 // no longer needed.
317 // Reset the JSRegExp to use JSCRE.
318 JscrePrepare(regexp,
319 Handle<String>(regexp->Pattern()),
320 regexp->GetFlags());
321 // Fall-through to JSCRE.
322 }
273 case JSRegExp::JSCRE: 323 case JSRegExp::JSCRE:
274 if (FLAG_disable_jscre) { 324 if (FLAG_disable_jscre) {
275 UNIMPLEMENTED(); 325 UNIMPLEMENTED();
276 } 326 }
277 return JscreExec(regexp, subject, index); 327 return JscreExec(regexp, subject, index);
278 case JSRegExp::ATOM:
279 return AtomExec(regexp, subject, index);
280 case JSRegExp::IRREGEXP:
281 return IrregexpExec(regexp, subject, index);
282 default: 328 default:
283 UNREACHABLE(); 329 UNREACHABLE();
284 return Handle<Object>(); 330 return Handle<Object>::null();
285 } 331 }
286 } 332 }
287 333
288 334
289 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, 335 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
290 Handle<String> subject) { 336 Handle<String> subject) {
291 switch (regexp->TypeTag()) { 337 switch (regexp->TypeTag()) {
338 case JSRegExp::ATOM:
339 return AtomExecGlobal(regexp, subject);
340 case JSRegExp::IRREGEXP: {
341 Handle<Object> result = IrregexpExecGlobal(regexp, subject);
342 if (!result.is_null()) {
343 return result;
344 }
345 // We couldn't handle the regexp using Irregexp, so fall back
346 // on JSCRE. We rejoice at the thought of the day when this is
Erik Corry 2008/12/08 12:47:51 Speling
347 // no longer needed.
348 // Reset the JSRegExp to use JSCRE.
349 JscrePrepare(regexp,
350 Handle<String>(regexp->Pattern()),
351 regexp->GetFlags());
352 // Fall-through to JSCRE.
353 }
292 case JSRegExp::JSCRE: 354 case JSRegExp::JSCRE:
293 if (FLAG_disable_jscre) { 355 if (FLAG_disable_jscre) {
294 UNIMPLEMENTED(); 356 UNIMPLEMENTED();
295 } 357 }
296 return JscreExecGlobal(regexp, subject); 358 return JscreExecGlobal(regexp, subject);
297 case JSRegExp::ATOM:
298 return AtomExecGlobal(regexp, subject);
299 case JSRegExp::IRREGEXP:
300 return IrregexpExecGlobal(regexp, subject);
301 default: 359 default:
302 UNREACHABLE(); 360 UNREACHABLE();
303 return Handle<Object>(); 361 return Handle<Object>::null();
304 } 362 }
305 } 363 }
306 364
307 365
366 // RegExp Atom implementation: Simple string search using indexOf.
367
368
308 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, 369 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
309 Handle<String> pattern, 370 Handle<String> pattern,
310 JSRegExp::Flags flags, 371 JSRegExp::Flags flags,
311 Handle<String> match_pattern) { 372 Handle<String> match_pattern) {
312 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern); 373 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern);
313 return re; 374 return re;
314 } 375 }
315 376
316 377
317 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, 378 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
359 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); 420 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
360 SetElement(result, match_count, pair); 421 SetElement(result, match_count, pair);
361 match_count++; 422 match_count++;
362 index = end; 423 index = end;
363 if (needle_length == 0) index++; 424 if (needle_length == 0) index++;
364 } 425 }
365 return result; 426 return result;
366 } 427 }
367 428
368 429
430 // JSCRE implementation.
431
432
433 int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
434 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
435 return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value();
436 }
437
438
439 ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
440 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
441 return ByteArray::cast(value->get(kJscreInternalIndex));
442 }
443
444
369 Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re, 445 Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,
370 Handle<String> pattern, 446 Handle<String> pattern,
371 JSRegExp::Flags flags) { 447 JSRegExp::Flags flags) {
372 Handle<Object> value(Heap::undefined_value()); 448 Handle<Object> value(Heap::undefined_value());
373 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); 449 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
374 return re; 450 return re;
375 } 451 }
376 452
377 453
378 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, 454 static inline Object* JscreDoCompile(String* pattern,
379 Handle<String> pattern, 455 JSRegExp::Flags flags,
380 JSRegExp::Flags flags, 456 unsigned* number_of_captures,
381 Handle<FixedArray> irregexp_data) { 457 const char** error_message,
382 Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, irregexp_data); 458 v8::jscre::JscreRegExp** code) {
383 return re;
384 }
385
386
387 static inline Object* DoCompile(String* pattern,
388 JSRegExp::Flags flags,
389 unsigned* number_of_captures,
390 const char** error_message,
391 v8::jscre::JscreRegExp** code) {
392 v8::jscre::JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case() 459 v8::jscre::JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()
393 ? v8::jscre::JSRegExpIgnoreCase 460 ? v8::jscre::JSRegExpIgnoreCase
394 : v8::jscre::JSRegExpDoNotIgnoreCase; 461 : v8::jscre::JSRegExpDoNotIgnoreCase;
395 v8::jscre::JSRegExpMultilineOption multiline_option = flags.is_multiline() 462 v8::jscre::JSRegExpMultilineOption multiline_option = flags.is_multiline()
396 ? v8::jscre::JSRegExpMultiline 463 ? v8::jscre::JSRegExpMultiline
397 : v8::jscre::JSRegExpSingleLine; 464 : v8::jscre::JSRegExpSingleLine;
398 *error_message = NULL; 465 *error_message = NULL;
399 malloc_failure = Failure::Exception(); 466 malloc_failure = Failure::Exception();
400 *code = v8::jscre::jsRegExpCompile(pattern->GetTwoByteData(), 467 *code = v8::jscre::jsRegExpCompile(pattern->GetTwoByteData(),
401 pattern->length(), 468 pattern->length(),
402 case_option, 469 case_option,
403 multiline_option, 470 multiline_option,
404 number_of_captures, 471 number_of_captures,
405 error_message, 472 error_message,
406 &JSREMalloc, 473 &JSREMalloc,
407 &JSREFree); 474 &JSREFree);
408 if (*code == NULL && (malloc_failure->IsRetryAfterGC() || 475 if (*code == NULL && (malloc_failure->IsRetryAfterGC() ||
409 malloc_failure->IsOutOfMemoryFailure())) { 476 malloc_failure->IsOutOfMemoryFailure())) {
410 return malloc_failure; 477 return malloc_failure;
411 } else { 478 } else {
412 // It doesn't matter which object we return here, we just need to return 479 // It doesn't matter which object we return here, we just need to return
413 // a non-failure to indicate to the GC-retry code that there was no 480 // a non-failure to indicate to the GC-retry code that there was no
414 // allocation failure. 481 // allocation failure.
415 return pattern; 482 return pattern;
416 } 483 }
417 } 484 }
418 485
419 486
420 void CompileWithRetryAfterGC(Handle<String> pattern, 487 static void JscreCompileWithRetryAfterGC(Handle<String> pattern,
421 JSRegExp::Flags flags, 488 JSRegExp::Flags flags,
422 unsigned* number_of_captures, 489 unsigned* number_of_captures,
423 const char** error_message, 490 const char** error_message,
424 v8::jscre::JscreRegExp** code) { 491 v8::jscre::JscreRegExp** code) {
425 CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern, 492 CALL_HEAP_FUNCTION_VOID(JscreDoCompile(*pattern,
426 flags, 493 flags,
427 number_of_captures, 494 number_of_captures,
428 error_message, 495 error_message,
429 code)); 496 code));
430 } 497 }
431 498
432 499
433 Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) { 500 Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) {
434 ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE); 501 ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE);
435 ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()); 502 ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined());
436 503
437 Handle<String> pattern(re->Pattern()); 504 Handle<String> pattern(re->Pattern());
438 JSRegExp::Flags flags = re->GetFlags(); 505 JSRegExp::Flags flags = re->GetFlags();
439 506
440 Handle<String> two_byte_pattern = StringToTwoByte(pattern); 507 Handle<String> two_byte_pattern = StringToTwoByte(pattern);
441 508
442 unsigned number_of_captures; 509 unsigned number_of_captures;
443 const char* error_message = NULL; 510 const char* error_message = NULL;
444 511
445 v8::jscre::JscreRegExp* code = NULL; 512 v8::jscre::JscreRegExp* code = NULL;
446 FlattenString(pattern); 513 FlattenString(pattern);
447 514
448 CompileWithRetryAfterGC(two_byte_pattern, 515 JscreCompileWithRetryAfterGC(two_byte_pattern,
449 flags, 516 flags,
450 &number_of_captures, 517 &number_of_captures,
451 &error_message, 518 &error_message,
452 &code); 519 &code);
453 520
454 if (code == NULL) { 521 if (code == NULL) {
455 // Throw an exception. 522 // Throw an exception.
456 Handle<JSArray> array = Factory::NewJSArray(2); 523 Handle<JSArray> array = Factory::NewJSArray(2);
457 SetElement(array, 0, pattern); 524 SetElement(array, 0, pattern);
458 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector( 525 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(
459 (error_message == NULL) ? "Unknown regexp error" : error_message))); 526 (error_message == NULL) ? "Unknown regexp error" : error_message)));
460 Handle<Object> regexp_err = 527 Handle<Object> regexp_err =
461 Factory::NewSyntaxError("malformed_regexp", array); 528 Factory::NewSyntaxError("malformed_regexp", array);
462 Top::Throw(*regexp_err); 529 Top::Throw(*regexp_err);
463 return Handle<Object>(); 530 return Handle<Object>();
464 } 531 }
465 532
466 // Convert the return address to a ByteArray pointer. 533 // Convert the return address to a ByteArray pointer.
467 Handle<ByteArray> internal( 534 Handle<ByteArray> internal(
468 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code))); 535 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));
469 536
470 Handle<FixedArray> value = Factory::NewFixedArray(kJscreDataLength); 537 Handle<FixedArray> value = Factory::NewFixedArray(kJscreDataLength);
471 value->set(kJscreNumberOfCapturesIndex, Smi::FromInt(number_of_captures)); 538 value->set(kJscreNumberOfCapturesIndex, Smi::FromInt(number_of_captures));
472 value->set(kJscreInternalIndex, *internal); 539 value->set(kJscreInternalIndex, *internal);
473 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); 540 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
474 541
475 return re; 542 return re;
476 } 543 }
477 544
478 545
479 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp, 546 Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
480 int num_captures, 547 Handle<String> subject,
481 Handle<String> two_byte_subject, 548 Handle<Object> index) {
482 int previous_index, 549 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
483 int* offsets_vector, 550 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
484 int offsets_vector_length) { 551 Handle<Object> compile_result = JscreCompile(regexp);
485 #ifdef DEBUG 552 if (compile_result.is_null()) return compile_result;
486 if (FLAG_trace_regexp_bytecodes) {
487 String* pattern = regexp->Pattern();
488 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
489 PrintF("\n\nSubject string: '%s'\n\n", *(two_byte_subject->ToCString()));
490 } 553 }
491 #endif 554 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
492 ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation());
493 ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject)));
494 bool rc;
495 555
496 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { 556 int num_captures = JscreNumberOfCaptures(regexp);
497 offsets_vector[i] = -1;
498 }
499 557
500 LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject)); 558 OffsetsVector offsets((num_captures + 1) * 3);
501 559
502 FixedArray* irregexp = 560 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
503 FixedArray::cast(regexp->DataAt(JSRegExp::kIrregexpDataIndex));
504 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
505 561
506 switch (tag) { 562 Handle<String> subject16 = CachedStringToTwoByte(subject);
507 case RegExpMacroAssembler::kIA32Implementation: {
508 #ifndef ARM
509 Code* code = Code::cast(irregexp->get(kIrregexpCodeIndex));
510 Address start_addr =
511 Handle<SeqTwoByteString>::cast(two_byte_subject)->GetCharsAddress();
512 int string_offset =
513 start_addr - reinterpret_cast<Address>(*two_byte_subject);
514 int start_offset = string_offset + previous_index * sizeof(uc16);
515 int end_offset =
516 string_offset + two_byte_subject->length() * sizeof(uc16);
517 rc = RegExpMacroAssemblerIA32::Execute(code,
518 two_byte_subject.location(),
519 start_offset,
520 end_offset,
521 offsets_vector,
522 previous_index == 0);
523 if (rc) {
524 // Capture values are relative to start_offset only.
525 for (int i = 0; i < offsets_vector_length; i++) {
526 if (offsets_vector[i] >= 0) {
527 offsets_vector[i] += previous_index;
528 }
529 }
530 }
531 break;
532 #else
533 UNIMPLEMENTED();
534 rc = false;
535 break;
536 #endif
537 }
538 case RegExpMacroAssembler::kBytecodeImplementation: {
539 Handle<ByteArray> byte_codes = IrregexpCode(regexp);
540 563
541 rc = IrregexpInterpreter::Match(byte_codes, 564 return JscreExecOnce(regexp,
542 two_byte_subject, 565 num_captures,
543 offsets_vector, 566 subject,
544 previous_index); 567 previous_index,
545 break; 568 subject16->GetTwoByteData(),
546 } 569 offsets.vector(),
547 case RegExpMacroAssembler::kARMImplementation: 570 offsets.length());
548 default:
549 UNREACHABLE();
550 rc = false;
551 break;
552 }
553
554 if (!rc) {
555 return Factory::null_value();
556 }
557
558 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
559 // The captures come in (start, end+1) pairs.
560 for (int i = 0; i < 2 * (num_captures+1); i += 2) {
561 array->set(i, Smi::FromInt(offsets_vector[i]));
562 array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
563 }
564 return Factory::NewJSArrayWithElements(array);
565 } 571 }
566 572
567 573
568 Handle<Object> RegExpImpl::JscreExecOnce(Handle<JSRegExp> regexp, 574 Handle<Object> RegExpImpl::JscreExecOnce(Handle<JSRegExp> regexp,
569 int num_captures, 575 int num_captures,
570 Handle<String> subject, 576 Handle<String> subject,
571 int previous_index, 577 int previous_index,
572 const uc16* two_byte_subject, 578 const uc16* two_byte_subject,
573 int* offsets_vector, 579 int* offsets_vector,
574 int offsets_vector_length) { 580 int offsets_vector_length) {
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
610 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); 616 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
611 // The captures come in (start, end+1) pairs. 617 // The captures come in (start, end+1) pairs.
612 for (int i = 0; i < 2 * (num_captures+1); i += 2) { 618 for (int i = 0; i < 2 * (num_captures+1); i += 2) {
613 array->set(i, Smi::FromInt(offsets_vector[i])); 619 array->set(i, Smi::FromInt(offsets_vector[i]));
614 array->set(i+1, Smi::FromInt(offsets_vector[i+1])); 620 array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
615 } 621 }
616 return Factory::NewJSArrayWithElements(array); 622 return Factory::NewJSArrayWithElements(array);
617 } 623 }
618 624
619 625
620 class OffsetsVector { 626 Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
621 public: 627 Handle<String> subject) {
622 inline OffsetsVector(int num_registers)
623 : offsets_vector_length_(num_registers) {
624 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
625 vector_ = NewArray<int>(offsets_vector_length_);
626 } else {
627 vector_ = static_offsets_vector_;
628 }
629 }
630
631
632 inline ~OffsetsVector() {
633 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
634 DeleteArray(vector_);
635 vector_ = NULL;
636 }
637 }
638
639
640 inline int* vector() {
641 return vector_;
642 }
643
644
645 inline int length() {
646 return offsets_vector_length_;
647 }
648
649 private:
650 int* vector_;
651 int offsets_vector_length_;
652 static const int kStaticOffsetsVectorSize = 50;
653 static int static_offsets_vector_[kStaticOffsetsVectorSize];
654 };
655
656
657 int OffsetsVector::static_offsets_vector_[
658 OffsetsVector::kStaticOffsetsVectorSize];
659
660
661 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
662 Handle<String> subject,
663 Handle<Object> index) {
664 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
665 ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());
666
667 // Prepare space for the return values.
668 int number_of_registers = IrregexpNumberOfRegisters(regexp);
669 OffsetsVector offsets(number_of_registers);
670
671 int num_captures = IrregexpNumberOfCaptures(regexp);
672
673 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
674
675 Handle<String> subject16 = CachedStringToTwoByte(subject);
676
677 Handle<Object> result(IrregexpExecOnce(regexp,
678 num_captures,
679 subject16,
680 previous_index,
681 offsets.vector(),
682 offsets.length()));
683 return result;
684 }
685
686
687 Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
688 Handle<String> subject,
689 Handle<Object> index) {
690 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE); 628 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
691 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) { 629 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
692 Handle<Object> compile_result = JscreCompile(regexp); 630 Handle<Object> compile_result = JscreCompile(regexp);
693 if (compile_result.is_null()) return compile_result; 631 if (compile_result.is_null()) return compile_result;
694 } 632 }
695 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray()); 633 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
696 634
635 // Prepare space for the return values.
697 int num_captures = JscreNumberOfCaptures(regexp); 636 int num_captures = JscreNumberOfCaptures(regexp);
698 637
699 OffsetsVector offsets((num_captures + 1) * 3); 638 OffsetsVector offsets((num_captures + 1) * 3);
700 639
701 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
702
703 Handle<String> subject16 = CachedStringToTwoByte(subject);
704
705 Handle<Object> result(JscreExecOnce(regexp,
706 num_captures,
707 subject,
708 previous_index,
709 subject16->GetTwoByteData(),
710 offsets.vector(),
711 offsets.length()));
712
713 return result;
714 }
715
716
717 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
718 Handle<String> subject) {
719 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
720 ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());
721
722 // Prepare space for the return values.
723 int number_of_registers = IrregexpNumberOfRegisters(regexp);
724 OffsetsVector offsets(number_of_registers);
725
726 int previous_index = 0; 640 int previous_index = 0;
727 641
728 Handle<JSArray> result = Factory::NewJSArray(0); 642 Handle<JSArray> result = Factory::NewJSArray(0);
729 int i = 0; 643 int i = 0;
730 Handle<Object> matches; 644 Handle<Object> matches;
731 645
732 Handle<String> subject16 = CachedStringToTwoByte(subject); 646 Handle<String> subject16 = CachedStringToTwoByte(subject);
733 647
734 do { 648 do {
735 if (previous_index > subject->length() || previous_index < 0) { 649 if (previous_index > subject->length() || previous_index < 0) {
736 // Per ECMA-262 15.10.6.2, if the previous index is greater than the 650 // Per ECMA-262 15.10.6.2, if the previous index is greater than the
737 // string length, there is no match. 651 // string length, there is no match.
738 matches = Factory::null_value(); 652 matches = Factory::null_value();
739 } else { 653 } else {
740 matches = IrregexpExecOnce(regexp, 654 matches = JscreExecOnce(regexp,
741 IrregexpNumberOfCaptures(regexp), 655 num_captures,
742 subject16, 656 subject,
743 previous_index, 657 previous_index,
744 offsets.vector(), 658 subject16->GetTwoByteData(),
745 offsets.length()); 659 offsets.vector(),
660 offsets.length());
746 661
747 if (matches->IsJSArray()) { 662 if (matches->IsJSArray()) {
748 SetElement(result, i, matches); 663 SetElement(result, i, matches);
749 i++; 664 i++;
750 previous_index = offsets.vector()[1]; 665 previous_index = offsets.vector()[1];
751 if (offsets.vector()[0] == offsets.vector()[1]) { 666 if (offsets.vector()[0] == offsets.vector()[1]) {
752 previous_index++; 667 previous_index++;
753 } 668 }
754 } 669 }
755 } 670 }
756 } while (matches->IsJSArray()); 671 } while (matches->IsJSArray());
757 672
758 // If we exited the loop with an exception, throw it. 673 // If we exited the loop with an exception, throw it.
759 if (matches->IsNull()) { 674 if (matches->IsNull()) {
760 // Exited loop normally. 675 // Exited loop normally.
761 return result; 676 return result;
762 } else { 677 } else {
763 // Exited loop with the exception in matches. 678 // Exited loop with the exception in matches.
764 return matches; 679 return matches;
765 } 680 }
766 } 681 }
767 682
768 683
769 Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp, 684 // Irregexp implementation.
770 Handle<String> subject) { 685
771 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE); 686
772 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) { 687 static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re,
773 Handle<Object> compile_result = JscreCompile(regexp); 688 bool is_ascii) {
774 if (compile_result.is_null()) return compile_result; 689 ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
690 Handle<FixedArray> alternatives(
691 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)));
692 ASSERT_EQ(2, alternatives->length());
693
694 int index = is_ascii ? 0 : 1;
695 Object* entry = alternatives->get(index);
696 if (!entry->IsNull()) {
697 return Handle<FixedArray>(FixedArray::cast(entry));
775 } 698 }
776 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray()); 699
700 // Compile the RegExp.
701 ZoneScope zone_scope(DELETE_ON_EXIT);
702
703 JSRegExp::Flags flags = re->GetFlags();
704
705 Handle<String> pattern(re->Pattern());
706 StringShape shape(*pattern);
707 if (!pattern->IsFlat(shape)) {
708 pattern->Flatten(shape);
709 }
710
711 RegExpParseResult parse_result;
712 FlatStringReader reader(pattern);
713 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
714 // Throw an exception if we fail to parse the pattern.
715 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once.
716 ThrowRegExpException(re,
717 pattern,
718 parse_result.error,
719 "malformed_regexp");
720 return Handle<FixedArray>::null();
721 }
722 Handle<FixedArray> compiled_entry =
723 RegExpEngine::Compile(&parse_result,
724 NULL,
725 flags.is_ignore_case(),
726 flags.is_multiline(),
727 pattern,
728 is_ascii);
729 if (!compiled_entry.is_null()) {
730 alternatives->set(index, *compiled_entry);
731 }
732 return compiled_entry;
733 }
734
735
736 int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) {
737 return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value();
738 }
739
740
741 int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) {
742 return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value();
743 }
744
745
746 Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) {
747 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
748 == RegExpMacroAssembler::kBytecodeImplementation);
749 return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex)));
750 }
751
752
753 Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) {
754 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
755 != RegExpMacroAssembler::kBytecodeImplementation);
756 return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex)));
757 }
758
759
760 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
761 Handle<String> pattern,
762 JSRegExp::Flags flags) {
763 // Make space for ASCII and UC16 versions.
764 Handle<FixedArray> alternatives = Factory::NewFixedArray(2);
765 alternatives->set_null(0);
766 alternatives->set_null(1);
767 Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives);
768 return re;
769 }
770
771
772 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
773 Handle<String> subject,
774 Handle<Object> index) {
775 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
776 ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
777
778 bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
779 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
780 if (irregexp.is_null()) {
781 // We can't handle the RegExp with Irregexp.
782 return Handle<Object>::null();
783 }
777 784
778 // Prepare space for the return values. 785 // Prepare space for the return values.
779 int num_captures = JscreNumberOfCaptures(regexp); 786 int number_of_registers = IrregexpNumberOfRegisters(irregexp);
787 OffsetsVector offsets(number_of_registers);
780 788
781 OffsetsVector offsets((num_captures + 1) * 3); 789 int num_captures = IrregexpNumberOfCaptures(irregexp);
790
791 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
792
793 #ifdef DEBUG
794 if (FLAG_trace_regexp_bytecodes) {
795 String* pattern = regexp->Pattern();
796 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
797 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
798 }
799 #endif
800 LOG(RegExpExecEvent(regexp, previous_index, subject));
801 return IrregexpExecOnce(irregexp,
802 num_captures,
803 subject,
804 previous_index,
805 offsets.vector(),
806 offsets.length());
807 }
808
809
810 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
811 Handle<String> subject) {
812 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
813
814 StringShape shape(*subject);
815 bool is_ascii = shape.IsAsciiRepresentation();
816 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
817 if (irregexp.is_null()) {
818 return Handle<Object>::null();
819 }
820
821 // Prepare space for the return values.
822 int number_of_registers = IrregexpNumberOfRegisters(irregexp);
823 OffsetsVector offsets(number_of_registers);
782 824
783 int previous_index = 0; 825 int previous_index = 0;
784 826
785 Handle<JSArray> result = Factory::NewJSArray(0); 827 Handle<JSArray> result = Factory::NewJSArray(0);
786 int i = 0; 828 int i = 0;
787 Handle<Object> matches; 829 Handle<Object> matches;
788 830
789 Handle<String> subject16 = CachedStringToTwoByte(subject); 831 if (!subject->IsFlat(shape)) {
832 subject->Flatten(shape);
833 }
790 834
791 do { 835 do {
792 if (previous_index > subject->length() || previous_index < 0) { 836 if (previous_index > subject->length() || previous_index < 0) {
793 // Per ECMA-262 15.10.6.2, if the previous index is greater than the 837 // Per ECMA-262 15.10.6.2, if the previous index is greater than the
794 // string length, there is no match. 838 // string length, there is no match.
795 matches = Factory::null_value(); 839 matches = Factory::null_value();
796 } else { 840 } else {
797 matches = JscreExecOnce(regexp, 841 #ifdef DEBUG
798 num_captures, 842 if (FLAG_trace_regexp_bytecodes) {
799 subject, 843 String* pattern = regexp->Pattern();
800 previous_index, 844 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
801 subject16->GetTwoByteData(), 845 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
802 offsets.vector(), 846 }
803 offsets.length()); 847 #endif
848 LOG(RegExpExecEvent(regexp, previous_index, subject));
849 matches = IrregexpExecOnce(irregexp,
850 IrregexpNumberOfCaptures(irregexp),
851 subject,
852 previous_index,
853 offsets.vector(),
854 offsets.length());
804 855
805 if (matches->IsJSArray()) { 856 if (matches->IsJSArray()) {
806 SetElement(result, i, matches); 857 SetElement(result, i, matches);
807 i++; 858 i++;
808 previous_index = offsets.vector()[1]; 859 previous_index = offsets.vector()[1];
809 if (offsets.vector()[0] == offsets.vector()[1]) { 860 if (offsets.vector()[0] == offsets.vector()[1]) {
810 previous_index++; 861 previous_index++;
811 } 862 }
812 } 863 }
813 } 864 }
814 } while (matches->IsJSArray()); 865 } while (matches->IsJSArray());
815 866
816 // If we exited the loop with an exception, throw it. 867 // If we exited the loop with an exception, throw it.
817 if (matches->IsNull()) { 868 if (matches->IsNull()) {
818 // Exited loop normally. 869 // Exited loop normally.
819 return result; 870 return result;
820 } else { 871 } else {
821 // Exited loop with the exception in matches. 872 // Exited loop with the exception in matches.
822 return matches; 873 return matches;
823 } 874 }
824 } 875 }
825 876
826 877
827 int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) { 878 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp,
828 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); 879 int num_captures,
829 return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value(); 880 Handle<String> subject,
881 int previous_index,
882 int* offsets_vector,
883 int offsets_vector_length) {
884 bool rc;
885
886 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
887
888 switch (tag) {
889 case RegExpMacroAssembler::kIA32Implementation: {
890 #ifndef ARM
891 if (!subject->IsFlat(StringShape(*subject))) {
892 FlattenString(subject);
893 }
894 Handle<Code> code = IrregexpNativeCode(irregexp);
895
896 StringShape shape(*subject);
897
898 // Character offsets into string.
899 int start_offset = previous_index;
900 int end_offset = subject->length(shape);
901
902 if (shape.IsCons()) {
903 subject = Handle<String>(ConsString::cast(*subject)->first());
904 } else if (shape.IsSliced()) {
905 SlicedString* slice = SlicedString::cast(*subject);
906 start_offset += slice->start();
907 end_offset += slice->start();
908 subject = Handle<String>(slice->buffer());
909 }
910
911 // String is now either Sequential or External
912 StringShape flatshape(*subject);
913 bool is_ascii = flatshape.IsAsciiRepresentation();
914 int char_size = is_ascii ? sizeof(char) : sizeof(uc16); // NOLINT
Erik Corry 2008/12/08 12:47:52 It's part of the definition of the language that s
915
916 if (flatshape.IsExternal()) {
917 const byte* address;
918 if (is_ascii) {
919 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
920 address = reinterpret_cast<const byte*>(ext->resource()->data());
921 } else {
922 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
923 address = reinterpret_cast<const byte*>(ext->resource()->data());
924 }
925 rc = RegExpMacroAssemblerIA32::Execute(
926 *code,
927 &address,
928 start_offset * char_size,
929 end_offset * char_size,
930 offsets_vector,
931 previous_index == 0);
932 } else { // Sequential string
933 int byte_offset =
934 is_ascii ? SeqAsciiString::kHeaderSize - kHeapObjectTag:
935 SeqTwoByteString::kHeaderSize - kHeapObjectTag;
Erik Corry 2008/12/08 12:47:52 SeqAsciiString and SeqTwoByteString have methods f
936 rc = RegExpMacroAssemblerIA32::Execute(
937 *code,
938 subject.location(),
939 byte_offset + start_offset * char_size,
940 byte_offset + end_offset * char_size,
941 offsets_vector,
942 previous_index == 0);
943 }
944
945 if (rc) {
946 // Capture values are relative to start_offset only.
947 for (int i = 0; i < offsets_vector_length; i++) {
948 if (offsets_vector[i] >= 0) {
949 offsets_vector[i] += previous_index;
950 }
951 }
952 }
953 break;
954 #else
955 UNIMPLEMENTED();
956 rc = false;
957 break;
958 #endif
959 }
960 case RegExpMacroAssembler::kBytecodeImplementation: {
961 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
962 offsets_vector[i] = -1;
963 }
964 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp);
965
966 Handle<String> two_byte_subject = CachedStringToTwoByte(subject);
967
968 rc = IrregexpInterpreter::Match(byte_codes,
969 two_byte_subject,
970 offsets_vector,
971 previous_index);
972 break;
973 }
974 case RegExpMacroAssembler::kARMImplementation:
975 default:
976 UNREACHABLE();
977 rc = false;
978 break;
979 }
980
981 if (!rc) {
982 return Factory::null_value();
983 }
984
985 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
986 // The captures come in (start, end+1) pairs.
987 for (int i = 0; i < 2 * (num_captures+1); i += 2) {
988 array->set(i, Smi::FromInt(offsets_vector[i]));
989 array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
990 }
991 return Factory::NewJSArrayWithElements(array);
830 } 992 }
831 993
832 994
833 ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
834 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
835 return ByteArray::cast(value->get(kJscreInternalIndex));
836 }
837
838
839 int RegExpImpl::IrregexpNumberOfCaptures(Handle<JSRegExp> re) {
840 FixedArray* value =
841 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
842 return Smi::cast(value->get(kIrregexpNumberOfCapturesIndex))->value();
843 }
844
845
846 int RegExpImpl::IrregexpNumberOfRegisters(Handle<JSRegExp> re) {
847 FixedArray* value =
848 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
849 return Smi::cast(value->get(kIrregexpNumberOfRegistersIndex))->value();
850 }
851
852
853 Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) {
854 FixedArray* value =
855 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
856 return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex)));
857 }
858
859
860 // ------------------------------------------------------------------- 995 // -------------------------------------------------------------------
861 // Implementation of the Irregexp regular expression engine. 996 // Implementation of the Irregexp regular expression engine.
862 // 997 //
863 // The Irregexp regular expression engine is intended to be a complete 998 // The Irregexp regular expression engine is intended to be a complete
864 // implementation of ECMAScript regular expressions. It generates either 999 // implementation of ECMAScript regular expressions. It generates either
865 // bytecodes or native code. 1000 // bytecodes or native code.
866 1001
867 // The Irregexp regexp engine is structured in three steps. 1002 // The Irregexp regexp engine is structured in three steps.
868 // 1) The parser generates an abstract syntax tree. See ast.cc. 1003 // 1) The parser generates an abstract syntax tree. See ast.cc.
869 // 2) From the AST a node network is created. The nodes are all 1004 // 2) From the AST a node network is created. The nodes are all
(...skipping 2598 matching lines...) Expand 10 before | Expand all | Expand 10 after
3468 void DispatchTableConstructor::VisitAction(ActionNode* that) { 3603 void DispatchTableConstructor::VisitAction(ActionNode* that) {
3469 RegExpNode* target = that->on_success(); 3604 RegExpNode* target = that->on_success();
3470 target->Accept(this); 3605 target->Accept(this);
3471 } 3606 }
3472 3607
3473 3608
3474 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, 3609 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
3475 RegExpNode** node_return, 3610 RegExpNode** node_return,
3476 bool ignore_case, 3611 bool ignore_case,
3477 bool is_multiline, 3612 bool is_multiline,
3478 Handle<String> pattern) { 3613 Handle<String> pattern,
3614 bool is_ascii) {
3479 RegExpCompiler compiler(input->capture_count, ignore_case); 3615 RegExpCompiler compiler(input->capture_count, ignore_case);
3480 // Wrap the body of the regexp in capture #0. 3616 // Wrap the body of the regexp in capture #0.
3481 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, 3617 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
3482 0, 3618 0,
3483 &compiler, 3619 &compiler,
3484 compiler.accept()); 3620 compiler.accept());
3485 // Add a .*? at the beginning, outside the body capture. 3621 // Add a .*? at the beginning, outside the body capture.
3486 // Note: We could choose to not add this if the regexp is anchored at 3622 // Note: We could choose to not add this if the regexp is anchored at
3487 // the start of the input but I'm not sure how best to do that and 3623 // the start of the input but I'm not sure how best to do that and
3488 // since we don't even handle ^ yet I'm saving that optimization for 3624 // since we don't even handle ^ yet I'm saving that optimization for
3489 // later. 3625 // later.
3490 RegExpNode* node = RegExpQuantifier::ToNode(0, 3626 RegExpNode* node = RegExpQuantifier::ToNode(0,
3491 RegExpQuantifier::kInfinity, 3627 RegExpQuantifier::kInfinity,
3492 false, 3628 false,
3493 new RegExpCharacterClass('*'), 3629 new RegExpCharacterClass('*'),
3494 &compiler, 3630 &compiler,
3495 captured_body); 3631 captured_body);
3496 if (node_return != NULL) *node_return = node; 3632 if (node_return != NULL) *node_return = node;
3497 Analysis analysis(ignore_case); 3633 Analysis analysis(ignore_case);
3498 analysis.EnsureAnalyzed(node); 3634 analysis.EnsureAnalyzed(node);
3499 3635
3500 NodeInfo info = *node->info(); 3636 NodeInfo info = *node->info();
3501 node = node->EnsureExpanded(&info); 3637 node = node->EnsureExpanded(&info);
3502 3638
3503 if (!FLAG_irregexp) {
3504 return Handle<FixedArray>::null();
3505 }
3506
3507 if (is_multiline && !FLAG_attempt_multiline_irregexp) { 3639 if (is_multiline && !FLAG_attempt_multiline_irregexp) {
3508 return Handle<FixedArray>::null(); 3640 return Handle<FixedArray>::null();
3509 } 3641 }
3510 3642
3511 if (FLAG_irregexp_native) { 3643 if (FLAG_irregexp_native) {
3512 #ifdef ARM 3644 #ifdef ARM
3513 // Unimplemented, fall through to bytecode implementation. 3645 // Unimplemented, fall through to bytecode implementation.
3514 #else // IA32 3646 #else // IA32
3515 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, 3647 RegExpMacroAssemblerIA32::Mode mode;
3648 if (is_ascii) {
3649 mode = RegExpMacroAssemblerIA32::ASCII;
3650 } else {
3651 mode = RegExpMacroAssemblerIA32::UC16;
3652 }
3653 RegExpMacroAssemblerIA32 macro_assembler(mode,
3516 (input->capture_count + 1) * 2); 3654 (input->capture_count + 1) * 2);
3517 return compiler.Assemble(&macro_assembler, 3655 return compiler.Assemble(&macro_assembler,
3518 node, 3656 node,
3519 input->capture_count, 3657 input->capture_count,
3520 pattern); 3658 pattern);
3521 #endif 3659 #endif
3522 } 3660 }
3523 EmbeddedVector<byte, 1024> codes; 3661 EmbeddedVector<byte, 1024> codes;
3524 RegExpMacroAssemblerIrregexp macro_assembler(codes); 3662 RegExpMacroAssemblerIrregexp macro_assembler(codes);
3525 return compiler.Assemble(&macro_assembler, 3663 return compiler.Assemble(&macro_assembler,
3526 node, 3664 node,
3527 input->capture_count, 3665 input->capture_count,
3528 pattern); 3666 pattern);
3529 } 3667 }
3530 3668
3531 3669
3532 }} // namespace v8::internal 3670 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | src/regexp-macro-assembler-ia32.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698