Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(554)

Side by Side Diff: src/jsregexp.cc

Issue 10943: Wire Regexp2000 up to the normal JS RegExp object. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/regexp2000/
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 22 matching lines...) Expand all
33 #include "ast.h" 33 #include "ast.h"
34 #include "execution.h" 34 #include "execution.h"
35 #include "factory.h" 35 #include "factory.h"
36 #include "jsregexp-inl.h" 36 #include "jsregexp-inl.h"
37 #include "platform.h" 37 #include "platform.h"
38 #include "runtime.h" 38 #include "runtime.h"
39 #include "top.h" 39 #include "top.h"
40 #include "compilation-cache.h" 40 #include "compilation-cache.h"
41 #include "string-stream.h" 41 #include "string-stream.h"
42 #include "parser.h" 42 #include "parser.h"
43 #include "assembler-re2k.h"
43 #include "regexp-macro-assembler.h" 44 #include "regexp-macro-assembler.h"
45 #include "regexp-macro-assembler-re2k.h"
46 #include "interpreter-re2k.h"
44 47
45 // Including pcre.h undefines DEBUG to avoid getting debug output from 48 // Including pcre.h undefines DEBUG to avoid getting debug output from
46 // the JSCRE implementation. Make sure to redefine it in debug mode 49 // the JSCRE implementation. Make sure to redefine it in debug mode
47 // after having included the header file. 50 // after having included the header file.
48 #ifdef DEBUG 51 #ifdef DEBUG
49 #include "third_party/jscre/pcre.h" 52 #include "third_party/jscre/pcre.h"
50 #define DEBUG 53 #define DEBUG
51 #else 54 #else
52 #include "third_party/jscre/pcre.h" 55 #include "third_party/jscre/pcre.h"
53 #endif 56 #endif
54 57
55 58
56 namespace v8 { namespace internal { 59 namespace v8 { namespace internal {
57 60
58 61
59 #define CAPTURE_INDEX 0
60 #define INTERNAL_INDEX 1
61
62 static Failure* malloc_failure; 62 static Failure* malloc_failure;
63 63
64 static void* JSREMalloc(size_t size) { 64 static void* JSREMalloc(size_t size) {
65 Object* obj = Heap::AllocateByteArray(size); 65 Object* obj = Heap::AllocateByteArray(size);
66 66
67 // If allocation failed, return a NULL pointer to JSRE, and jsRegExpCompile 67 // If allocation failed, return a NULL pointer to JSRE, and jsRegExpCompile
68 // will return NULL to the caller, performs GC there. 68 // will return NULL to the caller, performs GC there.
69 // Also pass failure information to the caller. 69 // Also pass failure information to the caller.
70 if (obj->IsFailure()) { 70 if (obj->IsFailure()) {
71 malloc_failure = Failure::cast(obj); 71 malloc_failure = Failure::cast(obj);
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after
222 if (atom != NULL && !flags.is_ignore_case()) { 222 if (atom != NULL && !flags.is_ignore_case()) {
223 if (parse_result.has_character_escapes) { 223 if (parse_result.has_character_escapes) {
224 Vector<const uc16> atom_pattern = atom->data(); 224 Vector<const uc16> atom_pattern = atom->data();
225 Handle<String> atom_string = 225 Handle<String> atom_string =
226 Factory::NewStringFromTwoByte(atom_pattern); 226 Factory::NewStringFromTwoByte(atom_pattern);
227 result = AtomCompile(re, pattern, flags, atom_string); 227 result = AtomCompile(re, pattern, flags, atom_string);
228 } else { 228 } else {
229 result = AtomCompile(re, pattern, flags, pattern); 229 result = AtomCompile(re, pattern, flags, pattern);
230 } 230 }
231 } else { 231 } else {
232 result = JsrePrepare(re, pattern, flags); 232 RegExpNode* node = NULL;
233 Handle<FixedArray> re2k_data =
234 RegExpEngine::Compile(&parse_result, &node);
235 if (re2k_data.is_null()) {
236 result = JscrePrepare(re, pattern, flags);
237 } else {
238 result = Re2kPrepare(re, pattern, flags, re2k_data);
239 }
233 } 240 }
234 Object* data = re->data(); 241 Object* data = re->data();
235 if (data->IsFixedArray()) { 242 if (data->IsFixedArray()) {
236 // If compilation succeeded then the data is set on the regexp 243 // If compilation succeeded then the data is set on the regexp
237 // and we can store it in the cache. 244 // and we can store it in the cache.
238 Handle<FixedArray> data(FixedArray::cast(re->data())); 245 Handle<FixedArray> data(FixedArray::cast(re->data()));
239 CompilationCache::PutRegExp(pattern, flags, data); 246 CompilationCache::PutRegExp(pattern, flags, data);
240 } 247 }
241 } 248 }
242 249
243 LOG(RegExpCompileEvent(re, in_cache)); 250 LOG(RegExpCompileEvent(re, in_cache));
244 return result; 251 return result;
245 } 252 }
246 253
247 254
248 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, 255 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
249 Handle<String> subject, 256 Handle<String> subject,
250 Handle<Object> index) { 257 Handle<Object> index) {
251 switch (regexp->TypeTag()) { 258 switch (regexp->TypeTag()) {
252 case JSRegExp::JSCRE: 259 case JSRegExp::JSCRE:
253 return JsreExec(regexp, subject, index); 260 return JscreExec(regexp, subject, index);
254 case JSRegExp::ATOM: 261 case JSRegExp::ATOM:
255 return AtomExec(regexp, subject, index); 262 return AtomExec(regexp, subject, index);
263 case JSRegExp::RE2K:
264 return Re2kExec(regexp, subject, index);
256 default: 265 default:
257 UNREACHABLE(); 266 UNREACHABLE();
258 return Handle<Object>(); 267 return Handle<Object>();
259 } 268 }
260 } 269 }
261 270
262 271
263 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, 272 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
264 Handle<String> subject) { 273 Handle<String> subject) {
265 switch (regexp->TypeTag()) { 274 switch (regexp->TypeTag()) {
266 case JSRegExp::JSCRE: 275 case JSRegExp::JSCRE:
267 return JsreExecGlobal(regexp, subject); 276 return JscreExecGlobal(regexp, subject);
268 case JSRegExp::ATOM: 277 case JSRegExp::ATOM:
269 return AtomExecGlobal(regexp, subject); 278 return AtomExecGlobal(regexp, subject);
279 case JSRegExp::RE2K:
280 return Re2kExecGlobal(regexp, subject);
270 default: 281 default:
271 UNREACHABLE(); 282 UNREACHABLE();
272 return Handle<Object>(); 283 return Handle<Object>();
273 } 284 }
274 } 285 }
275 286
276 287
277 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, 288 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
278 Handle<String> pattern, 289 Handle<String> pattern,
279 JSRegExp::Flags flags, 290 JSRegExp::Flags flags,
(...skipping 11 matching lines...) Expand all
291 uint32_t start_index; 302 uint32_t start_index;
292 if (!Array::IndexFromObject(*index, &start_index)) { 303 if (!Array::IndexFromObject(*index, &start_index)) {
293 return Handle<Smi>(Smi::FromInt(-1)); 304 return Handle<Smi>(Smi::FromInt(-1));
294 } 305 }
295 306
296 LOG(RegExpExecEvent(re, start_index, subject)); 307 LOG(RegExpExecEvent(re, start_index, subject));
297 int value = Runtime::StringMatch(subject, needle, start_index); 308 int value = Runtime::StringMatch(subject, needle, start_index);
298 if (value == -1) return Factory::null_value(); 309 if (value == -1) return Factory::null_value();
299 310
300 Handle<FixedArray> array = Factory::NewFixedArray(2); 311 Handle<FixedArray> array = Factory::NewFixedArray(2);
301 array->set(0, 312 array->set(0, Smi::FromInt(value));
302 Smi::FromInt(value), 313 array->set(1, Smi::FromInt(value + needle->length()));
303 SKIP_WRITE_BARRIER);
304 array->set(1,
305 Smi::FromInt(value + needle->length()),
306 SKIP_WRITE_BARRIER);
307 return Factory::NewJSArrayWithElements(array); 314 return Factory::NewJSArrayWithElements(array);
308 } 315 }
309 316
310 317
311 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, 318 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
312 Handle<String> subject) { 319 Handle<String> subject) {
313 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); 320 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
314 Handle<JSArray> result = Factory::NewJSArray(1); 321 Handle<JSArray> result = Factory::NewJSArray(1);
315 int index = 0; 322 int index = 0;
316 int match_count = 0; 323 int match_count = 0;
317 int subject_length = subject->length(); 324 int subject_length = subject->length();
318 int needle_length = needle->length(); 325 int needle_length = needle->length();
319 while (true) { 326 while (true) {
320 LOG(RegExpExecEvent(re, index, subject)); 327 LOG(RegExpExecEvent(re, index, subject));
321 int value = -1; 328 int value = -1;
322 if (index + needle_length <= subject_length) { 329 if (index + needle_length <= subject_length) {
323 value = Runtime::StringMatch(subject, needle, index); 330 value = Runtime::StringMatch(subject, needle, index);
324 } 331 }
325 if (value == -1) break; 332 if (value == -1) break;
326 HandleScope scope; 333 HandleScope scope;
327 int end = value + needle_length; 334 int end = value + needle_length;
328 335
329 Handle<FixedArray> array = Factory::NewFixedArray(2); 336 Handle<FixedArray> array = Factory::NewFixedArray(2);
330 array->set(0, 337 array->set(0, Smi::FromInt(value));
331 Smi::FromInt(value), 338 array->set(1, Smi::FromInt(end));
332 SKIP_WRITE_BARRIER);
333 array->set(1,
334 Smi::FromInt(end),
335 SKIP_WRITE_BARRIER);
336 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); 339 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
337 SetElement(result, match_count, pair); 340 SetElement(result, match_count, pair);
338 match_count++; 341 match_count++;
339 index = end; 342 index = end;
340 if (needle_length == 0) index++; 343 if (needle_length == 0) index++;
341 } 344 }
342 return result; 345 return result;
343 } 346 }
344 347
345 348
346 Handle<Object>RegExpImpl::JsrePrepare(Handle<JSRegExp> re, 349 Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,
347 Handle<String> pattern, 350 Handle<String> pattern,
348 JSRegExp::Flags flags) { 351 JSRegExp::Flags flags) {
349 Handle<Object> value(Heap::undefined_value()); 352 Handle<Object> value(Heap::undefined_value());
350 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); 353 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
351 return re; 354 return re;
352 } 355 }
353 356
354 357
358 Handle<Object>RegExpImpl::Re2kPrepare(Handle<JSRegExp> re,
359 Handle<String> pattern,
360 JSRegExp::Flags flags,
361 Handle<FixedArray> re2k_data) {
362 Factory::SetRegExpData(re, JSRegExp::RE2K, pattern, flags, re2k_data);
363 return re;
364 }
365
366
355 static inline Object* DoCompile(String* pattern, 367 static inline Object* DoCompile(String* pattern,
356 JSRegExp::Flags flags, 368 JSRegExp::Flags flags,
357 unsigned* number_of_captures, 369 unsigned* number_of_captures,
358 const char** error_message, 370 const char** error_message,
359 JscreRegExp** code) { 371 JscreRegExp** code) {
360 JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case() 372 JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()
361 ? JSRegExpIgnoreCase 373 ? JSRegExpIgnoreCase
362 : JSRegExpDoNotIgnoreCase; 374 : JSRegExpDoNotIgnoreCase;
363 JSRegExpMultilineOption multiline_option = flags.is_multiline() 375 JSRegExpMultilineOption multiline_option = flags.is_multiline()
364 ? JSRegExpMultiline 376 ? JSRegExpMultiline
(...skipping 26 matching lines...) Expand all
391 const char** error_message, 403 const char** error_message,
392 JscreRegExp** code) { 404 JscreRegExp** code) {
393 CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern, 405 CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern,
394 flags, 406 flags,
395 number_of_captures, 407 number_of_captures,
396 error_message, 408 error_message,
397 code)); 409 code));
398 } 410 }
399 411
400 412
401 Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re) { 413 Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) {
402 ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE); 414 ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE);
403 ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()); 415 ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined());
404 416
405 Handle<String> pattern(re->Pattern()); 417 Handle<String> pattern(re->Pattern());
406 JSRegExp::Flags flags = re->GetFlags(); 418 JSRegExp::Flags flags = re->GetFlags();
407 419
408 Handle<String> two_byte_pattern = StringToTwoByte(pattern); 420 Handle<String> two_byte_pattern = StringToTwoByte(pattern);
409 421
410 unsigned number_of_captures; 422 unsigned number_of_captures;
411 const char* error_message = NULL; 423 const char* error_message = NULL;
(...skipping 16 matching lines...) Expand all
428 Handle<Object> regexp_err = 440 Handle<Object> regexp_err =
429 Factory::NewSyntaxError("malformed_regexp", array); 441 Factory::NewSyntaxError("malformed_regexp", array);
430 Top::Throw(*regexp_err); 442 Top::Throw(*regexp_err);
431 return Handle<Object>(); 443 return Handle<Object>();
432 } 444 }
433 445
434 // Convert the return address to a ByteArray pointer. 446 // Convert the return address to a ByteArray pointer.
435 Handle<ByteArray> internal( 447 Handle<ByteArray> internal(
436 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code))); 448 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));
437 449
438 Handle<FixedArray> value = Factory::NewFixedArray(2); 450 Handle<FixedArray> value = Factory::NewFixedArray(kJscreDataLength);
439 value->set(CAPTURE_INDEX, Smi::FromInt(number_of_captures)); 451 value->set(kJscreNumberOfCapturesIndex, Smi::FromInt(number_of_captures));
440 value->set(INTERNAL_INDEX, *internal); 452 value->set(kJscreInternalIndex, *internal);
441 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); 453 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
442 454
443 return re; 455 return re;
444 } 456 }
445 457
446 458
447 Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp, 459 Handle<Object> RegExpImpl::Re2kExecOnce(Handle<JSRegExp> regexp,
448 int num_captures, 460 int num_captures,
449 Handle<String> subject, 461 Handle<String> subject,
450 int previous_index, 462 int previous_index,
451 const uc16* two_byte_subject, 463 const uc16* two_byte_subject,
452 int* offsets_vector, 464 int* offsets_vector,
453 int offsets_vector_length) { 465 int offsets_vector_length) {
466 bool rc;
467 {
468 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
469 offsets_vector[i] = -1;
470 }
471
472 AssertNoAllocation a;
473
474 LOG(RegExpExecEvent(regexp, previous_index, subject));
475
476 Handle<ByteArray> byte_codes = Re2kCode(regexp);
477
478 rc = Re2kInterpreter::Match(byte_codes,
479 subject,
480 offsets_vector,
481 previous_index);
482 }
483
484 if (!rc) {
485 return Factory::null_value();
486 }
487
488 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
489 // The captures come in (start, end+1) pairs.
490 for (int i = 0; i < 2 * (num_captures+1); i += 2) {
491 array->set(i, Smi::FromInt(offsets_vector[i]));
492 array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
493 }
494 return Factory::NewJSArrayWithElements(array);
495 }
496
497
498 Handle<Object> RegExpImpl::JscreExecOnce(Handle<JSRegExp> regexp,
499 int num_captures,
500 Handle<String> subject,
501 int previous_index,
502 const uc16* two_byte_subject,
503 int* offsets_vector,
504 int offsets_vector_length) {
454 int rc; 505 int rc;
455 { 506 {
456 AssertNoAllocation a; 507 AssertNoAllocation a;
457 ByteArray* internal = JsreInternal(regexp); 508 ByteArray* internal = JscreInternal(regexp);
458 const JscreRegExp* js_regexp = 509 const JscreRegExp* js_regexp =
459 reinterpret_cast<JscreRegExp*>(internal->GetDataStartAddress()); 510 reinterpret_cast<JscreRegExp*>(internal->GetDataStartAddress());
460 511
461 LOG(RegExpExecEvent(regexp, previous_index, subject)); 512 LOG(RegExpExecEvent(regexp, previous_index, subject));
462 513
463 rc = jsRegExpExecute(js_regexp, 514 rc = jsRegExpExecute(js_regexp,
464 two_byte_subject, 515 two_byte_subject,
465 subject->length(), 516 subject->length(),
466 previous_index, 517 previous_index,
467 offsets_vector, 518 offsets_vector,
(...skipping 13 matching lines...) Expand all
481 Handle<Object> code(Smi::FromInt(rc)); 532 Handle<Object> code(Smi::FromInt(rc));
482 Handle<Object> args[2] = { Factory::LookupAsciiSymbol("jsre_exec"), code }; 533 Handle<Object> args[2] = { Factory::LookupAsciiSymbol("jsre_exec"), code };
483 Handle<Object> regexp_err( 534 Handle<Object> regexp_err(
484 Factory::NewTypeError("jsre_error", HandleVector(args, 2))); 535 Factory::NewTypeError("jsre_error", HandleVector(args, 2)));
485 return Handle<Object>(Top::Throw(*regexp_err)); 536 return Handle<Object>(Top::Throw(*regexp_err));
486 } 537 }
487 538
488 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); 539 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
489 // The captures come in (start, end+1) pairs. 540 // The captures come in (start, end+1) pairs.
490 for (int i = 0; i < 2 * (num_captures+1); i += 2) { 541 for (int i = 0; i < 2 * (num_captures+1); i += 2) {
491 array->set(i, 542 array->set(i, Smi::FromInt(offsets_vector[i]));
492 Smi::FromInt(offsets_vector[i]), 543 array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
493 SKIP_WRITE_BARRIER);
494 array->set(i+1,
495 Smi::FromInt(offsets_vector[i+1]),
496 SKIP_WRITE_BARRIER);
497 } 544 }
498 return Factory::NewJSArrayWithElements(array); 545 return Factory::NewJSArrayWithElements(array);
499 } 546 }
500 547
501 548
502 class OffsetsVector { 549 class OffsetsVector {
503 public: 550 public:
504 inline OffsetsVector(int num_captures) { 551 inline OffsetsVector(int num_registers) :
505 offsets_vector_length_ = (num_captures + 1) * 3; 552 offsets_vector_length_(num_registers) {
506 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { 553 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
507 vector_ = NewArray<int>(offsets_vector_length_); 554 vector_ = NewArray<int>(offsets_vector_length_);
508 } else { 555 } else {
509 vector_ = static_offsets_vector_; 556 vector_ = static_offsets_vector_;
510 } 557 }
511 } 558 }
512 559
513 560
514 inline ~OffsetsVector() { 561 inline ~OffsetsVector() {
515 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { 562 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
516 DeleteArray(vector_); 563 DeleteArray(vector_);
517 vector_ = NULL; 564 vector_ = NULL;
518 } 565 }
519 } 566 }
520 567
521 568
522 inline int* vector() { 569 inline int* vector() {
523 return vector_; 570 return vector_;
524 } 571 }
525 572
526 573
527 inline int length() { 574 inline int length() {
528 return offsets_vector_length_; 575 return offsets_vector_length_;
529 } 576 }
530 577
531 private: 578 private:
532 int* vector_; 579 int* vector_;
533 int offsets_vector_length_; 580 int offsets_vector_length_;
534 static const int kStaticOffsetsVectorSize = 30; 581 static const int kStaticOffsetsVectorSize = 50;
535 static int static_offsets_vector_[kStaticOffsetsVectorSize]; 582 static int static_offsets_vector_[kStaticOffsetsVectorSize];
536 }; 583 };
537 584
538 585
539 int OffsetsVector::static_offsets_vector_[ 586 int OffsetsVector::static_offsets_vector_[
540 OffsetsVector::kStaticOffsetsVectorSize]; 587 OffsetsVector::kStaticOffsetsVectorSize];
541 588
542 589
543 Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp, 590 Handle<Object> RegExpImpl::Re2kExec(Handle<JSRegExp> regexp,
544 Handle<String> subject, 591 Handle<String> subject,
545 Handle<Object> index) { 592 Handle<Object> index) {
593 ASSERT_EQ(regexp->TypeTag(), JSRegExp::RE2K);
594 ASSERT(!regexp->DataAt(JSRegExp::kRe2kDataIndex)->IsUndefined());
595
596 // Prepare space for the return values.
597 int number_of_registers = Re2kNumberOfRegisters(regexp);
598 OffsetsVector offsets(number_of_registers);
599
600 int num_captures = Re2kNumberOfCaptures(regexp);
601
602 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
603
604 Handle<String> subject16 = CachedStringToTwoByte(subject);
605
606 Handle<Object> result(Re2kExecOnce(regexp,
607 num_captures,
608 subject,
609 previous_index,
610 subject16->GetTwoByteData(),
611 offsets.vector(),
612 offsets.length()));
613 return result;
614 }
615
616
617 Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
618 Handle<String> subject,
619 Handle<Object> index) {
546 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE); 620 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
547 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) { 621 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
548 Handle<Object> compile_result = JsreCompile(regexp); 622 Handle<Object> compile_result = JscreCompile(regexp);
623 if (compile_result->IsException()) return compile_result;
624 }
625 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
626
627 int num_captures = JscreNumberOfCaptures(regexp);
628
629 OffsetsVector offsets((num_captures + 1) * 3);
630
631 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
632
633 Handle<String> subject16 = CachedStringToTwoByte(subject);
634
635 Handle<Object> result(JscreExecOnce(regexp,
636 num_captures,
637 subject,
638 previous_index,
639 subject16->GetTwoByteData(),
640 offsets.vector(),
641 offsets.length()));
642
643 return result;
644 }
645
646
647 Handle<Object> RegExpImpl::Re2kExecGlobal(Handle<JSRegExp> regexp,
648 Handle<String> subject) {
649 ASSERT_EQ(regexp->TypeTag(), JSRegExp::RE2K);
Lasse Reichstein 2008/11/14 10:03:40 This method looks like an almost, but not entirely
650 ASSERT(!regexp->DataAt(JSRegExp::kRe2kDataIndex)->IsUndefined());
651
652 // Prepare space for the return values.
653 int number_of_registers = Re2kNumberOfRegisters(regexp);
654 OffsetsVector offsets(number_of_registers);
655
656 int previous_index = 0;
657
658 Handle<JSArray> result = Factory::NewJSArray(0);
659 int i = 0;
660 Handle<Object> matches;
661
662 Handle<String> subject16 = CachedStringToTwoByte(subject);
663
664 do {
665 if (previous_index > subject->length() || previous_index < 0) {
666 // Per ECMA-262 15.10.6.2, if the previous index is greater than the
667 // string length, there is no match.
668 matches = Factory::null_value();
669 } else {
670 matches = Re2kExecOnce(regexp,
671 Re2kNumberOfCaptures(regexp),
672 subject,
673 previous_index,
674 subject16->GetTwoByteData(),
675 offsets.vector(),
676 offsets.length());
677
678 if (matches->IsJSArray()) {
679 SetElement(result, i, matches);
680 i++;
681 previous_index = offsets.vector()[1];
682 if (offsets.vector()[0] == offsets.vector()[1]) {
683 previous_index++;
684 }
685 }
686 }
687 } while (matches->IsJSArray());
688
689 // If we exited the loop with an exception, throw it.
690 if (matches->IsNull()) { // Exited loop normally.
691 return result;
692 } else { // Exited loop with the exception in matches.
693 return matches;
694 }
695 }
696
697
698 Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
699 Handle<String> subject) {
700 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
701 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
702 Handle<Object> compile_result = JscreCompile(regexp);
549 if (compile_result->IsException()) return compile_result; 703 if (compile_result->IsException()) return compile_result;
550 } 704 }
551 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray()); 705 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
552 706
553 // Prepare space for the return values. 707 // Prepare space for the return values.
554 int num_captures = JsreCapture(regexp); 708 int num_captures = JscreNumberOfCaptures(regexp);
555 709
556 OffsetsVector offsets(num_captures); 710 OffsetsVector offsets((num_captures + 1) * 3);
557
558 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
559
560 Handle<String> subject16 = CachedStringToTwoByte(subject);
561
562 Handle<Object> result(JsreExecOnce(regexp, num_captures, subject,
563 previous_index,
564 subject16->GetTwoByteData(),
565 offsets.vector(), offsets.length()));
566
567 return result;
568 }
569
570
571 Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp,
572 Handle<String> subject) {
573 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
574 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
575 Handle<Object> compile_result = JsreCompile(regexp);
576 if (compile_result->IsException()) return compile_result;
577 }
578 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
579
580 // Prepare space for the return values.
581 int num_captures = JsreCapture(regexp);
582
583 OffsetsVector offsets(num_captures);
584 711
585 int previous_index = 0; 712 int previous_index = 0;
586 713
587 Handle<JSArray> result = Factory::NewJSArray(0); 714 Handle<JSArray> result = Factory::NewJSArray(0);
588 int i = 0; 715 int i = 0;
589 Handle<Object> matches; 716 Handle<Object> matches;
590 717
591 Handle<String> subject16 = CachedStringToTwoByte(subject); 718 Handle<String> subject16 = CachedStringToTwoByte(subject);
592 719
593 do { 720 do {
594 if (previous_index > subject->length() || previous_index < 0) { 721 if (previous_index > subject->length() || previous_index < 0) {
595 // Per ECMA-262 15.10.6.2, if the previous index is greater than the 722 // Per ECMA-262 15.10.6.2, if the previous index is greater than the
596 // string length, there is no match. 723 // string length, there is no match.
597 matches = Factory::null_value(); 724 matches = Factory::null_value();
598 } else { 725 } else {
599 matches = JsreExecOnce(regexp, num_captures, subject, previous_index, 726 matches = JscreExecOnce(regexp,
600 subject16->GetTwoByteData(), 727 num_captures,
601 offsets.vector(), offsets.length()); 728 subject,
729 previous_index,
730 subject16->GetTwoByteData(),
731 offsets.vector(),
732 offsets.length());
602 733
603 if (matches->IsJSArray()) { 734 if (matches->IsJSArray()) {
604 SetElement(result, i, matches); 735 SetElement(result, i, matches);
605 i++; 736 i++;
606 previous_index = offsets.vector()[1]; 737 previous_index = offsets.vector()[1];
607 if (offsets.vector()[0] == offsets.vector()[1]) { 738 if (offsets.vector()[0] == offsets.vector()[1]) {
608 previous_index++; 739 previous_index++;
609 } 740 }
610 } 741 }
611 } 742 }
612 } while (matches->IsJSArray()); 743 } while (matches->IsJSArray());
613 744
614 // If we exited the loop with an exception, throw it. 745 // If we exited the loop with an exception, throw it.
615 if (matches->IsNull()) { // Exited loop normally. 746 if (matches->IsNull()) { // Exited loop normally.
616 return result; 747 return result;
617 } else { // Exited loop with the exception in matches. 748 } else { // Exited loop with the exception in matches.
618 return matches; 749 return matches;
619 } 750 }
620 } 751 }
621 752
622 753
623 int RegExpImpl::JsreCapture(Handle<JSRegExp> re) { 754 int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
624 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); 755 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
625 return Smi::cast(value->get(CAPTURE_INDEX))->value(); 756 return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->
757 value();
626 } 758 }
627 759
628 760
629 ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) { 761 ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
630 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); 762 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
631 return ByteArray::cast(value->get(INTERNAL_INDEX)); 763 return ByteArray::cast(value->get(kJscreInternalIndex));
764 }
765
766
767 int RegExpImpl::Re2kNumberOfCaptures(Handle<JSRegExp> re) {
768 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex));
769 return Smi::cast(value->get(kRe2kNumberOfCapturesIndex))->value();
770 }
771
772
773 int RegExpImpl::Re2kNumberOfRegisters(Handle<JSRegExp> re) {
774 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex));
775 return Smi::cast(value->get(kRe2kNumberOfRegistersIndex))->value();
776 }
777
778
779 Handle<ByteArray> RegExpImpl::Re2kCode(Handle<JSRegExp> re) {
780 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex));
781 return Handle<ByteArray>(ByteArray::cast(value->get(kRe2kCodeIndex)));
632 } 782 }
633 783
634 784
635 // ------------------------------------------------------------------- 785 // -------------------------------------------------------------------
636 // New regular expression engine 786 // New regular expression engine
637 787
638 788
639 class DotPrinter; 789 class DotPrinter;
640 790
641 791
642 class RegExpCompiler { 792 class RegExpCompiler {
643 public: 793 public:
644 explicit RegExpCompiler(int capture_count) 794 explicit RegExpCompiler(int capture_count)
645 : next_register_(2 * capture_count), 795 : next_register_(2 * capture_count),
646 work_list_(NULL) { } 796 work_list_(NULL) { }
647 797
648 int AllocateRegister() { return next_register_++; } 798 int AllocateRegister() { return next_register_++; }
649 799
650 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, 800 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
651 RegExpNode* start); 801 RegExpNode* start,
802 int capture_count);
652 803
653 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } 804 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
654 805
655 static const int kImplementationOffset = 0;
656 static const int kNumberOfRegistersOffset = 0;
657 static const int kCodeOffset = 1;
658
659 RegExpMacroAssembler* macro_assembler() { 806 RegExpMacroAssembler* macro_assembler() {
660 return macro_assembler_; 807 return macro_assembler_;
661 } 808 }
662 private: 809 private:
663 int next_register_; 810 int next_register_;
664 List<RegExpNode*>* work_list_; 811 List<RegExpNode*>* work_list_;
665 RegExpMacroAssembler* macro_assembler_; 812 RegExpMacroAssembler* macro_assembler_;
666 }; 813 };
667 814
668 815
816 // Attempts to compile the regexp using a Regexp2000 code generator. Returns
817 // a fixed array or a null handle depending on whether it succeeded.
669 Handle<FixedArray> RegExpCompiler::Assemble( 818 Handle<FixedArray> RegExpCompiler::Assemble(
670 RegExpMacroAssembler* macro_assembler, 819 RegExpMacroAssembler* macro_assembler,
671 RegExpNode* start) { 820 RegExpNode* start,
821 int capture_count) {
Lasse Reichstein 2008/11/14 10:03:40 Also need to know if we ignore case.
672 macro_assembler_ = macro_assembler; 822 macro_assembler_ = macro_assembler;
673 List <RegExpNode*> work_list(0); 823 List <RegExpNode*> work_list(0);
674 work_list_ = &work_list; 824 work_list_ = &work_list;
675 start->GoTo(this); 825 Label fail;
826 macro_assembler->PushBacktrack(&fail);
827 if (!start->GoTo(this)) {
828 fail.Unuse();
829 return Handle<FixedArray>::null();
830 }
676 while (!work_list.is_empty()) { 831 while (!work_list.is_empty()) {
677 work_list.RemoveLast()->Emit(this); 832 if (!work_list.RemoveLast()->Emit(this)) {
833 fail.Unuse();
834 return Handle<FixedArray>::null();
835 }
678 } 836 }
679 Handle<FixedArray> array = Factory::NewFixedArray(3); 837 macro_assembler->Bind(&fail);
680 array->set(kImplementationOffset, 838 macro_assembler->Fail();
681 Smi::FromInt(macro_assembler->Implementation()), 839 Handle<FixedArray> array =
682 SKIP_WRITE_BARRIER); 840 Factory::NewFixedArray(RegExpImpl::kRe2kDataLength);
683 array->set(kNumberOfRegistersOffset, 841 array->set(RegExpImpl::kRe2kImplementationIndex,
684 Smi::FromInt(next_register_), 842 Smi::FromInt(macro_assembler->Implementation()));
685 SKIP_WRITE_BARRIER); 843 array->set(RegExpImpl::kRe2kNumberOfRegistersIndex,
844 Smi::FromInt(next_register_));
845 array->set(RegExpImpl::kRe2kNumberOfCapturesIndex,
846 Smi::FromInt(capture_count));
686 Handle<Object> code = macro_assembler->GetCode(); 847 Handle<Object> code = macro_assembler->GetCode();
848 array->set(RegExpImpl::kRe2kCodeIndex, *code);
687 work_list_ = NULL; 849 work_list_ = NULL;
688 return array; 850 return array;
689 } 851 }
690 852
691 853
692 void RegExpNode::GoTo(RegExpCompiler* compiler) { 854 bool RegExpNode::GoTo(RegExpCompiler* compiler) {
693 if (label.is_bound()) { 855 if (label.is_bound()) {
694 compiler->macro_assembler()->GoTo(&label); 856 compiler->macro_assembler()->GoTo(&label);
857 return true;
695 } else { 858 } else {
696 Emit(compiler); 859 return Emit(compiler);
697 } 860 }
698 } 861 }
699 862
700 863
701 void RegExpNode::EmitAddress(RegExpCompiler* compiler) { 864 void RegExpNode::EmitAddress(RegExpCompiler* compiler) {
702 compiler->macro_assembler()->EmitOrLink(&label); 865 compiler->macro_assembler()->EmitOrLink(&label);
703 } 866 }
704 867
705 868
706 EndNode EndNode::kAccept(ACCEPT); 869 EndNode EndNode::kAccept(ACCEPT);
707 EndNode EndNode::kBacktrack(BACKTRACK); 870 EndNode EndNode::kBacktrack(BACKTRACK);
708 871
709 872
873 bool EndNode::Emit(RegExpCompiler* compiler) {
874 switch (action_) {
875 case ACCEPT:
876 compiler->macro_assembler()->Succeed();
877 return true;
878 case BACKTRACK:
879 compiler->macro_assembler()->Backtrack();
880 return true;
881 }
882 return false;
883 }
884
885
710 void GuardedAlternative::AddGuard(Guard* guard) { 886 void GuardedAlternative::AddGuard(Guard* guard) {
711 if (guards_ == NULL) 887 if (guards_ == NULL)
712 guards_ = new ZoneList<Guard*>(1); 888 guards_ = new ZoneList<Guard*>(1);
713 guards_->Add(guard); 889 guards_->Add(guard);
714 } 890 }
715 891
716 892
717 ActionNode* ActionNode::StoreRegister(int reg, 893 ActionNode* ActionNode::StoreRegister(int reg,
718 int val, 894 int val,
719 RegExpNode* on_success) { 895 RegExpNode* on_success) {
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
775 visitor->Visit##Type(this); \ 951 visitor->Visit##Type(this); \
776 } 952 }
777 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) 953 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
778 #undef DEFINE_ACCEPT 954 #undef DEFINE_ACCEPT
779 955
780 956
781 // ------------------------------------------------------------------- 957 // -------------------------------------------------------------------
782 // Emit code. 958 // Emit code.
783 959
784 960
785 void ChoiceNode::Emit(RegExpCompiler* compiler) { 961 bool ChoiceNode::Emit(RegExpCompiler* compiler) {
786 // TODO(erikcorry): Implement this. 962 // TODO(erikcorry): Implement this.
787 UNREACHABLE(); 963 return false;
788 } 964 }
789 965
790 966
791 void ActionNode::Emit(RegExpCompiler* compiler) { 967 bool ActionNode::Emit(RegExpCompiler* compiler) {
792 RegExpMacroAssembler* macro = compiler->macro_assembler(); 968 RegExpMacroAssembler* macro = compiler->macro_assembler();
793 switch (type_) { 969 switch (type_) {
794 case STORE_REGISTER: 970 case STORE_REGISTER:
795 macro->SetRegister(data_.u_store_register.reg, 971 macro->SetRegister(data_.u_store_register.reg,
796 data_.u_store_register.value); 972 data_.u_store_register.value);
797 break; 973 break;
798 case INCREMENT_REGISTER: 974 case INCREMENT_REGISTER:
799 macro->AdvanceRegister(data_.u_increment_register.reg, 1); 975 macro->AdvanceRegister(data_.u_increment_register.reg, 1);
800 break; 976 break;
801 case STORE_POSITION: 977 case STORE_POSITION:
802 macro->PushCurrentPosition(); 978 macro->PushCurrentPosition();
803 break; 979 break;
804 case RESTORE_POSITION: 980 case RESTORE_POSITION:
805 macro->PopCurrentPosition(); 981 macro->PopCurrentPosition();
806 break; 982 break;
807 case BEGIN_SUBMATCH: 983 case BEGIN_SUBMATCH:
808 // TODO(erikcorry): Implement this. 984 // TODO(erikcorry): Implement this.
809 UNREACHABLE(); 985 return false;
810 break;
811 case ESCAPE_SUBMATCH: 986 case ESCAPE_SUBMATCH:
812 // TODO(erikcorry): Implement this. 987 // TODO(erikcorry): Implement this.
813 UNREACHABLE(); 988 return false;
814 break;
815 case END_SUBMATCH: 989 case END_SUBMATCH:
816 // TODO(erikcorry): Implement this. 990 // TODO(erikcorry): Implement this.
991 return false;
992 default:
817 UNREACHABLE(); 993 UNREACHABLE();
818 break; 994 return false;
819 } 995 }
996 compiler->AddWork(on_success());
997 return true;
820 } 998 }
821 999
822 1000
823 // ------------------------------------------------------------------- 1001 // -------------------------------------------------------------------
824 // Dot/dotty output 1002 // Dot/dotty output
825 1003
826 1004
827 #ifdef DEBUG 1005 #ifdef DEBUG
828 1006
829 1007
(...skipping 773 matching lines...) Expand 10 before | Expand all | Expand 10 after
1603 outgoing.set_table(NULL); 1781 outgoing.set_table(NULL);
1604 outgoing.Analyze(that->on_success()); 1782 outgoing.Analyze(that->on_success());
1605 } 1783 }
1606 1784
1607 1785
1608 void Analysis::VisitAction(ActionNode* that) { 1786 void Analysis::VisitAction(ActionNode* that) {
1609 Analyze(that->on_success()); 1787 Analyze(that->on_success());
1610 } 1788 }
1611 1789
1612 1790
1613 RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) { 1791 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
1792 RegExpNode** node_return) {
1614 RegExpCompiler compiler(input->capture_count); 1793 RegExpCompiler compiler(input->capture_count);
1615 // Wrap the body of the regexp in capture #0. 1794 // Wrap the body of the regexp in capture #0.
1616 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, 1795 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
1617 0, 1796 0,
1618 &compiler, 1797 &compiler,
1619 EndNode::GetAccept(), 1798 EndNode::GetAccept(),
1620 EndNode::GetBacktrack()); 1799 EndNode::GetBacktrack());
1621 // Add a .*? at the beginning, outside the body capture. 1800 // Add a .*? at the beginning, outside the body capture.
1622 // Note: We could choose to not add this if the regexp is anchored at 1801 // Note: We could choose to not add this if the regexp is anchored at
1623 // the start of the input but I'm not sure how best to do that and 1802 // the start of the input but I'm not sure how best to do that and
1624 // since we don't even handle ^ yet I'm saving that optimization for 1803 // since we don't even handle ^ yet I'm saving that optimization for
1625 // later. 1804 // later.
1626 RegExpNode* node = RegExpQuantifier::ToNode(0, 1805 RegExpNode* node = RegExpQuantifier::ToNode(0,
1627 RegExpQuantifier::kInfinity, 1806 RegExpQuantifier::kInfinity,
1628 false, 1807 false,
1629 new RegExpCharacterClass('.'), 1808 new RegExpCharacterClass('.'),
1630 &compiler, 1809 &compiler,
1631 captured_body, 1810 captured_body,
1632 EndNode::GetBacktrack()); 1811 EndNode::GetBacktrack());
1812 if (node_return != NULL) *node_return = node;
1633 Analysis analysis(&compiler); 1813 Analysis analysis(&compiler);
1634 analysis.Analyze(node); 1814 analysis.Analyze(node);
1635 return node; 1815 byte codes[10240];
1816 Re2kAssembler assembler(Vector<byte>(codes, 1024));
1817 RegExpMacroAssemblerRe2k macro_assembler(&assembler);
1818 return compiler.Assemble(&macro_assembler, node, input->capture_count);
1636 } 1819 }
1637 1820
1638 1821
1639 RegExpMacroAssembler::~RegExpMacroAssembler() { 1822 RegExpMacroAssembler::~RegExpMacroAssembler() {
1640 } 1823 }
1641 1824
1642 1825
1643 }} // namespace v8::internal 1826 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698