Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(122)

Side by Side Diff: src/jsregexp.cc

Issue 43075: * Reapply revisions 1383, 1384, 1391, 1398, 1401, 1402,... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 11 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
206 Handle<String> pattern, 206 Handle<String> pattern,
207 Handle<String> flag_str) { 207 Handle<String> flag_str) {
208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); 208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); 209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);
210 bool in_cache = !cached.is_null(); 210 bool in_cache = !cached.is_null();
211 LOG(RegExpCompileEvent(re, in_cache)); 211 LOG(RegExpCompileEvent(re, in_cache));
212 212
213 Handle<Object> result; 213 Handle<Object> result;
214 if (in_cache) { 214 if (in_cache) {
215 re->set_data(*cached); 215 re->set_data(*cached);
216 result = re; 216 return re;
217 } else { 217 }
218 FlattenString(pattern); 218 FlattenString(pattern);
219 ZoneScope zone_scope(DELETE_ON_EXIT); 219 ZoneScope zone_scope(DELETE_ON_EXIT);
220 RegExpCompileData parse_result; 220 RegExpCompileData parse_result;
221 FlatStringReader reader(pattern); 221 FlatStringReader reader(pattern);
222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { 222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
223 // Throw an exception if we fail to parse the pattern. 223 // Throw an exception if we fail to parse the pattern.
224 ThrowRegExpException(re, 224 ThrowRegExpException(re,
225 pattern, 225 pattern,
226 parse_result.error, 226 parse_result.error,
227 "malformed_regexp"); 227 "malformed_regexp");
228 return Handle<Object>::null(); 228 return Handle<Object>::null();
229 }
230
231 if (parse_result.simple && !flags.is_ignore_case()) {
232 // Parse-tree is a single atom that is equal to the pattern.
233 result = AtomCompile(re, pattern, flags, pattern);
234 } else if (parse_result.tree->IsAtom() &&
235 !flags.is_ignore_case() &&
236 parse_result.capture_count == 0) {
237 RegExpAtom* atom = parse_result.tree->AsAtom();
238 Vector<const uc16> atom_pattern = atom->data();
239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
240 result = AtomCompile(re, pattern, flags, atom_string);
241 } else {
242 result = IrregexpPrepare(re, pattern, flags);
243 }
244 Object* data = re->data();
245 if (data->IsFixedArray()) {
246 // If compilation succeeded then the data is set on the regexp
247 // and we can store it in the cache.
248 Handle<FixedArray> data(FixedArray::cast(re->data()));
249 CompilationCache::PutRegExp(pattern, flags, data);
250 }
251 } 229 }
252 230
253 return result; 231 if (parse_result.simple && !flags.is_ignore_case()) {
232 // Parse-tree is a single atom that is equal to the pattern.
233 AtomCompile(re, pattern, flags, pattern);
234 } else if (parse_result.tree->IsAtom() &&
235 !flags.is_ignore_case() &&
236 parse_result.capture_count == 0) {
237 RegExpAtom* atom = parse_result.tree->AsAtom();
238 Vector<const uc16> atom_pattern = atom->data();
239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
240 AtomCompile(re, pattern, flags, atom_string);
241 } else {
242 IrregexpPrepare(re, pattern, flags, parse_result.capture_count);
243 }
244 ASSERT(re->data()->IsFixedArray());
245 // Compilation succeeded so the data is set on the regexp
246 // and we can store it in the cache.
247 Handle<FixedArray> data(FixedArray::cast(re->data()));
248 CompilationCache::PutRegExp(pattern, flags, data);
249
250 return re;
254 } 251 }
255 252
256 253
257 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, 254 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
258 Handle<String> subject, 255 Handle<String> subject,
259 Handle<Object> index) { 256 int index,
257 Handle<JSArray> last_match_info) {
260 switch (regexp->TypeTag()) { 258 switch (regexp->TypeTag()) {
261 case JSRegExp::ATOM: 259 case JSRegExp::ATOM:
262 return AtomExec(regexp, subject, index); 260 return AtomExec(regexp, subject, index, last_match_info);
263 case JSRegExp::IRREGEXP: { 261 case JSRegExp::IRREGEXP: {
264 Handle<Object> result = IrregexpExec(regexp, subject, index); 262 Handle<Object> result =
263 IrregexpExec(regexp, subject, index, last_match_info);
265 ASSERT(!result.is_null() || Top::has_pending_exception()); 264 ASSERT(!result.is_null() || Top::has_pending_exception());
266 return result; 265 return result;
267 } 266 }
268 default: 267 default:
269 UNREACHABLE(); 268 UNREACHABLE();
270 return Handle<Object>::null(); 269 return Handle<Object>::null();
271 } 270 }
272 } 271 }
273 272
274 273
275 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, 274 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
276 Handle<String> subject) { 275 Handle<String> subject,
276 Handle<JSArray> last_match_info) {
277 switch (regexp->TypeTag()) { 277 switch (regexp->TypeTag()) {
278 case JSRegExp::ATOM: 278 case JSRegExp::ATOM:
279 return AtomExecGlobal(regexp, subject); 279 return AtomExecGlobal(regexp, subject, last_match_info);
280 case JSRegExp::IRREGEXP: { 280 case JSRegExp::IRREGEXP: {
281 Handle<Object> result = IrregexpExecGlobal(regexp, subject); 281 Handle<Object> result =
282 IrregexpExecGlobal(regexp, subject, last_match_info);
282 ASSERT(!result.is_null() || Top::has_pending_exception()); 283 ASSERT(!result.is_null() || Top::has_pending_exception());
283 return result; 284 return result;
284 } 285 }
285 default: 286 default:
286 UNREACHABLE(); 287 UNREACHABLE();
287 return Handle<Object>::null(); 288 return Handle<Object>::null();
288 } 289 }
289 } 290 }
290 291
291 292
292 // RegExp Atom implementation: Simple string search using indexOf. 293 // RegExp Atom implementation: Simple string search using indexOf.
293 294
294 295
295 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, 296 void RegExpImpl::AtomCompile(Handle<JSRegExp> re,
296 Handle<String> pattern, 297 Handle<String> pattern,
297 JSRegExp::Flags flags, 298 JSRegExp::Flags flags,
298 Handle<String> match_pattern) { 299 Handle<String> match_pattern) {
299 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern); 300 Factory::SetRegExpAtomData(re,
300 return re; 301 JSRegExp::ATOM,
302 pattern,
303 flags,
304 match_pattern);
305 }
306
307
308 static void SetAtomLastCapture(FixedArray* array,
309 String* subject,
310 int from,
311 int to) {
312 NoHandleAllocation no_handles;
313 RegExpImpl::SetLastCaptureCount(array, 2);
314 RegExpImpl::SetLastSubject(array, subject);
315 RegExpImpl::SetLastInput(array, subject);
316 RegExpImpl::SetCapture(array, 0, from);
317 RegExpImpl::SetCapture(array, 1, to);
301 } 318 }
302 319
303 320
304 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, 321 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
305 Handle<String> subject, 322 Handle<String> subject,
306 Handle<Object> index) { 323 int index,
324 Handle<JSArray> last_match_info) {
307 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); 325 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
308 326
309 uint32_t start_index; 327 uint32_t start_index = index;
310 if (!Array::IndexFromObject(*index, &start_index)) {
311 return Handle<Smi>(Smi::FromInt(-1));
312 }
313 328
314 int value = Runtime::StringMatch(subject, needle, start_index); 329 int value = Runtime::StringMatch(subject, needle, start_index);
315 if (value == -1) return Factory::null_value(); 330 if (value == -1) return Factory::null_value();
331 ASSERT(last_match_info->HasFastElements());
316 332
317 Handle<FixedArray> array = Factory::NewFixedArray(2); 333 {
318 array->set(0, Smi::FromInt(value)); 334 NoHandleAllocation no_handles;
319 array->set(1, Smi::FromInt(value + needle->length())); 335 FixedArray* array = last_match_info->elements();
320 return Factory::NewJSArrayWithElements(array); 336 SetAtomLastCapture(array, *subject, value, value + needle->length());
337 }
338 return last_match_info;
321 } 339 }
322 340
323 341
324 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, 342 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
325 Handle<String> subject) { 343 Handle<String> subject,
344 Handle<JSArray> last_match_info) {
326 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); 345 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
346 ASSERT(last_match_info->HasFastElements());
327 Handle<JSArray> result = Factory::NewJSArray(1); 347 Handle<JSArray> result = Factory::NewJSArray(1);
328 int index = 0; 348 int index = 0;
329 int match_count = 0; 349 int match_count = 0;
330 int subject_length = subject->length(); 350 int subject_length = subject->length();
331 int needle_length = needle->length(); 351 int needle_length = needle->length();
352 int last_value = -1;
332 while (true) { 353 while (true) {
354 HandleScope scope;
333 int value = -1; 355 int value = -1;
334 if (index + needle_length <= subject_length) { 356 if (index + needle_length <= subject_length) {
335 value = Runtime::StringMatch(subject, needle, index); 357 value = Runtime::StringMatch(subject, needle, index);
336 } 358 }
337 if (value == -1) break; 359 if (value == -1) {
338 HandleScope scope; 360 if (last_value != -1) {
361 Handle<FixedArray> array(last_match_info->elements());
362 SetAtomLastCapture(*array,
363 *subject,
364 last_value,
365 last_value + needle->length());
366 }
367 break;
368 }
369
339 int end = value + needle_length; 370 int end = value + needle_length;
340 371
341 Handle<FixedArray> array = Factory::NewFixedArray(2); 372 // Create an array that looks like the static last_match_info array
342 array->set(0, Smi::FromInt(value)); 373 // that is attached to the global RegExp object. We will be returning
343 array->set(1, Smi::FromInt(end)); 374 // an array of these.
375 Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2);
376 SetCapture(*array, 0, value);
377 SetCapture(*array, 1, end);
378 SetLastCaptureCount(*array, 2);
344 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); 379 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
345 SetElement(result, match_count, pair); 380 SetElement(result, match_count, pair);
346 match_count++; 381 match_count++;
347 index = end; 382 index = end;
348 if (needle_length == 0) index++; 383 if (needle_length == 0) index++;
384 last_value = value;
349 } 385 }
350 return result; 386 return result;
351 } 387 }
352 388
353 389
354 // Irregexp implementation. 390 // Irregexp implementation.
355 391
356 392
357 // Retrieves a compiled version of the regexp for either ASCII or non-ASCII 393 // Ensures that the regexp object contains a compiled version of the
358 // strings. If the compiled version doesn't already exist, it is compiled 394 // source for either ASCII or non-ASCII strings.
395 // If the compiled version doesn't already exist, it is compiled
359 // from the source pattern. 396 // from the source pattern.
360 // Irregexp is not feature complete yet. If there is something in the 397 // If compilation fails, an exception is thrown and this function
361 // regexp that the compiler cannot currently handle, an empty 398 // returns false.
362 // handle is returned, but no exception is thrown. 399 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
363 static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re, 400 bool is_ascii) {
364 bool is_ascii) { 401 int index;
365 ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); 402 if (is_ascii) {
366 Handle<FixedArray> alternatives( 403 index = JSRegExp::kIrregexpASCIICodeIndex;
367 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex))); 404 } else {
368 ASSERT_EQ(2, alternatives->length()); 405 index = JSRegExp::kIrregexpUC16CodeIndex;
369 406 }
370 int index = is_ascii ? 0 : 1; 407 Object* entry = re->DataAt(index);
371 Object* entry = alternatives->get(index); 408 if (!entry->IsTheHole()) {
372 if (!entry->IsNull()) { 409 // A value has already been compiled.
373 return Handle<FixedArray>(FixedArray::cast(entry)); 410 if (entry->IsJSObject()) {
411 // If it's a JS value, it's an error.
412 Top::Throw(entry);
413 return false;
414 }
415 return true;
374 } 416 }
375 417
376 // Compile the RegExp. 418 // Compile the RegExp.
377 ZoneScope zone_scope(DELETE_ON_EXIT); 419 ZoneScope zone_scope(DELETE_ON_EXIT);
378 420
379 JSRegExp::Flags flags = re->GetFlags(); 421 JSRegExp::Flags flags = re->GetFlags();
380 422
381 Handle<String> pattern(re->Pattern()); 423 Handle<String> pattern(re->Pattern());
382 if (!pattern->IsFlat(StringShape(*pattern))) { 424 if (!pattern->IsFlat(StringShape(*pattern))) {
383 FlattenString(pattern); 425 FlattenString(pattern);
384 } 426 }
385 427
386 RegExpCompileData compile_data; 428 RegExpCompileData compile_data;
387 FlatStringReader reader(pattern); 429 FlatStringReader reader(pattern);
388 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { 430 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) {
389 // Throw an exception if we fail to parse the pattern. 431 // Throw an exception if we fail to parse the pattern.
390 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. 432 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once.
391 ThrowRegExpException(re, 433 ThrowRegExpException(re,
392 pattern, 434 pattern,
393 compile_data.error, 435 compile_data.error,
394 "malformed_regexp"); 436 "malformed_regexp");
395 return Handle<FixedArray>::null(); 437 return false;
396 } 438 }
397 Handle<FixedArray> compiled_entry = 439 RegExpEngine::CompilationResult result =
398 RegExpEngine::Compile(&compile_data, 440 RegExpEngine::Compile(&compile_data,
399 flags.is_ignore_case(), 441 flags.is_ignore_case(),
400 flags.is_multiline(), 442 flags.is_multiline(),
401 pattern, 443 pattern,
402 is_ascii); 444 is_ascii);
403 if (!compiled_entry.is_null()) { 445 if (result.error_message != NULL) {
404 alternatives->set(index, *compiled_entry); 446 // Unable to compile regexp.
447 Handle<JSArray> array = Factory::NewJSArray(2);
448 SetElement(array, 0, pattern);
449 SetElement(array,
450 1,
451 Factory::NewStringFromUtf8(CStrVector(result.error_message)));
452 Handle<Object> regexp_err =
453 Factory::NewSyntaxError("malformed_regexp", array);
454 Top::Throw(*regexp_err);
455 re->SetDataAt(index, *regexp_err);
456 return false;
405 } 457 }
406 return compiled_entry; 458
459 NoHandleAllocation no_handles;
460
461 FixedArray* data = FixedArray::cast(re->data());
462 data->set(index, result.code);
463 int register_max = IrregexpMaxRegisterCount(data);
464 if (result.num_registers > register_max) {
465 SetIrregexpMaxRegisterCount(data, result.num_registers);
466 }
467
468 return true;
407 } 469 }
408 470
409 471
410 int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) { 472 int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) {
411 return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value(); 473 return Smi::cast(
474 re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
412 } 475 }
413 476
414 477
415 int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) { 478 void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) {
416 return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value(); 479 re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value));
417 } 480 }
418 481
419 482
420 Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) { 483 int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) {
421 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() 484 return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
422 == RegExpMacroAssembler::kBytecodeImplementation);
423 return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex)));
424 } 485 }
425 486
426 487
427 Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) { 488 int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {
428 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() 489 return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
429 != RegExpMacroAssembler::kBytecodeImplementation);
430 return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex)));
431 } 490 }
432 491
433 492
434 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, 493 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {
435 Handle<String> pattern, 494 int index;
436 JSRegExp::Flags flags) { 495 if (is_ascii) {
437 // Make space for ASCII and UC16 versions. 496 index = JSRegExp::kIrregexpASCIICodeIndex;
438 Handle<FixedArray> alternatives = Factory::NewFixedArray(2); 497 } else {
439 alternatives->set_null(0); 498 index = JSRegExp::kIrregexpUC16CodeIndex;
440 alternatives->set_null(1); 499 }
441 Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives); 500 return ByteArray::cast(re->get(index));
442 return re; 501 }
502
503
504 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
505 int index;
506 if (is_ascii) {
507 index = JSRegExp::kIrregexpASCIICodeIndex;
508 } else {
509 index = JSRegExp::kIrregexpUC16CodeIndex;
510 }
511 return Code::cast(re->get(index));
512 }
513
514
515 void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
516 Handle<String> pattern,
517 JSRegExp::Flags flags,
518 int capture_count) {
519 // Initialize compiled code entries to null.
520 Factory::SetRegExpIrregexpData(re,
521 JSRegExp::IRREGEXP,
522 pattern,
523 flags,
524 capture_count);
443 } 525 }
444 526
445 527
446 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, 528 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
447 Handle<String> subject, 529 Handle<String> subject,
448 Handle<Object> index) { 530 int index,
531 Handle<JSArray> last_match_info) {
449 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); 532 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
450 ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
451 533
452 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); 534 bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
453 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); 535 if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
454 if (irregexp.is_null()) {
455 // We can't handle the RegExp with IRRegExp.
456 return Handle<Object>::null(); 536 return Handle<Object>::null();
457 } 537 }
458 538
459 // Prepare space for the return values. 539 // Prepare space for the return values.
460 int number_of_registers = IrregexpNumberOfRegisters(irregexp); 540 Handle<FixedArray> re_data(FixedArray::cast(regexp->data()));
461 OffsetsVector offsets(number_of_registers); 541 int number_of_capture_registers =
542 (IrregexpNumberOfCaptures(*re_data) + 1) * 2;
543 OffsetsVector offsets(number_of_capture_registers);
462 544
463 int num_captures = IrregexpNumberOfCaptures(irregexp); 545 int previous_index = index;
464
465 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
466 546
467 #ifdef DEBUG 547 #ifdef DEBUG
468 if (FLAG_trace_regexp_bytecodes) { 548 if (FLAG_trace_regexp_bytecodes) {
469 String* pattern = regexp->Pattern(); 549 String* pattern = regexp->Pattern();
470 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 550 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
471 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 551 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
472 } 552 }
473 #endif 553 #endif
474 554
475 if (!subject->IsFlat(StringShape(*subject))) { 555 if (!subject->IsFlat(StringShape(*subject))) {
476 FlattenString(subject); 556 FlattenString(subject);
477 } 557 }
478 558
479 return IrregexpExecOnce(irregexp, 559 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
480 num_captures, 560
561 return IrregexpExecOnce(re_data,
562 number_of_capture_registers,
563 last_match_info,
481 subject, 564 subject,
482 previous_index, 565 previous_index,
483 offsets.vector(), 566 offsets.vector(),
484 offsets.length()); 567 offsets.length());
485 } 568 }
486 569
487 570
488 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, 571 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
489 Handle<String> subject) { 572 Handle<String> subject,
573 Handle<JSArray> last_match_info) {
490 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); 574 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
575 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()));
491 576
492 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); 577 bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
493 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); 578 if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
494 if (irregexp.is_null()) {
495 return Handle<Object>::null(); 579 return Handle<Object>::null();
496 } 580 }
497 581
498 // Prepare space for the return values. 582 // Prepare space for the return values.
499 int number_of_registers = IrregexpNumberOfRegisters(irregexp); 583 int number_of_capture_registers =
500 OffsetsVector offsets(number_of_registers); 584 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
585 OffsetsVector offsets(number_of_capture_registers);
501 586
502 int previous_index = 0; 587 int previous_index = 0;
503 588
504 Handle<JSArray> result = Factory::NewJSArray(0); 589 Handle<JSArray> result = Factory::NewJSArray(0);
505 int i = 0; 590 int result_length = 0;
506 Handle<Object> matches; 591 Handle<Object> matches;
507 592
508 if (!subject->IsFlat(StringShape(*subject))) { 593 if (!subject->IsFlat(StringShape(*subject))) {
509 FlattenString(subject); 594 FlattenString(subject);
510 } 595 }
511 596
597 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
598
512 while (true) { 599 while (true) {
513 if (previous_index > subject->length() || previous_index < 0) { 600 if (previous_index > subject->length() || previous_index < 0) {
514 // Per ECMA-262 15.10.6.2, if the previous index is greater than the 601 // Per ECMA-262 15.10.6.2, if the previous index is greater than the
515 // string length, there is no match. 602 // string length, there is no match.
516 matches = Factory::null_value(); 603 matches = Factory::null_value();
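Mads Ager (chromium) 2009/03/11 13:49:17 I know this is not your code, but why is there an… [comment truncated in this view; presumably refers to the assignment `matches = Factory::null_value();` directly above, which is immediately followed by `return result;`]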
Erik Corry 2009/03/11 14:01:06 Lasse has a patch waiting that also fixes this.
517 return result; 604 return result;
518 } else { 605 } else {
519 #ifdef DEBUG 606 #ifdef DEBUG
520 if (FLAG_trace_regexp_bytecodes) { 607 if (FLAG_trace_regexp_bytecodes) {
521 String* pattern = regexp->Pattern(); 608 String* pattern = regexp->Pattern();
522 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 609 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
523 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 610 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
524 } 611 }
525 #endif 612 #endif
613 HandleScope scope;
526 matches = IrregexpExecOnce(irregexp, 614 matches = IrregexpExecOnce(irregexp,
527 IrregexpNumberOfCaptures(irregexp), 615 number_of_capture_registers,
616 last_match_info,
528 subject, 617 subject,
529 previous_index, 618 previous_index,
530 offsets.vector(), 619 offsets.vector(),
531 offsets.length()); 620 offsets.length());
532 621
533 if (matches.is_null()) { 622 if (matches.is_null()) {
534 ASSERT(Top::has_pending_exception()); 623 ASSERT(Top::has_pending_exception());
535 return matches; 624 return matches;
536 } 625 }
537 626
538 if (matches->IsJSArray()) { 627 if (matches->IsJSArray()) {
539 SetElement(result, i, matches); 628 // Create an array that looks like the static last_match_info array
540 i++; 629 // that is attached to the global RegExp object. We will be returning
541 previous_index = offsets.vector()[1]; 630 // an array of these.
542 if (offsets.vector()[0] == offsets.vector()[1]) { 631 Handle<FixedArray> matches_array(JSArray::cast(*matches)->elements());
632 Handle<JSArray> latest_match =
633 Factory::NewJSArray(kFirstCapture + number_of_capture_registers);
634 Handle<FixedArray> latest_match_array(latest_match->elements());
635
636 for (int i = 0; i < number_of_capture_registers; i++) {
637 SetCapture(*latest_match_array, i, GetCapture(*matches_array, i));
638 }
639 SetLastCaptureCount(*latest_match_array, number_of_capture_registers);
640
641 SetElement(result, result_length, latest_match);
642 result_length++;
643 previous_index = GetCapture(*matches_array, 1);
644 if (GetCapture(*matches_array, 0) == previous_index)
543 previous_index++; 645 previous_index++;
544 } 646
545 } else { 647 } else {
546 ASSERT(matches->IsNull()); 648 ASSERT(matches->IsNull());
547 return result; 649 return result;
548 } 650 }
549 } 651 }
550 } 652 }
551 } 653 }
552 654
553 655
554 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, 656 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp,
555 int num_captures, 657 int number_of_capture_registers,
658 Handle<JSArray> last_match_info,
556 Handle<String> subject, 659 Handle<String> subject,
557 int previous_index, 660 int previous_index,
558 int* offsets_vector, 661 int* offsets_vector,
559 int offsets_vector_length) { 662 int offsets_vector_length) {
560 ASSERT(subject->IsFlat(StringShape(*subject))); 663 StringShape shape(*subject);
664 ASSERT(subject->IsFlat(shape));
665 bool is_ascii = shape.IsAsciiRepresentation();
561 bool rc; 666 bool rc;
562 667
563 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); 668 Handle<String> original_subject = subject;
669 if (FLAG_regexp_native) {
670 #ifndef ARM
671 Handle<Code> code(IrregexpNativeCode(*regexp, is_ascii));
564 672
565 switch (tag) { 673 // Character offsets into string.
566 case RegExpMacroAssembler::kIA32Implementation: { 674 int start_offset = previous_index;
567 #ifndef ARM 675 int end_offset = subject->length(shape);
568 Handle<Code> code = IrregexpNativeCode(irregexp);
569 676
570 StringShape shape(*subject); 677 if (shape.IsCons()) {
678 subject = Handle<String>(ConsString::cast(*subject)->first());
679 } else if (shape.IsSliced()) {
680 SlicedString* slice = SlicedString::cast(*subject);
681 start_offset += slice->start();
682 end_offset += slice->start();
683 subject = Handle<String>(slice->buffer());
684 }
571 685
572 // Character offsets into string. 686 // String is now either Sequential or External
573 int start_offset = previous_index; 687 StringShape flatshape(*subject);
574 int end_offset = subject->length(shape); 688 bool is_ascii = flatshape.IsAsciiRepresentation();
689 int char_size_shift = is_ascii ? 0 : 1;
575 690
576 if (shape.IsCons()) { 691 RegExpMacroAssemblerIA32::Result res;
577 subject = Handle<String>(ConsString::cast(*subject)->first()); 692
578 } else if (shape.IsSliced()) { 693 if (flatshape.IsExternal()) {
579 SlicedString* slice = SlicedString::cast(*subject); 694 const byte* address;
580 start_offset += slice->start(); 695 if (is_ascii) {
581 end_offset += slice->start(); 696 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
582 subject = Handle<String>(slice->buffer()); 697 address = reinterpret_cast<const byte*>(ext->resource()->data());
698 } else {
699 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
700 address = reinterpret_cast<const byte*>(ext->resource()->data());
583 } 701 }
702 res = RegExpMacroAssemblerIA32::Execute(
703 *code,
704 const_cast<Address*>(&address),
705 start_offset << char_size_shift,
706 end_offset << char_size_shift,
707 offsets_vector,
708 previous_index == 0);
709 } else { // Sequential string
710 ASSERT(StringShape(*subject).IsSequential());
711 Address char_address =
712 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
713 : SeqTwoByteString::cast(*subject)->GetCharsAddress();
714 int byte_offset = char_address - reinterpret_cast<Address>(*subject);
715 res = RegExpMacroAssemblerIA32::Execute(
716 *code,
717 reinterpret_cast<Address*>(subject.location()),
718 byte_offset + (start_offset << char_size_shift),
719 byte_offset + (end_offset << char_size_shift),
720 offsets_vector,
721 previous_index == 0);
722 }
584 723
585 // String is now either Sequential or External 724 if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
586 StringShape flatshape(*subject); 725 ASSERT(Top::has_pending_exception());
587 bool is_ascii = flatshape.IsAsciiRepresentation(); 726 return Handle<Object>::null();
588 int char_size_shift = is_ascii ? 0 : 1; 727 }
728 rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
589 729
590 RegExpMacroAssemblerIA32::Result res; 730 if (rc) {
591 731 // Capture values are relative to start_offset only.
592 if (flatshape.IsExternal()) { 732 for (int i = 0; i < offsets_vector_length; i++) {
593 const byte* address; 733 if (offsets_vector[i] >= 0) {
594 if (is_ascii) { 734 offsets_vector[i] += previous_index;
595 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
596 address = reinterpret_cast<const byte*>(ext->resource()->data());
597 } else {
598 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
599 address = reinterpret_cast<const byte*>(ext->resource()->data());
600 }
601 res = RegExpMacroAssemblerIA32::Execute(
602 *code,
603 const_cast<Address*>(&address),
604 start_offset << char_size_shift,
605 end_offset << char_size_shift,
606 offsets_vector,
607 previous_index == 0);
608 } else { // Sequential string
609 ASSERT(StringShape(*subject).IsSequential());
610 Address char_address =
611 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
612 : SeqTwoByteString::cast(*subject)->GetCharsAddress();
613 int byte_offset = char_address - reinterpret_cast<Address>(*subject);
614 res = RegExpMacroAssemblerIA32::Execute(
615 *code,
616 reinterpret_cast<Address*>(subject.location()),
617 byte_offset + (start_offset << char_size_shift),
618 byte_offset + (end_offset << char_size_shift),
619 offsets_vector,
620 previous_index == 0);
621 }
622
623 if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
624 ASSERT(Top::has_pending_exception());
625 return Handle<Object>::null();
626 }
627 rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
628
629 if (rc) {
630 // Capture values are relative to start_offset only.
631 for (int i = 0; i < offsets_vector_length; i++) {
632 if (offsets_vector[i] >= 0) {
633 offsets_vector[i] += previous_index;
634 }
635 } 735 }
636 } 736 }
637 break; 737 }
738 } else {
638 #else 739 #else
639 UNIMPLEMENTED(); 740 // Unimplemented on ARM, fall through to bytecode.
Mads Ager (chromium) 2009/03/11 13:49:17 Ouch, this is hard to read. Can we factor this di…
Lasse Reichstein 2009/03/11 13:54:03 I'm all for factoring it differently, but I think…
Erik Corry 2009/03/11 14:01:06 I'll leave it alone for now.
640 rc = false; 741 }
641 break; 742 {
642 #endif 743 #endif
744 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
745 offsets_vector[i] = -1;
643 } 746 }
644 case RegExpMacroAssembler::kBytecodeImplementation: { 747 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii));
645 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
646 offsets_vector[i] = -1;
647 }
648 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp);
649 748
650 rc = IrregexpInterpreter::Match(byte_codes, 749 rc = IrregexpInterpreter::Match(byte_codes,
651 subject, 750 subject,
652 offsets_vector, 751 offsets_vector,
653 previous_index); 752 previous_index);
654 break;
655 }
656 case RegExpMacroAssembler::kARMImplementation:
657 default:
658 UNREACHABLE();
659 rc = false;
660 break;
661 } 753 }
662 754
663 if (!rc) { 755 if (!rc) {
664 return Factory::null_value(); 756 return Factory::null_value();
665 } 757 }
666 758
667 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); 759 FixedArray* array = last_match_info->elements();
760 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
668 // The captures come in (start, end+1) pairs. 761 // The captures come in (start, end+1) pairs.
669 for (int i = 0; i < 2 * (num_captures + 1); i += 2) { 762 for (int i = 0; i < number_of_capture_registers; i += 2) {
670 array->set(i, Smi::FromInt(offsets_vector[i])); 763 SetCapture(array, i, offsets_vector[i]);
671 array->set(i + 1, Smi::FromInt(offsets_vector[i + 1])); 764 SetCapture(array, i + 1, offsets_vector[i + 1]);
672 } 765 }
673 return Factory::NewJSArrayWithElements(array); 766 SetLastCaptureCount(array, number_of_capture_registers);
767 SetLastSubject(array, *original_subject);
768 SetLastInput(array, *original_subject);
769 return last_match_info;
674 } 770 }
675 771
676 772
677 // ------------------------------------------------------------------- 773 // -------------------------------------------------------------------
678 // Implmentation of the Irregexp regular expression engine. 774 // Implementation of the Irregexp regular expression engine.
679 // 775 //
680 // The Irregexp regular expression engine is intended to be a complete 776 // The Irregexp regular expression engine is intended to be a complete
681 // implementation of ECMAScript regular expressions. It generates either 777 // implementation of ECMAScript regular expressions. It generates either
682 // bytecodes or native code. 778 // bytecodes or native code.
683 779
684 // The Irregexp regexp engine is structured in three steps. 780 // The Irregexp regexp engine is structured in three steps.
685 // 1) The parser generates an abstract syntax tree. See ast.cc. 781 // 1) The parser generates an abstract syntax tree. See ast.cc.
686 // 2) From the AST a node network is created. The nodes are all 782 // 2) From the AST a node network is created. The nodes are all
687 // subclasses of RegExpNode. The nodes represent states when 783 // subclasses of RegExpNode. The nodes represent states when
688 // executing a regular expression. Several optimizations are 784 // executing a regular expression. Several optimizations are
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after
885 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); 981 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii);
886 982
887 int AllocateRegister() { 983 int AllocateRegister() {
888 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { 984 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
889 reg_exp_too_big_ = true; 985 reg_exp_too_big_ = true;
890 return next_register_; 986 return next_register_;
891 } 987 }
892 return next_register_++; 988 return next_register_++;
893 } 989 }
894 990
895 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, 991 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,
896 RegExpNode* start, 992 RegExpNode* start,
897 int capture_count, 993 int capture_count,
898 Handle<String> pattern); 994 Handle<String> pattern);
899 995
900 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } 996 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
901 997
902 static const int kImplementationOffset = 0; 998 static const int kImplementationOffset = 0;
903 static const int kNumberOfRegistersOffset = 0; 999 static const int kNumberOfRegistersOffset = 0;
904 static const int kCodeOffset = 1; 1000 static const int kCodeOffset = 1;
905 1001
906 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } 1002 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
907 EndNode* accept() { return accept_; } 1003 EndNode* accept() { return accept_; }
908 1004
(...skipping 24 matching lines...) Expand all
933 public: 1029 public:
934 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 1030 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
935 compiler->IncrementRecursionDepth(); 1031 compiler->IncrementRecursionDepth();
936 } 1032 }
937 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 1033 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
938 private: 1034 private:
939 RegExpCompiler* compiler_; 1035 RegExpCompiler* compiler_;
940 }; 1036 };
941 1037
942 1038
943 static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) { 1039 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
944 Handle<JSArray> array = Factory::NewJSArray(2); 1040 return RegExpEngine::CompilationResult("RegExp too big");
945 SetElement(array, 0, pattern);
946 const char* message = "RegExp too big";
947 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));
948 Handle<Object> regexp_err =
949 Factory::NewSyntaxError("malformed_regexp", array);
950 Top::Throw(*regexp_err);
951 return Handle<FixedArray>();
952 } 1041 }
953 1042
954 1043
955 // Attempts to compile the regexp using an Irregexp code generator. Returns 1044 // Attempts to compile the regexp using an Irregexp code generator. Returns
956 // a fixed array or a null handle depending on whether it succeeded. 1045 // a fixed array or a null handle depending on whether it succeeded.
957 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) 1046 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii)
958 : next_register_(2 * (capture_count + 1)), 1047 : next_register_(2 * (capture_count + 1)),
959 work_list_(NULL), 1048 work_list_(NULL),
960 recursion_depth_(0), 1049 recursion_depth_(0),
961 ignore_case_(ignore_case), 1050 ignore_case_(ignore_case),
962 ascii_(ascii), 1051 ascii_(ascii),
963 reg_exp_too_big_(false) { 1052 reg_exp_too_big_(false) {
964 accept_ = new EndNode(EndNode::ACCEPT); 1053 accept_ = new EndNode(EndNode::ACCEPT);
965 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); 1054 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
966 } 1055 }
967 1056
968 1057
969 Handle<FixedArray> RegExpCompiler::Assemble( 1058 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
970 RegExpMacroAssembler* macro_assembler, 1059 RegExpMacroAssembler* macro_assembler,
971 RegExpNode* start, 1060 RegExpNode* start,
972 int capture_count, 1061 int capture_count,
973 Handle<String> pattern) { 1062 Handle<String> pattern) {
974 #ifdef DEBUG 1063 #ifdef DEBUG
975 if (FLAG_trace_regexp_assembler) 1064 if (FLAG_trace_regexp_assembler)
976 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); 1065 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
977 else 1066 else
978 #endif 1067 #endif
979 macro_assembler_ = macro_assembler; 1068 macro_assembler_ = macro_assembler;
980 List <RegExpNode*> work_list(0); 1069 List <RegExpNode*> work_list(0);
981 work_list_ = &work_list; 1070 work_list_ = &work_list;
982 Label fail; 1071 Label fail;
983 macro_assembler_->PushBacktrack(&fail); 1072 macro_assembler_->PushBacktrack(&fail);
984 Trace new_trace; 1073 Trace new_trace;
985 start->Emit(this, &new_trace); 1074 start->Emit(this, &new_trace);
986 macro_assembler_->Bind(&fail); 1075 macro_assembler_->Bind(&fail);
987 macro_assembler_->Fail(); 1076 macro_assembler_->Fail();
988 while (!work_list.is_empty()) { 1077 while (!work_list.is_empty()) {
989 work_list.RemoveLast()->Emit(this, &new_trace); 1078 work_list.RemoveLast()->Emit(this, &new_trace);
990 } 1079 }
991 if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern); 1080 if (reg_exp_too_big_) return IrregexpRegExpTooBig();
992 Handle<FixedArray> array = 1081
993 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);
994 array->set(RegExpImpl::kIrregexpImplementationIndex,
995 Smi::FromInt(macro_assembler_->Implementation()));
996 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex,
997 Smi::FromInt(next_register_));
998 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex,
999 Smi::FromInt(capture_count));
1000 Handle<Object> code = macro_assembler_->GetCode(pattern); 1082 Handle<Object> code = macro_assembler_->GetCode(pattern);
1001 array->set(RegExpImpl::kIrregexpCodeIndex, *code); 1083
1002 work_list_ = NULL; 1084 work_list_ = NULL;
1003 #ifdef DEBUG 1085 #ifdef DEBUG
1004 if (FLAG_trace_regexp_assembler) { 1086 if (FLAG_trace_regexp_assembler) {
1005 delete macro_assembler_; 1087 delete macro_assembler_;
1006 } 1088 }
1007 #endif 1089 #endif
1008 return array; 1090 return RegExpEngine::CompilationResult(*code, next_register_);
1009 } 1091 }
1010 1092
1011 1093
1012 bool Trace::DeferredAction::Mentions(int that) { 1094 bool Trace::DeferredAction::Mentions(int that) {
1013 if (type() == ActionNode::CLEAR_CAPTURES) { 1095 if (type() == ActionNode::CLEAR_CAPTURES) {
1014 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); 1096 Interval range = static_cast<DeferredClearCaptures*>(this)->range();
1015 return range.Contains(that); 1097 return range.Contains(that);
1016 } else { 1098 } else {
1017 return reg() == that; 1099 return reg() == that;
1018 } 1100 }
(...skipping 2697 matching lines...) Expand 10 before | Expand all | Expand 10 after
3716 // x{f, t} becomes this: 3798 // x{f, t} becomes this:
3717 // 3799 //
3718 // (r++)<-. 3800 // (r++)<-.
3719 // | ` 3801 // | `
3720 // | (x) 3802 // | (x)
3721 // v ^ 3803 // v ^
3722 // (r=0)-->(?)---/ [if r < t] 3804 // (r=0)-->(?)---/ [if r < t]
3723 // | 3805 // |
3724 // [if r >= f] \----> ... 3806 // [if r >= f] \----> ...
3725 // 3807 //
3726 //
3727 // TODO(someone): clear captures on repetition and handle empty
3728 // matches.
3729 3808
3730 // 15.10.2.5 RepeatMatcher algorithm. 3809 // 15.10.2.5 RepeatMatcher algorithm.
3731 // The parser has already eliminated the case where max is 0. In the case 3810 // The parser has already eliminated the case where max is 0. In the case
3732 // where max_match is zero the parser has removed the quantifier if min was 3811 // where max_match is zero the parser has removed the quantifier if min was
3733 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. 3812 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom.
3734 3813
3735 // If we know that we cannot match zero length then things are a little 3814 // If we know that we cannot match zero length then things are a little
3736 // simpler since we don't need to make the special zero length match check 3815 // simpler since we don't need to make the special zero length match check
3737 // from step 2.1. If the min and max are small we can unroll a little in 3816 // from step 2.1. If the min and max are small we can unroll a little in
3738 // this case. 3817 // this case.
(...skipping 846 matching lines...) Expand 10 before | Expand all | Expand 10 after
4585 } 4664 }
4586 } 4665 }
4587 4666
4588 4667
4589 void DispatchTableConstructor::VisitAction(ActionNode* that) { 4668 void DispatchTableConstructor::VisitAction(ActionNode* that) {
4590 RegExpNode* target = that->on_success(); 4669 RegExpNode* target = that->on_success();
4591 target->Accept(this); 4670 target->Accept(this);
4592 } 4671 }
4593 4672
4594 4673
4595 Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data, 4674 RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
4596 bool ignore_case, 4675 bool ignore_case,
4597 bool is_multiline, 4676 bool is_multiline,
4598 Handle<String> pattern, 4677 Handle<String> pattern,
4599 bool is_ascii) { 4678 bool is_ascii) {
4600 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { 4679 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
4601 return IrregexpRegExpTooBig(pattern); 4680 return IrregexpRegExpTooBig();
4602 } 4681 }
4603 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); 4682 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);
4604 // Wrap the body of the regexp in capture #0. 4683 // Wrap the body of the regexp in capture #0.
4605 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, 4684 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
4606 0, 4685 0,
4607 &compiler, 4686 &compiler,
4608 compiler.accept()); 4687 compiler.accept());
4609 RegExpNode* node = captured_body; 4688 RegExpNode* node = captured_body;
4610 if (!data->tree->IsAnchored()) { 4689 if (!data->tree->IsAnchored()) {
4611 // Add a .*? at the beginning, outside the body capture, unless 4690 // Add a .*? at the beginning, outside the body capture, unless
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
4658 EmbeddedVector<byte, 1024> codes; 4737 EmbeddedVector<byte, 1024> codes;
4659 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4738 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4660 return compiler.Assemble(&macro_assembler, 4739 return compiler.Assemble(&macro_assembler,
4661 node, 4740 node,
4662 data->capture_count, 4741 data->capture_count,
4663 pattern); 4742 pattern);
4664 } 4743 }
4665 4744
4666 4745
4667 }} // namespace v8::internal 4746 }} // namespace v8::internal
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698