Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(169)

Side by Side Diff: src/jsregexp.cc

Issue 39186: Revert revisions 1383, 1384, 1391, 1398, 1401, 1402,... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 11 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/macros.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
206 Handle<String> pattern, 206 Handle<String> pattern,
207 Handle<String> flag_str) { 207 Handle<String> flag_str) {
208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); 208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); 209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);
210 bool in_cache = !cached.is_null(); 210 bool in_cache = !cached.is_null();
211 LOG(RegExpCompileEvent(re, in_cache)); 211 LOG(RegExpCompileEvent(re, in_cache));
212 212
213 Handle<Object> result; 213 Handle<Object> result;
214 if (in_cache) { 214 if (in_cache) {
215 re->set_data(*cached); 215 re->set_data(*cached);
216 return re; 216 result = re;
217 } 217 } else {
218 FlattenString(pattern); 218 FlattenString(pattern);
219 ZoneScope zone_scope(DELETE_ON_EXIT); 219 ZoneScope zone_scope(DELETE_ON_EXIT);
220 RegExpCompileData parse_result; 220 RegExpCompileData parse_result;
221 FlatStringReader reader(pattern); 221 FlatStringReader reader(pattern);
222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { 222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
223 // Throw an exception if we fail to parse the pattern. 223 // Throw an exception if we fail to parse the pattern.
224 ThrowRegExpException(re, 224 ThrowRegExpException(re,
225 pattern, 225 pattern,
226 parse_result.error, 226 parse_result.error,
227 "malformed_regexp"); 227 "malformed_regexp");
228 return Handle<Object>::null(); 228 return Handle<Object>::null();
229 }
230
231 if (parse_result.simple && !flags.is_ignore_case()) {
232 // Parse-tree is a single atom that is equal to the pattern.
233 result = AtomCompile(re, pattern, flags, pattern);
234 } else if (parse_result.tree->IsAtom() &&
235 !flags.is_ignore_case() &&
236 parse_result.capture_count == 0) {
237 RegExpAtom* atom = parse_result.tree->AsAtom();
238 Vector<const uc16> atom_pattern = atom->data();
239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
240 result = AtomCompile(re, pattern, flags, atom_string);
241 } else {
242 result = IrregexpPrepare(re, pattern, flags);
243 }
244 Object* data = re->data();
245 if (data->IsFixedArray()) {
246 // If compilation succeeded then the data is set on the regexp
247 // and we can store it in the cache.
248 Handle<FixedArray> data(FixedArray::cast(re->data()));
249 CompilationCache::PutRegExp(pattern, flags, data);
250 }
229 } 251 }
230 252
231 if (parse_result.simple && !flags.is_ignore_case()) { 253 return result;
232 // Parse-tree is a single atom that is equal to the pattern.
233 AtomCompile(re, pattern, flags, pattern);
234 } else if (parse_result.tree->IsAtom() &&
235 !flags.is_ignore_case() &&
236 parse_result.capture_count == 0) {
237 RegExpAtom* atom = parse_result.tree->AsAtom();
238 Vector<const uc16> atom_pattern = atom->data();
239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
240 AtomCompile(re, pattern, flags, atom_string);
241 } else {
242 IrregexpPrepare(re, pattern, flags, parse_result.capture_count);
243 }
244 ASSERT(re->data()->IsFixedArray());
245 // Compilation succeeded so the data is set on the regexp
246 // and we can store it in the cache.
247 Handle<FixedArray> data(FixedArray::cast(re->data()));
248 CompilationCache::PutRegExp(pattern, flags, data);
249
250 return re;
251 } 254 }
252 255
253 256
254 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, 257 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
255 Handle<String> subject, 258 Handle<String> subject,
256 int index, 259 Handle<Object> index) {
257 Handle<JSArray> last_match_info) {
258 switch (regexp->TypeTag()) { 260 switch (regexp->TypeTag()) {
259 case JSRegExp::ATOM: 261 case JSRegExp::ATOM:
260 return AtomExec(regexp, subject, index, last_match_info); 262 return AtomExec(regexp, subject, index);
261 case JSRegExp::IRREGEXP: { 263 case JSRegExp::IRREGEXP: {
262 Handle<Object> result = 264 Handle<Object> result = IrregexpExec(regexp, subject, index);
263 IrregexpExec(regexp, subject, index, last_match_info);
264 ASSERT(!result.is_null() || Top::has_pending_exception()); 265 ASSERT(!result.is_null() || Top::has_pending_exception());
265 return result; 266 return result;
266 } 267 }
267 default: 268 default:
268 UNREACHABLE(); 269 UNREACHABLE();
269 return Handle<Object>::null(); 270 return Handle<Object>::null();
270 } 271 }
271 } 272 }
272 273
273 274
274 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, 275 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
275 Handle<String> subject, 276 Handle<String> subject) {
276 Handle<JSArray> last_match_info) {
277 switch (regexp->TypeTag()) { 277 switch (regexp->TypeTag()) {
278 case JSRegExp::ATOM: 278 case JSRegExp::ATOM:
279 return AtomExecGlobal(regexp, subject, last_match_info); 279 return AtomExecGlobal(regexp, subject);
280 case JSRegExp::IRREGEXP: { 280 case JSRegExp::IRREGEXP: {
281 Handle<Object> result = 281 Handle<Object> result = IrregexpExecGlobal(regexp, subject);
282 IrregexpExecGlobal(regexp, subject, last_match_info);
283 ASSERT(!result.is_null() || Top::has_pending_exception()); 282 ASSERT(!result.is_null() || Top::has_pending_exception());
284 return result; 283 return result;
285 } 284 }
286 default: 285 default:
287 UNREACHABLE(); 286 UNREACHABLE();
288 return Handle<Object>::null(); 287 return Handle<Object>::null();
289 } 288 }
290 } 289 }
291 290
292 291
293 // RegExp Atom implementation: Simple string search using indexOf. 292 // RegExp Atom implementation: Simple string search using indexOf.
294 293
295 294
296 void RegExpImpl::AtomCompile(Handle<JSRegExp> re, 295 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
297 Handle<String> pattern, 296 Handle<String> pattern,
298 JSRegExp::Flags flags, 297 JSRegExp::Flags flags,
299 Handle<String> match_pattern) { 298 Handle<String> match_pattern) {
300 Factory::SetRegExpAtomData(re, 299 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern);
301 JSRegExp::ATOM, 300 return re;
302 pattern,
303 flags,
304 match_pattern);
305 }
306
307
308 static void SetAtomLastCapture(FixedArray* array,
309 String* subject,
310 int from,
311 int to) {
312 NoHandleAllocation no_handles;
313 RegExpImpl::SetLastCaptureCount(array, 2);
314 RegExpImpl::SetLastSubject(array, subject);
315 RegExpImpl::SetLastInput(array, subject);
316 RegExpImpl::SetCapture(array, 0, from);
317 RegExpImpl::SetCapture(array, 1, to);
318 } 301 }
319 302
320 303
321 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, 304 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
322 Handle<String> subject, 305 Handle<String> subject,
323 int index, 306 Handle<Object> index) {
324 Handle<JSArray> last_match_info) {
325 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); 307 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
326 308
327 uint32_t start_index = index; 309 uint32_t start_index;
310 if (!Array::IndexFromObject(*index, &start_index)) {
311 return Handle<Smi>(Smi::FromInt(-1));
312 }
328 313
329 int value = Runtime::StringMatch(subject, needle, start_index); 314 int value = Runtime::StringMatch(subject, needle, start_index);
330 if (value == -1) return Factory::null_value(); 315 if (value == -1) return Factory::null_value();
331 ASSERT(last_match_info->HasFastElements());
332 316
333 { 317 Handle<FixedArray> array = Factory::NewFixedArray(2);
334 NoHandleAllocation no_handles; 318 array->set(0, Smi::FromInt(value));
335 FixedArray* array = last_match_info->elements(); 319 array->set(1, Smi::FromInt(value + needle->length()));
336 SetAtomLastCapture(array, *subject, value, value + needle->length()); 320 return Factory::NewJSArrayWithElements(array);
337 }
338 return last_match_info;
339 } 321 }
340 322
341 323
342 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, 324 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
343 Handle<String> subject, 325 Handle<String> subject) {
344 Handle<JSArray> last_match_info) {
345 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); 326 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
346 ASSERT(last_match_info->HasFastElements());
347 Handle<JSArray> result = Factory::NewJSArray(1); 327 Handle<JSArray> result = Factory::NewJSArray(1);
348 int index = 0; 328 int index = 0;
349 int match_count = 0; 329 int match_count = 0;
350 int subject_length = subject->length(); 330 int subject_length = subject->length();
351 int needle_length = needle->length(); 331 int needle_length = needle->length();
352 int last_value = -1;
353 while (true) { 332 while (true) {
354 HandleScope scope;
355 int value = -1; 333 int value = -1;
356 if (index + needle_length <= subject_length) { 334 if (index + needle_length <= subject_length) {
357 value = Runtime::StringMatch(subject, needle, index); 335 value = Runtime::StringMatch(subject, needle, index);
358 } 336 }
359 if (value == -1) { 337 if (value == -1) break;
360 if (last_value != -1) { 338 HandleScope scope;
361 Handle<FixedArray> array(last_match_info->elements());
362 SetAtomLastCapture(*array,
363 *subject,
364 last_value,
365 last_value + needle->length());
366 }
367 break;
368 }
369
370 int end = value + needle_length; 339 int end = value + needle_length;
371 340
372 // Create an array that looks like the static last_match_info array 341 Handle<FixedArray> array = Factory::NewFixedArray(2);
373 // that is attached to the global RegExp object. We will be returning 342 array->set(0, Smi::FromInt(value));
374 // an array of these. 343 array->set(1, Smi::FromInt(end));
375 Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2);
376 SetCapture(*array, 0, value);
377 SetCapture(*array, 1, end);
378 SetLastCaptureCount(*array, 2);
379 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); 344 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
380 SetElement(result, match_count, pair); 345 SetElement(result, match_count, pair);
381 match_count++; 346 match_count++;
382 index = end; 347 index = end;
383 if (needle_length == 0) index++; 348 if (needle_length == 0) index++;
384 last_value = value;
385 } 349 }
386 return result; 350 return result;
387 } 351 }
388 352
389 353
390 // Irregexp implementation. 354 // Irregexp implementation.
391 355
392 356
393 // Ensures that the regexp object contains a compiled version of the 357 // Retrieves a compiled version of the regexp for either ASCII or non-ASCII
394 // source for either ASCII or non-ASCII strings. 358 // strings. If the compiled version doesn't already exist, it is compiled
395 // If the compiled version doesn't already exist, it is compiled
396 // from the source pattern. 359 // from the source pattern.
397 // If compilation fails, an exception is thrown and this function 360 // Irregexp is not feature complete yet. If there is something in the
398 // returns false. 361 // regexp that the compiler cannot currently handle, an empty
399 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, 362 // handle is returned, but no exception is thrown.
400 bool is_ascii) { 363 static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re,
401 int index; 364 bool is_ascii) {
402 if (is_ascii) { 365 ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
403 index = JSRegExp::kIrregexpASCIICodeIndex; 366 Handle<FixedArray> alternatives(
404 } else { 367 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)));
405 index = JSRegExp::kIrregexpUC16CodeIndex; 368 ASSERT_EQ(2, alternatives->length());
406 } 369
407 Object* entry = re->DataAt(index); 370 int index = is_ascii ? 0 : 1;
408 if (!entry->IsTheHole()) { 371 Object* entry = alternatives->get(index);
409 // A value has already been compiled. 372 if (!entry->IsNull()) {
410 if (entry->IsJSObject()) { 373 return Handle<FixedArray>(FixedArray::cast(entry));
411 // If it's a JS value, it's an error.
412 Top::Throw(entry);
413 return false;
414 }
415 return true;
416 } 374 }
417 375
418 // Compile the RegExp. 376 // Compile the RegExp.
419 ZoneScope zone_scope(DELETE_ON_EXIT); 377 ZoneScope zone_scope(DELETE_ON_EXIT);
420 378
421 JSRegExp::Flags flags = re->GetFlags(); 379 JSRegExp::Flags flags = re->GetFlags();
422 380
423 Handle<String> pattern(re->Pattern()); 381 Handle<String> pattern(re->Pattern());
424 if (!pattern->IsFlat(StringShape(*pattern))) { 382 if (!pattern->IsFlat(StringShape(*pattern))) {
425 FlattenString(pattern); 383 FlattenString(pattern);
426 } 384 }
427 385
428 RegExpCompileData compile_data; 386 RegExpCompileData compile_data;
429 FlatStringReader reader(pattern); 387 FlatStringReader reader(pattern);
430 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { 388 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) {
431 // Throw an exception if we fail to parse the pattern. 389 // Throw an exception if we fail to parse the pattern.
432 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. 390 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once.
433 ThrowRegExpException(re, 391 ThrowRegExpException(re,
434 pattern, 392 pattern,
435 compile_data.error, 393 compile_data.error,
436 "malformed_regexp"); 394 "malformed_regexp");
437 return false; 395 return Handle<FixedArray>::null();
438 } 396 }
439 RegExpEngine::CompilationResult result = 397 Handle<FixedArray> compiled_entry =
440 RegExpEngine::Compile(&compile_data, 398 RegExpEngine::Compile(&compile_data,
441 flags.is_ignore_case(), 399 flags.is_ignore_case(),
442 flags.is_multiline(), 400 flags.is_multiline(),
443 pattern, 401 pattern,
444 is_ascii); 402 is_ascii);
445 if (result.error_message != NULL) { 403 if (!compiled_entry.is_null()) {
446 // Unable to compile regexp. 404 alternatives->set(index, *compiled_entry);
447 Handle<JSArray> array = Factory::NewJSArray(2);
448 SetElement(array, 0, pattern);
449 SetElement(array,
450 1,
451 Factory::NewStringFromUtf8(CStrVector(result.error_message)));
452 Handle<Object> regexp_err =
453 Factory::NewSyntaxError("malformed_regexp", array);
454 Top::Throw(*regexp_err);
455 re->SetDataAt(index, *regexp_err);
456 return false;
457 } 405 }
458 406 return compiled_entry;
459 NoHandleAllocation no_handles;
460
461 FixedArray* data = FixedArray::cast(re->data());
462 data->set(index, result.code);
463 int register_max = IrregexpMaxRegisterCount(data);
464 if (result.num_registers > register_max) {
465 SetIrregexpMaxRegisterCount(data, result.num_registers);
466 }
467
468 return true;
469 } 407 }
470 408
471 409
472 int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) { 410 int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) {
473 return Smi::cast( 411 return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value();
474 re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
475 } 412 }
476 413
477 414
478 void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) { 415 int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) {
479 re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value)); 416 return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value();
480 } 417 }
481 418
482 419
483 int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) { 420 Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) {
484 return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value(); 421 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
422 == RegExpMacroAssembler::kBytecodeImplementation);
423 return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex)));
485 } 424 }
486 425
487 426
488 int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) { 427 Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) {
489 return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); 428 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
429 != RegExpMacroAssembler::kBytecodeImplementation);
430 return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex)));
490 } 431 }
491 432
492 433
493 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { 434 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
494 int index; 435 Handle<String> pattern,
495 if (is_ascii) { 436 JSRegExp::Flags flags) {
496 index = JSRegExp::kIrregexpASCIICodeIndex; 437 // Make space for ASCII and UC16 versions.
497 } else { 438 Handle<FixedArray> alternatives = Factory::NewFixedArray(2);
498 index = JSRegExp::kIrregexpUC16CodeIndex; 439 alternatives->set_null(0);
499 } 440 alternatives->set_null(1);
500 return ByteArray::cast(re->get(index)); 441 Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives);
501 } 442 return re;
502
503
504 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
505 int index;
506 if (is_ascii) {
507 index = JSRegExp::kIrregexpASCIICodeIndex;
508 } else {
509 index = JSRegExp::kIrregexpUC16CodeIndex;
510 }
511 return Code::cast(re->get(index));
512 }
513
514
515 void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
516 Handle<String> pattern,
517 JSRegExp::Flags flags,
518 int capture_count) {
519 // Initialize compiled code entries to null.
520 Factory::SetRegExpIrregexpData(re,
521 JSRegExp::IRREGEXP,
522 pattern,
523 flags,
524 capture_count);
525 } 443 }
526 444
527 445
528 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, 446 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
529 Handle<String> subject, 447 Handle<String> subject,
530 int index, 448 Handle<Object> index) {
531 Handle<JSArray> last_match_info) {
532 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); 449 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
450 ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
533 451
534 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); 452 bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
535 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { 453 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
454 if (irregexp.is_null()) {
455 // We can't handle the RegExp with IRRegExp.
536 return Handle<Object>::null(); 456 return Handle<Object>::null();
537 } 457 }
538 458
539 // Prepare space for the return values. 459 // Prepare space for the return values.
540 Handle<FixedArray> re_data(FixedArray::cast(regexp->data())); 460 int number_of_registers = IrregexpNumberOfRegisters(irregexp);
541 int number_of_capture_registers = 461 OffsetsVector offsets(number_of_registers);
542 (IrregexpNumberOfCaptures(*re_data) + 1) * 2;
543 OffsetsVector offsets(number_of_capture_registers);
544 462
545 int previous_index = index; 463 int num_captures = IrregexpNumberOfCaptures(irregexp);
464
465 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
546 466
547 #ifdef DEBUG 467 #ifdef DEBUG
548 if (FLAG_trace_regexp_bytecodes) { 468 if (FLAG_trace_regexp_bytecodes) {
549 String* pattern = regexp->Pattern(); 469 String* pattern = regexp->Pattern();
550 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 470 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
551 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 471 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
552 } 472 }
553 #endif 473 #endif
554 474
555 if (!subject->IsFlat(StringShape(*subject))) { 475 if (!subject->IsFlat(StringShape(*subject))) {
556 FlattenString(subject); 476 FlattenString(subject);
557 } 477 }
558 478
559 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); 479 return IrregexpExecOnce(irregexp,
560 480 num_captures,
561 return IrregexpExecOnce(re_data,
562 number_of_capture_registers,
563 last_match_info,
564 subject, 481 subject,
565 previous_index, 482 previous_index,
566 offsets.vector(), 483 offsets.vector(),
567 offsets.length()); 484 offsets.length());
568 } 485 }
569 486
570 487
571 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, 488 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
572 Handle<String> subject, 489 Handle<String> subject) {
573 Handle<JSArray> last_match_info) {
574 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); 490 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
575 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()));
576 491
577 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); 492 bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
578 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { 493 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
494 if (irregexp.is_null()) {
579 return Handle<Object>::null(); 495 return Handle<Object>::null();
580 } 496 }
581 497
582 // Prepare space for the return values. 498 // Prepare space for the return values.
583 int number_of_capture_registers = 499 int number_of_registers = IrregexpNumberOfRegisters(irregexp);
584 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; 500 OffsetsVector offsets(number_of_registers);
585 OffsetsVector offsets(number_of_capture_registers);
586 501
587 int previous_index = 0; 502 int previous_index = 0;
588 503
589 Handle<JSArray> result = Factory::NewJSArray(0); 504 Handle<JSArray> result = Factory::NewJSArray(0);
590 int result_length = 0; 505 int i = 0;
591 Handle<Object> matches; 506 Handle<Object> matches;
592 507
593 if (!subject->IsFlat(StringShape(*subject))) { 508 if (!subject->IsFlat(StringShape(*subject))) {
594 FlattenString(subject); 509 FlattenString(subject);
595 } 510 }
596 511
597 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
598
599 while (true) { 512 while (true) {
600 if (previous_index > subject->length() || previous_index < 0) { 513 if (previous_index > subject->length() || previous_index < 0) {
601 // Per ECMA-262 15.10.6.2, if the previous index is greater than the 514 // Per ECMA-262 15.10.6.2, if the previous index is greater than the
602 // string length, there is no match. 515 // string length, there is no match.
603 matches = Factory::null_value(); 516 matches = Factory::null_value();
604 return result; 517 return result;
605 } else { 518 } else {
606 #ifdef DEBUG 519 #ifdef DEBUG
607 if (FLAG_trace_regexp_bytecodes) { 520 if (FLAG_trace_regexp_bytecodes) {
608 String* pattern = regexp->Pattern(); 521 String* pattern = regexp->Pattern();
609 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 522 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
610 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 523 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
611 } 524 }
612 #endif 525 #endif
613 HandleScope scope;
614 matches = IrregexpExecOnce(irregexp, 526 matches = IrregexpExecOnce(irregexp,
615 number_of_capture_registers, 527 IrregexpNumberOfCaptures(irregexp),
616 last_match_info,
617 subject, 528 subject,
618 previous_index, 529 previous_index,
619 offsets.vector(), 530 offsets.vector(),
620 offsets.length()); 531 offsets.length());
621 532
622 if (matches.is_null()) { 533 if (matches.is_null()) {
623 ASSERT(Top::has_pending_exception()); 534 ASSERT(Top::has_pending_exception());
624 return matches; 535 return matches;
625 } 536 }
626 537
627 if (matches->IsJSArray()) { 538 if (matches->IsJSArray()) {
628 // Create an array that looks like the static last_match_info array 539 SetElement(result, i, matches);
629 // that is attached to the global RegExp object. We will be returning 540 i++;
630 // an array of these. 541 previous_index = offsets.vector()[1];
631 Handle<FixedArray> matches_array(JSArray::cast(*matches)->elements()); 542 if (offsets.vector()[0] == offsets.vector()[1]) {
632 Handle<JSArray> latest_match = 543 previous_index++;
633 Factory::NewJSArray(kFirstCapture + number_of_capture_registers);
634 Handle<FixedArray> latest_match_array(latest_match->elements());
635
636 for (int i = 0; i < number_of_capture_registers; i++) {
637 SetCapture(*latest_match_array, i, GetCapture(*matches_array, i));
638 } 544 }
639 SetLastCaptureCount(*latest_match_array, number_of_capture_registers);
640
641 SetElement(result, result_length, latest_match);
642 result_length++;
643 previous_index = GetCapture(*matches_array, 1);
644 if (GetCapture(*matches_array, 0) == previous_index)
645 previous_index++;
646
647 } else { 545 } else {
648 ASSERT(matches->IsNull()); 546 ASSERT(matches->IsNull());
649 return result; 547 return result;
650 } 548 }
651 } 549 }
652 } 550 }
653 } 551 }
654 552
655 553
656 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp, 554 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp,
657 int number_of_capture_registers, 555 int num_captures,
658 Handle<JSArray> last_match_info,
659 Handle<String> subject, 556 Handle<String> subject,
660 int previous_index, 557 int previous_index,
661 int* offsets_vector, 558 int* offsets_vector,
662 int offsets_vector_length) { 559 int offsets_vector_length) {
663 StringShape shape(*subject); 560 ASSERT(subject->IsFlat(StringShape(*subject)));
664 ASSERT(subject->IsFlat(shape));
665 bool is_ascii = shape.IsAsciiRepresentation();
666 bool rc; 561 bool rc;
667 562
668 if (FLAG_regexp_native) { 563 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
564
565 switch (tag) {
566 case RegExpMacroAssembler::kIA32Implementation: {
669 #ifndef ARM 567 #ifndef ARM
670 Handle<Code> code(IrregexpNativeCode(*regexp, is_ascii)); 568 Handle<Code> code = IrregexpNativeCode(irregexp);
671 569
672 // Character offsets into string. 570 StringShape shape(*subject);
673 int start_offset = previous_index;
674 int end_offset = subject->length(shape);
675 571
676 if (shape.IsCons()) { 572 // Character offsets into string.
677 subject = Handle<String>(ConsString::cast(*subject)->first()); 573 int start_offset = previous_index;
678 } else if (shape.IsSliced()) { 574 int end_offset = subject->length(shape);
679 SlicedString* slice = SlicedString::cast(*subject);
680 start_offset += slice->start();
681 end_offset += slice->start();
682 subject = Handle<String>(slice->buffer());
683 }
684 575
685 // String is now either Sequential or External 576 if (shape.IsCons()) {
686 StringShape flatshape(*subject); 577 subject = Handle<String>(ConsString::cast(*subject)->first());
687 bool is_ascii = flatshape.IsAsciiRepresentation(); 578 } else if (shape.IsSliced()) {
688 int char_size_shift = is_ascii ? 0 : 1; 579 SlicedString* slice = SlicedString::cast(*subject);
580 start_offset += slice->start();
581 end_offset += slice->start();
582 subject = Handle<String>(slice->buffer());
583 }
689 584
690 RegExpMacroAssemblerIA32::Result res; 585 // String is now either Sequential or External
586 StringShape flatshape(*subject);
587 bool is_ascii = flatshape.IsAsciiRepresentation();
588 int char_size_shift = is_ascii ? 0 : 1;
691 589
692 if (flatshape.IsExternal()) { 590 RegExpMacroAssemblerIA32::Result res;
693 const byte* address; 591
694 if (is_ascii) { 592 if (flatshape.IsExternal()) {
695 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); 593 const byte* address;
696 address = reinterpret_cast<const byte*>(ext->resource()->data()); 594 if (is_ascii) {
697 } else { 595 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
698 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); 596 address = reinterpret_cast<const byte*>(ext->resource()->data());
699 address = reinterpret_cast<const byte*>(ext->resource()->data()); 597 } else {
598 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
599 address = reinterpret_cast<const byte*>(ext->resource()->data());
600 }
601 res = RegExpMacroAssemblerIA32::Execute(
602 *code,
603 const_cast<Address*>(&address),
604 start_offset << char_size_shift,
605 end_offset << char_size_shift,
606 offsets_vector,
607 previous_index == 0);
608 } else { // Sequential string
609 ASSERT(StringShape(*subject).IsSequential());
610 Address char_address =
611 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
612 : SeqTwoByteString::cast(*subject)->GetCharsAddress();
613 int byte_offset = char_address - reinterpret_cast<Address>(*subject);
614 res = RegExpMacroAssemblerIA32::Execute(
615 *code,
616 reinterpret_cast<Address*>(subject.location()),
617 byte_offset + (start_offset << char_size_shift),
618 byte_offset + (end_offset << char_size_shift),
619 offsets_vector,
620 previous_index == 0);
700 } 621 }
701 res = RegExpMacroAssemblerIA32::Execute(
702 *code,
703 const_cast<Address*>(&address),
704 start_offset << char_size_shift,
705 end_offset << char_size_shift,
706 offsets_vector,
707 previous_index == 0);
708 } else { // Sequential string
709 ASSERT(StringShape(*subject).IsSequential());
710 Address char_address =
711 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
712 : SeqTwoByteString::cast(*subject)->GetCharsAddress();
713 int byte_offset = char_address - reinterpret_cast<Address>(*subject);
714 res = RegExpMacroAssemblerIA32::Execute(
715 *code,
716 reinterpret_cast<Address*>(subject.location()),
717 byte_offset + (start_offset << char_size_shift),
718 byte_offset + (end_offset << char_size_shift),
719 offsets_vector,
720 previous_index == 0);
721 }
722 622
723 if (res == RegExpMacroAssemblerIA32::EXCEPTION) { 623 if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
724 ASSERT(Top::has_pending_exception()); 624 ASSERT(Top::has_pending_exception());
725 return Handle<Object>::null(); 625 return Handle<Object>::null();
726 } 626 }
727 rc = (res == RegExpMacroAssemblerIA32::SUCCESS); 627 rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
728 628
729 if (rc) { 629 if (rc) {
730 // Capture values are relative to start_offset only. 630 // Capture values are relative to start_offset only.
731 for (int i = 0; i < offsets_vector_length; i++) { 631 for (int i = 0; i < offsets_vector_length; i++) {
732 if (offsets_vector[i] >= 0) { 632 if (offsets_vector[i] >= 0) {
733 offsets_vector[i] += previous_index; 633 offsets_vector[i] += previous_index;
634 }
734 } 635 }
735 } 636 }
637 break;
638 #else
639 UNIMPLEMENTED();
640 rc = false;
641 break;
642 #endif
736 } 643 }
737 } else { 644 case RegExpMacroAssembler::kBytecodeImplementation: {
738 #else 645 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
739 // Unimplemented on ARM, fall through to bytecode. 646 offsets_vector[i] = -1;
740 } 647 }
741 { 648 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp);
742 #endif 649
743 for (int i = number_of_capture_registers - 1; i >= 0; i--) { 650 rc = IrregexpInterpreter::Match(byte_codes,
744 offsets_vector[i] = -1; 651 subject,
652 offsets_vector,
653 previous_index);
654 break;
745 } 655 }
746 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); 656 case RegExpMacroAssembler::kARMImplementation:
747 657 default:
748 rc = IrregexpInterpreter::Match(byte_codes, 658 UNREACHABLE();
749 subject, 659 rc = false;
750 offsets_vector, 660 break;
751 previous_index);
752 } 661 }
753 662
754 if (!rc) { 663 if (!rc) {
755 return Factory::null_value(); 664 return Factory::null_value();
756 } 665 }
757 666
758 FixedArray* array = last_match_info->elements(); 667 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
759 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
760 // The captures come in (start, end+1) pairs. 668 // The captures come in (start, end+1) pairs.
761 for (int i = 0; i < number_of_capture_registers; i += 2) { 669 for (int i = 0; i < 2 * (num_captures + 1); i += 2) {
762 SetCapture(array, i, offsets_vector[i]); 670 array->set(i, Smi::FromInt(offsets_vector[i]));
763 SetCapture(array, i + 1, offsets_vector[i + 1]); 671 array->set(i + 1, Smi::FromInt(offsets_vector[i + 1]));
764 } 672 }
765 SetLastCaptureCount(array, number_of_capture_registers); 673 return Factory::NewJSArrayWithElements(array);
766 SetLastSubject(array, *subject);
767 SetLastInput(array, *subject);
768 return last_match_info;
769 } 674 }
770 675
771 676
772 // ------------------------------------------------------------------- 677 // -------------------------------------------------------------------
773 // Implementation of the Irregexp regular expression engine. 678 // Implementation of the Irregexp regular expression engine.
774 // 679 //
775 // The Irregexp regular expression engine is intended to be a complete 680 // The Irregexp regular expression engine is intended to be a complete
776 // implementation of ECMAScript regular expressions. It generates either 681 // implementation of ECMAScript regular expressions. It generates either
777 // bytecodes or native code. 682 // bytecodes or native code.
778 683
779 // The Irregexp regexp engine is structured in three steps. 684 // The Irregexp regexp engine is structured in three steps.
780 // 1) The parser generates an abstract syntax tree. See ast.cc. 685 // 1) The parser generates an abstract syntax tree. See ast.cc.
781 // 2) From the AST a node network is created. The nodes are all 686 // 2) From the AST a node network is created. The nodes are all
782 // subclasses of RegExpNode. The nodes represent states when 687 // subclasses of RegExpNode. The nodes represent states when
783 // executing a regular expression. Several optimizations are 688 // executing a regular expression. Several optimizations are
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after
980 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); 885 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii);
981 886
982 int AllocateRegister() { 887 int AllocateRegister() {
983 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { 888 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
984 reg_exp_too_big_ = true; 889 reg_exp_too_big_ = true;
985 return next_register_; 890 return next_register_;
986 } 891 }
987 return next_register_++; 892 return next_register_++;
988 } 893 }
989 894
990 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, 895 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
991 RegExpNode* start, 896 RegExpNode* start,
992 int capture_count, 897 int capture_count,
993 Handle<String> pattern); 898 Handle<String> pattern);
994 899
995 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } 900 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
996 901
997 static const int kImplementationOffset = 0; 902 static const int kImplementationOffset = 0;
998 static const int kNumberOfRegistersOffset = 0; 903 static const int kNumberOfRegistersOffset = 0;
999 static const int kCodeOffset = 1; 904 static const int kCodeOffset = 1;
1000 905
1001 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } 906 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
1002 EndNode* accept() { return accept_; } 907 EndNode* accept() { return accept_; }
1003 908
(...skipping 24 matching lines...) Expand all
1028 public: 933 public:
1029 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 934 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
1030 compiler->IncrementRecursionDepth(); 935 compiler->IncrementRecursionDepth();
1031 } 936 }
1032 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 937 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
1033 private: 938 private:
1034 RegExpCompiler* compiler_; 939 RegExpCompiler* compiler_;
1035 }; 940 };
1036 941
1037 942
1038 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { 943 static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) {
1039 return RegExpEngine::CompilationResult("RegExp too big"); 944 Handle<JSArray> array = Factory::NewJSArray(2);
945 SetElement(array, 0, pattern);
946 const char* message = "RegExp too big";
947 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));
948 Handle<Object> regexp_err =
949 Factory::NewSyntaxError("malformed_regexp", array);
950 Top::Throw(*regexp_err);
951 return Handle<FixedArray>();
1040 } 952 }
1041 953
1042 954
1043 // Attempts to compile the regexp using an Irregexp code generator. Returns 955 // Attempts to compile the regexp using an Irregexp code generator. Returns
1044 // a fixed array or a null handle depending on whether it succeeded. 956 // a fixed array or a null handle depending on whether it succeeded.
1045 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) 957 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii)
1046 : next_register_(2 * (capture_count + 1)), 958 : next_register_(2 * (capture_count + 1)),
1047 work_list_(NULL), 959 work_list_(NULL),
1048 recursion_depth_(0), 960 recursion_depth_(0),
1049 ignore_case_(ignore_case), 961 ignore_case_(ignore_case),
1050 ascii_(ascii), 962 ascii_(ascii),
1051 reg_exp_too_big_(false) { 963 reg_exp_too_big_(false) {
1052 accept_ = new EndNode(EndNode::ACCEPT); 964 accept_ = new EndNode(EndNode::ACCEPT);
1053 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); 965 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
1054 } 966 }
1055 967
1056 968
1057 RegExpEngine::CompilationResult RegExpCompiler::Assemble( 969 Handle<FixedArray> RegExpCompiler::Assemble(
1058 RegExpMacroAssembler* macro_assembler, 970 RegExpMacroAssembler* macro_assembler,
1059 RegExpNode* start, 971 RegExpNode* start,
1060 int capture_count, 972 int capture_count,
1061 Handle<String> pattern) { 973 Handle<String> pattern) {
1062 #ifdef DEBUG 974 #ifdef DEBUG
1063 if (FLAG_trace_regexp_assembler) 975 if (FLAG_trace_regexp_assembler)
1064 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); 976 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
1065 else 977 else
1066 #endif 978 #endif
1067 macro_assembler_ = macro_assembler; 979 macro_assembler_ = macro_assembler;
1068 List <RegExpNode*> work_list(0); 980 List <RegExpNode*> work_list(0);
1069 work_list_ = &work_list; 981 work_list_ = &work_list;
1070 Label fail; 982 Label fail;
1071 macro_assembler_->PushBacktrack(&fail); 983 macro_assembler_->PushBacktrack(&fail);
1072 Trace new_trace; 984 Trace new_trace;
1073 start->Emit(this, &new_trace); 985 start->Emit(this, &new_trace);
1074 macro_assembler_->Bind(&fail); 986 macro_assembler_->Bind(&fail);
1075 macro_assembler_->Fail(); 987 macro_assembler_->Fail();
1076 while (!work_list.is_empty()) { 988 while (!work_list.is_empty()) {
1077 work_list.RemoveLast()->Emit(this, &new_trace); 989 work_list.RemoveLast()->Emit(this, &new_trace);
1078 } 990 }
1079 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); 991 if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern);
1080 992 Handle<FixedArray> array =
993 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);
994 array->set(RegExpImpl::kIrregexpImplementationIndex,
995 Smi::FromInt(macro_assembler_->Implementation()));
996 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex,
997 Smi::FromInt(next_register_));
998 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex,
999 Smi::FromInt(capture_count));
1081 Handle<Object> code = macro_assembler_->GetCode(pattern); 1000 Handle<Object> code = macro_assembler_->GetCode(pattern);
1082 1001 array->set(RegExpImpl::kIrregexpCodeIndex, *code);
1083 work_list_ = NULL; 1002 work_list_ = NULL;
1084 #ifdef DEBUG 1003 #ifdef DEBUG
1085 if (FLAG_trace_regexp_assembler) { 1004 if (FLAG_trace_regexp_assembler) {
1086 delete macro_assembler_; 1005 delete macro_assembler_;
1087 } 1006 }
1088 #endif 1007 #endif
1089 return RegExpEngine::CompilationResult(*code, next_register_); 1008 return array;
1090 } 1009 }
1091 1010
1092 1011
1093 bool Trace::DeferredAction::Mentions(int that) { 1012 bool Trace::DeferredAction::Mentions(int that) {
1094 if (type() == ActionNode::CLEAR_CAPTURES) { 1013 if (type() == ActionNode::CLEAR_CAPTURES) {
1095 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); 1014 Interval range = static_cast<DeferredClearCaptures*>(this)->range();
1096 return range.Contains(that); 1015 return range.Contains(that);
1097 } else { 1016 } else {
1098 return reg() == that; 1017 return reg() == that;
1099 } 1018 }
(...skipping 2697 matching lines...) Expand 10 before | Expand all | Expand 10 after
3797 // x{f, t} becomes this: 3716 // x{f, t} becomes this:
3798 // 3717 //
3799 // (r++)<-. 3718 // (r++)<-.
3800 // | ` 3719 // | `
3801 // | (x) 3720 // | (x)
3802 // v ^ 3721 // v ^
3803 // (r=0)-->(?)---/ [if r < t] 3722 // (r=0)-->(?)---/ [if r < t]
3804 // | 3723 // |
3805 // [if r >= f] \----> ... 3724 // [if r >= f] \----> ...
3806 // 3725 //
3726 //
3727 // TODO(someone): clear captures on repetition and handle empty
3728 // matches.
3807 3729
3808 // 15.10.2.5 RepeatMatcher algorithm. 3730 // 15.10.2.5 RepeatMatcher algorithm.
3809 // The parser has already eliminated the case where max is 0. In the case 3731 // The parser has already eliminated the case where max is 0. In the case
3810 // where max_match is zero the parser has removed the quantifier if min was 3732 // where max_match is zero the parser has removed the quantifier if min was
3811 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. 3733 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom.
3812 3734
3813 // If we know that we cannot match zero length then things are a little 3735 // If we know that we cannot match zero length then things are a little
3814 // simpler since we don't need to make the special zero length match check 3736 // simpler since we don't need to make the special zero length match check
3815 // from step 2.1. If the min and max are small we can unroll a little in 3737 // from step 2.1. If the min and max are small we can unroll a little in
3816 // this case. 3738 // this case.
(...skipping 846 matching lines...) Expand 10 before | Expand all | Expand 10 after
4663 } 4585 }
4664 } 4586 }
4665 4587
4666 4588
4667 void DispatchTableConstructor::VisitAction(ActionNode* that) { 4589 void DispatchTableConstructor::VisitAction(ActionNode* that) {
4668 RegExpNode* target = that->on_success(); 4590 RegExpNode* target = that->on_success();
4669 target->Accept(this); 4591 target->Accept(this);
4670 } 4592 }
4671 4593
4672 4594
4673 RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data, 4595 Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data,
4674 bool ignore_case, 4596 bool ignore_case,
4675 bool is_multiline, 4597 bool is_multiline,
4676 Handle<String> pattern, 4598 Handle<String> pattern,
4677 bool is_ascii) { 4599 bool is_ascii) {
4678 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { 4600 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
4679 return IrregexpRegExpTooBig(); 4601 return IrregexpRegExpTooBig(pattern);
4680 } 4602 }
4681 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); 4603 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);
4682 // Wrap the body of the regexp in capture #0. 4604 // Wrap the body of the regexp in capture #0.
4683 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, 4605 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
4684 0, 4606 0,
4685 &compiler, 4607 &compiler,
4686 compiler.accept()); 4608 compiler.accept());
4687 RegExpNode* node = captured_body; 4609 RegExpNode* node = captured_body;
4688 if (!data->tree->IsAnchored()) { 4610 if (!data->tree->IsAnchored()) {
4689 // Add a .*? at the beginning, outside the body capture, unless 4611 // Add a .*? at the beginning, outside the body capture, unless
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
4736 EmbeddedVector<byte, 1024> codes; 4658 EmbeddedVector<byte, 1024> codes;
4737 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4659 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4738 return compiler.Assemble(&macro_assembler, 4660 return compiler.Assemble(&macro_assembler,
4739 node, 4661 node,
4740 data->capture_count, 4662 data->capture_count,
4741 pattern); 4663 pattern);
4742 } 4664 }
4743 4665
4744 4666
4745 }} // namespace v8::internal 4667 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/macros.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698