OLD | NEW |
---|---|
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 195 matching lines...) | |
206 Handle<String> pattern, | 206 Handle<String> pattern, |
207 Handle<String> flag_str) { | 207 Handle<String> flag_str) { |
208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); | 208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); |
209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); | 209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); |
210 bool in_cache = !cached.is_null(); | 210 bool in_cache = !cached.is_null(); |
211 LOG(RegExpCompileEvent(re, in_cache)); | 211 LOG(RegExpCompileEvent(re, in_cache)); |
212 | 212 |
213 Handle<Object> result; | 213 Handle<Object> result; |
214 if (in_cache) { | 214 if (in_cache) { |
215 re->set_data(*cached); | 215 re->set_data(*cached); |
216 result = re; | 216 return re; |
217 } else { | 217 } |
218 FlattenString(pattern); | 218 FlattenString(pattern); |
219 ZoneScope zone_scope(DELETE_ON_EXIT); | 219 ZoneScope zone_scope(DELETE_ON_EXIT); |
220 RegExpCompileData parse_result; | 220 RegExpCompileData parse_result; |
221 FlatStringReader reader(pattern); | 221 FlatStringReader reader(pattern); |
222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { | 222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { |
223 // Throw an exception if we fail to parse the pattern. | 223 // Throw an exception if we fail to parse the pattern. |
224 ThrowRegExpException(re, | 224 ThrowRegExpException(re, |
225 pattern, | 225 pattern, |
226 parse_result.error, | 226 parse_result.error, |
227 "malformed_regexp"); | 227 "malformed_regexp"); |
228 return Handle<Object>::null(); | 228 return Handle<Object>::null(); |
229 } | |
230 | |
231 if (parse_result.simple && !flags.is_ignore_case()) { | |
232 // Parse-tree is a single atom that is equal to the pattern. | |
233 result = AtomCompile(re, pattern, flags, pattern); | |
234 } else if (parse_result.tree->IsAtom() && | |
235 !flags.is_ignore_case() && | |
236 parse_result.capture_count == 0) { | |
237 RegExpAtom* atom = parse_result.tree->AsAtom(); | |
238 Vector<const uc16> atom_pattern = atom->data(); | |
239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); | |
240 result = AtomCompile(re, pattern, flags, atom_string); | |
241 } else { | |
242 result = IrregexpPrepare(re, pattern, flags); | |
243 } | |
244 Object* data = re->data(); | |
245 if (data->IsFixedArray()) { | |
246 // If compilation succeeded then the data is set on the regexp | |
247 // and we can store it in the cache. | |
248 Handle<FixedArray> data(FixedArray::cast(re->data())); | |
249 CompilationCache::PutRegExp(pattern, flags, data); | |
250 } | |
251 } | 229 } |
252 | 230 |
253 return result; | 231 if (parse_result.simple && !flags.is_ignore_case()) { |
232 // Parse-tree is a single atom that is equal to the pattern. | |
233 AtomCompile(re, pattern, flags, pattern); | |
234 } else if (parse_result.tree->IsAtom() && | |
235 !flags.is_ignore_case() && | |
236 parse_result.capture_count == 0) { | |
237 RegExpAtom* atom = parse_result.tree->AsAtom(); | |
238 Vector<const uc16> atom_pattern = atom->data(); | |
239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); | |
240 AtomCompile(re, pattern, flags, atom_string); | |
241 } else { | |
242 IrregexpPrepare(re, pattern, flags, parse_result.capture_count); | |
243 } | |
244 ASSERT(re->data()->IsFixedArray()); | |
245 // Compilation succeeded so the data is set on the regexp | |
246 // and we can store it in the cache. | |
247 Handle<FixedArray> data(FixedArray::cast(re->data())); | |
248 CompilationCache::PutRegExp(pattern, flags, data); | |
249 | |
250 return re; | |
254 } | 251 } |
255 | 252 |
256 | 253 |
257 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, | 254 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
258 Handle<String> subject, | 255 Handle<String> subject, |
259 Handle<Object> index) { | 256 int index, |
257 Handle<JSArray> last_match_info) { | |
260 switch (regexp->TypeTag()) { | 258 switch (regexp->TypeTag()) { |
261 case JSRegExp::ATOM: | 259 case JSRegExp::ATOM: |
262 return AtomExec(regexp, subject, index); | 260 return AtomExec(regexp, subject, index, last_match_info); |
263 case JSRegExp::IRREGEXP: { | 261 case JSRegExp::IRREGEXP: { |
264 Handle<Object> result = IrregexpExec(regexp, subject, index); | 262 Handle<Object> result = |
263 IrregexpExec(regexp, subject, index, last_match_info); | |
265 ASSERT(!result.is_null() || Top::has_pending_exception()); | 264 ASSERT(!result.is_null() || Top::has_pending_exception()); |
266 return result; | 265 return result; |
267 } | 266 } |
268 default: | 267 default: |
269 UNREACHABLE(); | 268 UNREACHABLE(); |
270 return Handle<Object>::null(); | 269 return Handle<Object>::null(); |
271 } | 270 } |
272 } | 271 } |
273 | 272 |
274 | 273 |
275 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, | 274 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, |
276 Handle<String> subject) { | 275 Handle<String> subject, |
276 Handle<JSArray> last_match_info) { | |
277 switch (regexp->TypeTag()) { | 277 switch (regexp->TypeTag()) { |
278 case JSRegExp::ATOM: | 278 case JSRegExp::ATOM: |
279 return AtomExecGlobal(regexp, subject); | 279 return AtomExecGlobal(regexp, subject, last_match_info); |
280 case JSRegExp::IRREGEXP: { | 280 case JSRegExp::IRREGEXP: { |
281 Handle<Object> result = IrregexpExecGlobal(regexp, subject); | 281 Handle<Object> result = |
282 IrregexpExecGlobal(regexp, subject, last_match_info); | |
282 ASSERT(!result.is_null() || Top::has_pending_exception()); | 283 ASSERT(!result.is_null() || Top::has_pending_exception()); |
283 return result; | 284 return result; |
284 } | 285 } |
285 default: | 286 default: |
286 UNREACHABLE(); | 287 UNREACHABLE(); |
287 return Handle<Object>::null(); | 288 return Handle<Object>::null(); |
288 } | 289 } |
289 } | 290 } |
290 | 291 |
291 | 292 |
292 // RegExp Atom implementation: Simple string search using indexOf. | 293 // RegExp Atom implementation: Simple string search using indexOf. |
293 | 294 |
294 | 295 |
295 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, | 296 void RegExpImpl::AtomCompile(Handle<JSRegExp> re, |
296 Handle<String> pattern, | 297 Handle<String> pattern, |
297 JSRegExp::Flags flags, | 298 JSRegExp::Flags flags, |
298 Handle<String> match_pattern) { | 299 Handle<String> match_pattern) { |
299 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern); | 300 Factory::SetRegExpAtomData(re, |
300 return re; | 301 JSRegExp::ATOM, |
302 pattern, | |
303 flags, | |
304 match_pattern); | |
305 } | |
306 | |
307 | |
308 static void SetAtomLastCapture(FixedArray* array, | |
309 String* subject, | |
310 int from, | |
311 int to) { | |
312 NoHandleAllocation no_handles; | |
313 RegExpImpl::SetLastCaptureCount(array, 2); | |
314 RegExpImpl::SetLastSubject(array, subject); | |
315 RegExpImpl::SetLastInput(array, subject); | |
316 RegExpImpl::SetCapture(array, 0, from); | |
317 RegExpImpl::SetCapture(array, 1, to); | |
301 } | 318 } |
302 | 319 |
303 | 320 |
304 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, | 321 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
305 Handle<String> subject, | 322 Handle<String> subject, |
306 Handle<Object> index) { | 323 int index, |
324 Handle<JSArray> last_match_info) { | |
307 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); | 325 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
308 | 326 |
309 uint32_t start_index; | 327 uint32_t start_index = index; |
310 if (!Array::IndexFromObject(*index, &start_index)) { | |
311 return Handle<Smi>(Smi::FromInt(-1)); | |
312 } | |
313 | 328 |
314 int value = Runtime::StringMatch(subject, needle, start_index); | 329 int value = Runtime::StringMatch(subject, needle, start_index); |
315 if (value == -1) return Factory::null_value(); | 330 if (value == -1) return Factory::null_value(); |
331 ASSERT(last_match_info->HasFastElements()); | |
316 | 332 |
317 Handle<FixedArray> array = Factory::NewFixedArray(2); | 333 { |
318 array->set(0, Smi::FromInt(value)); | 334 NoHandleAllocation no_handles; |
319 array->set(1, Smi::FromInt(value + needle->length())); | 335 FixedArray* array = last_match_info->elements(); |
320 return Factory::NewJSArrayWithElements(array); | 336 SetAtomLastCapture(array, *subject, value, value + needle->length()); |
337 } | |
338 return last_match_info; | |
321 } | 339 } |
322 | 340 |
323 | 341 |
324 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, | 342 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, |
325 Handle<String> subject) { | 343 Handle<String> subject, |
344 Handle<JSArray> last_match_info) { | |
326 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); | 345 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
346 ASSERT(last_match_info->HasFastElements()); | |
327 Handle<JSArray> result = Factory::NewJSArray(1); | 347 Handle<JSArray> result = Factory::NewJSArray(1); |
328 int index = 0; | 348 int index = 0; |
329 int match_count = 0; | 349 int match_count = 0; |
330 int subject_length = subject->length(); | 350 int subject_length = subject->length(); |
331 int needle_length = needle->length(); | 351 int needle_length = needle->length(); |
352 int last_value = -1; | |
332 while (true) { | 353 while (true) { |
354 HandleScope scope; | |
333 int value = -1; | 355 int value = -1; |
334 if (index + needle_length <= subject_length) { | 356 if (index + needle_length <= subject_length) { |
335 value = Runtime::StringMatch(subject, needle, index); | 357 value = Runtime::StringMatch(subject, needle, index); |
336 } | 358 } |
337 if (value == -1) break; | 359 if (value == -1) { |
338 HandleScope scope; | 360 if (last_value != -1) { |
361 Handle<FixedArray> array(last_match_info->elements()); | |
362 SetAtomLastCapture(*array, | |
363 *subject, | |
364 last_value, | |
365 last_value + needle->length()); | |
366 } | |
367 break; | |
368 } | |
369 | |
339 int end = value + needle_length; | 370 int end = value + needle_length; |
340 | 371 |
341 Handle<FixedArray> array = Factory::NewFixedArray(2); | 372 // Create an array that looks like the static last_match_info array |
342 array->set(0, Smi::FromInt(value)); | 373 // that is attached to the global RegExp object. We will be returning |
343 array->set(1, Smi::FromInt(end)); | 374 // an array of these. |
375 Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2); | |
376 SetCapture(*array, 0, value); | |
377 SetCapture(*array, 1, end); | |
378 SetLastCaptureCount(*array, 2); | |
344 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); | 379 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); |
345 SetElement(result, match_count, pair); | 380 SetElement(result, match_count, pair); |
346 match_count++; | 381 match_count++; |
347 index = end; | 382 index = end; |
348 if (needle_length == 0) index++; | 383 if (needle_length == 0) index++; |
384 last_value = value; | |
349 } | 385 } |
350 return result; | 386 return result; |
351 } | 387 } |
352 | 388 |
353 | 389 |
354 // Irregexp implementation. | 390 // Irregexp implementation. |
355 | 391 |
356 | 392 |
357 // Retrieves a compiled version of the regexp for either ASCII or non-ASCII | 393 // Ensures that the regexp object contains a compiled version of the |
358 // strings. If the compiled version doesn't already exist, it is compiled | 394 // source for either ASCII or non-ASCII strings. |
395 // If the compiled version doesn't already exist, it is compiled | |
359 // from the source pattern. | 396 // from the source pattern. |
360 // Irregexp is not feature complete yet. If there is something in the | 397 // If compilation fails, an exception is thrown and this function |
361 // regexp that the compiler cannot currently handle, an empty | 398 // returns false. |
362 // handle is returned, but no exception is thrown. | 399 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, |
363 static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re, | 400 bool is_ascii) { |
364 bool is_ascii) { | 401 int index; |
365 ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); | 402 if (is_ascii) { |
366 Handle<FixedArray> alternatives( | 403 index = JSRegExp::kIrregexpASCIICodeIndex; |
367 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex))); | 404 } else { |
368 ASSERT_EQ(2, alternatives->length()); | 405 index = JSRegExp::kIrregexpUC16CodeIndex; |
369 | 406 } |
370 int index = is_ascii ? 0 : 1; | 407 Object* entry = re->DataAt(index); |
371 Object* entry = alternatives->get(index); | 408 if (!entry->IsTheHole()) { |
372 if (!entry->IsNull()) { | 409 // A value has already been compiled. |
373 return Handle<FixedArray>(FixedArray::cast(entry)); | 410 if (entry->IsJSObject()) { |
411 // If it's a JS value, it's an error. | |
412 Top::Throw(entry); | |
413 return false; | |
414 } | |
415 return true; | |
374 } | 416 } |
375 | 417 |
376 // Compile the RegExp. | 418 // Compile the RegExp. |
377 ZoneScope zone_scope(DELETE_ON_EXIT); | 419 ZoneScope zone_scope(DELETE_ON_EXIT); |
378 | 420 |
379 JSRegExp::Flags flags = re->GetFlags(); | 421 JSRegExp::Flags flags = re->GetFlags(); |
380 | 422 |
381 Handle<String> pattern(re->Pattern()); | 423 Handle<String> pattern(re->Pattern()); |
382 if (!pattern->IsFlat(StringShape(*pattern))) { | 424 if (!pattern->IsFlat(StringShape(*pattern))) { |
383 FlattenString(pattern); | 425 FlattenString(pattern); |
384 } | 426 } |
385 | 427 |
386 RegExpCompileData compile_data; | 428 RegExpCompileData compile_data; |
387 FlatStringReader reader(pattern); | 429 FlatStringReader reader(pattern); |
388 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { | 430 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { |
389 // Throw an exception if we fail to parse the pattern. | 431 // Throw an exception if we fail to parse the pattern. |
390 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. | 432 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. |
391 ThrowRegExpException(re, | 433 ThrowRegExpException(re, |
392 pattern, | 434 pattern, |
393 compile_data.error, | 435 compile_data.error, |
394 "malformed_regexp"); | 436 "malformed_regexp"); |
395 return Handle<FixedArray>::null(); | 437 return false; |
396 } | 438 } |
397 Handle<FixedArray> compiled_entry = | 439 RegExpEngine::CompilationResult result = |
398 RegExpEngine::Compile(&compile_data, | 440 RegExpEngine::Compile(&compile_data, |
399 flags.is_ignore_case(), | 441 flags.is_ignore_case(), |
400 flags.is_multiline(), | 442 flags.is_multiline(), |
401 pattern, | 443 pattern, |
402 is_ascii); | 444 is_ascii); |
403 if (!compiled_entry.is_null()) { | 445 if (result.error_message != NULL) { |
404 alternatives->set(index, *compiled_entry); | 446 // Unable to compile regexp. |
447 Handle<JSArray> array = Factory::NewJSArray(2); | |
448 SetElement(array, 0, pattern); | |
449 SetElement(array, | |
450 1, | |
451 Factory::NewStringFromUtf8(CStrVector(result.error_message))); | |
452 Handle<Object> regexp_err = | |
453 Factory::NewSyntaxError("malformed_regexp", array); | |
454 Top::Throw(*regexp_err); | |
455 re->SetDataAt(index, *regexp_err); | |
456 return false; | |
405 } | 457 } |
406 return compiled_entry; | 458 |
459 NoHandleAllocation no_handles; | |
460 | |
461 FixedArray* data = FixedArray::cast(re->data()); | |
462 data->set(index, result.code); | |
463 int register_max = IrregexpMaxRegisterCount(data); | |
464 if (result.num_registers > register_max) { | |
465 SetIrregexpMaxRegisterCount(data, result.num_registers); | |
466 } | |
467 | |
468 return true; | |
407 } | 469 } |
408 | 470 |
409 | 471 |
410 int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) { | 472 int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) { |
411 return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value(); | 473 return Smi::cast( |
474 re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); | |
412 } | 475 } |
413 | 476 |
414 | 477 |
415 int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) { | 478 void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) { |
416 return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value(); | 479 re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value)); |
417 } | 480 } |
418 | 481 |
419 | 482 |
420 Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) { | 483 int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) { |
421 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() | 484 return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value(); |
422 == RegExpMacroAssembler::kBytecodeImplementation); | |
423 return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex))); | |
424 } | 485 } |
425 | 486 |
426 | 487 |
427 Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) { | 488 int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) { |
428 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() | 489 return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); |
429 != RegExpMacroAssembler::kBytecodeImplementation); | |
430 return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex))); | |
431 } | 490 } |
432 | 491 |
433 | 492 |
434 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, | 493 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { |
435 Handle<String> pattern, | 494 int index; |
436 JSRegExp::Flags flags) { | 495 if (is_ascii) { |
437 // Make space for ASCII and UC16 versions. | 496 index = JSRegExp::kIrregexpASCIICodeIndex; |
438 Handle<FixedArray> alternatives = Factory::NewFixedArray(2); | 497 } else { |
439 alternatives->set_null(0); | 498 index = JSRegExp::kIrregexpUC16CodeIndex; |
440 alternatives->set_null(1); | 499 } |
441 Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives); | 500 return ByteArray::cast(re->get(index)); |
442 return re; | 501 } |
502 | |
503 | |
504 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { | |
505 int index; | |
506 if (is_ascii) { | |
507 index = JSRegExp::kIrregexpASCIICodeIndex; | |
508 } else { | |
509 index = JSRegExp::kIrregexpUC16CodeIndex; | |
510 } | |
511 return Code::cast(re->get(index)); | |
512 } | |
513 | |
514 | |
515 void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, | |
516 Handle<String> pattern, | |
517 JSRegExp::Flags flags, | |
518 int capture_count) { | |
519 // Initialize compiled code entries to null. | |
520 Factory::SetRegExpIrregexpData(re, | |
521 JSRegExp::IRREGEXP, | |
522 pattern, | |
523 flags, | |
524 capture_count); | |
443 } | 525 } |
444 | 526 |
445 | 527 |
446 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, | 528 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, |
447 Handle<String> subject, | 529 Handle<String> subject, |
448 Handle<Object> index) { | 530 int index, |
531 Handle<JSArray> last_match_info) { | |
449 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | 532 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
450 ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); | |
451 | 533 |
452 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); | 534 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); |
453 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); | 535 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { |
454 if (irregexp.is_null()) { | |
455 // We can't handle the RegExp with IRRegExp. | |
456 return Handle<Object>::null(); | 536 return Handle<Object>::null(); |
457 } | 537 } |
458 | 538 |
459 // Prepare space for the return values. | 539 // Prepare space for the return values. |
460 int number_of_registers = IrregexpNumberOfRegisters(irregexp); | 540 Handle<FixedArray> re_data(FixedArray::cast(regexp->data())); |
461 OffsetsVector offsets(number_of_registers); | 541 int number_of_capture_registers = |
542 (IrregexpNumberOfCaptures(*re_data) + 1) * 2; | |
543 OffsetsVector offsets(number_of_capture_registers); | |
462 | 544 |
463 int num_captures = IrregexpNumberOfCaptures(irregexp); | 545 int previous_index = index; |
464 | |
465 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); | |
466 | 546 |
467 #ifdef DEBUG | 547 #ifdef DEBUG |
468 if (FLAG_trace_regexp_bytecodes) { | 548 if (FLAG_trace_regexp_bytecodes) { |
469 String* pattern = regexp->Pattern(); | 549 String* pattern = regexp->Pattern(); |
470 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 550 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
471 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 551 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
472 } | 552 } |
473 #endif | 553 #endif |
474 | 554 |
475 if (!subject->IsFlat(StringShape(*subject))) { | 555 if (!subject->IsFlat(StringShape(*subject))) { |
476 FlattenString(subject); | 556 FlattenString(subject); |
477 } | 557 } |
478 | 558 |
479 return IrregexpExecOnce(irregexp, | 559 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); |
480 num_captures, | 560 |
561 return IrregexpExecOnce(re_data, | |
562 number_of_capture_registers, | |
563 last_match_info, | |
481 subject, | 564 subject, |
482 previous_index, | 565 previous_index, |
483 offsets.vector(), | 566 offsets.vector(), |
484 offsets.length()); | 567 offsets.length()); |
485 } | 568 } |
486 | 569 |
487 | 570 |
488 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, | 571 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, |
489 Handle<String> subject) { | 572 Handle<String> subject, |
573 Handle<JSArray> last_match_info) { | |
490 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | 574 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
575 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data())); | |
491 | 576 |
492 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); | 577 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); |
493 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); | 578 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { |
494 if (irregexp.is_null()) { | |
495 return Handle<Object>::null(); | 579 return Handle<Object>::null(); |
496 } | 580 } |
497 | 581 |
498 // Prepare space for the return values. | 582 // Prepare space for the return values. |
499 int number_of_registers = IrregexpNumberOfRegisters(irregexp); | 583 int number_of_capture_registers = |
500 OffsetsVector offsets(number_of_registers); | 584 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
585 OffsetsVector offsets(number_of_capture_registers); | |
501 | 586 |
502 int previous_index = 0; | 587 int previous_index = 0; |
503 | 588 |
504 Handle<JSArray> result = Factory::NewJSArray(0); | 589 Handle<JSArray> result = Factory::NewJSArray(0); |
505 int i = 0; | 590 int result_length = 0; |
506 Handle<Object> matches; | 591 Handle<Object> matches; |
507 | 592 |
508 if (!subject->IsFlat(StringShape(*subject))) { | 593 if (!subject->IsFlat(StringShape(*subject))) { |
509 FlattenString(subject); | 594 FlattenString(subject); |
510 } | 595 } |
511 | 596 |
597 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); | |
598 | |
512 while (true) { | 599 while (true) { |
513 if (previous_index > subject->length() || previous_index < 0) { | 600 if (previous_index > subject->length() || previous_index < 0) { |
514 // Per ECMA-262 15.10.6.2, if the previous index is greater than the | 601 // Per ECMA-262 15.10.6.2, if the previous index is greater than the |
515 // string length, there is no match. | 602 // string length, there is no match. |
516 matches = Factory::null_value(); | 603 matches = Factory::null_value(); |
Mads Ager (chromium), 2009/03/11 13:49:17:
I know this is not your code, but why is there an [...] (comment truncated in this view)
Erik Corry, 2009/03/11 14:01:06:
Lasse has a patch waiting that also fixes this.
| |
517 return result; | 604 return result; |
518 } else { | 605 } else { |
519 #ifdef DEBUG | 606 #ifdef DEBUG |
520 if (FLAG_trace_regexp_bytecodes) { | 607 if (FLAG_trace_regexp_bytecodes) { |
521 String* pattern = regexp->Pattern(); | 608 String* pattern = regexp->Pattern(); |
522 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 609 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
523 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 610 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
524 } | 611 } |
525 #endif | 612 #endif |
613 HandleScope scope; | |
526 matches = IrregexpExecOnce(irregexp, | 614 matches = IrregexpExecOnce(irregexp, |
527 IrregexpNumberOfCaptures(irregexp), | 615 number_of_capture_registers, |
616 last_match_info, | |
528 subject, | 617 subject, |
529 previous_index, | 618 previous_index, |
530 offsets.vector(), | 619 offsets.vector(), |
531 offsets.length()); | 620 offsets.length()); |
532 | 621 |
533 if (matches.is_null()) { | 622 if (matches.is_null()) { |
534 ASSERT(Top::has_pending_exception()); | 623 ASSERT(Top::has_pending_exception()); |
535 return matches; | 624 return matches; |
536 } | 625 } |
537 | 626 |
538 if (matches->IsJSArray()) { | 627 if (matches->IsJSArray()) { |
539 SetElement(result, i, matches); | 628 // Create an array that looks like the static last_match_info array |
540 i++; | 629 // that is attached to the global RegExp object. We will be returning |
541 previous_index = offsets.vector()[1]; | 630 // an array of these. |
542 if (offsets.vector()[0] == offsets.vector()[1]) { | 631 Handle<FixedArray> matches_array(JSArray::cast(*matches)->elements()); |
632 Handle<JSArray> latest_match = | |
633 Factory::NewJSArray(kFirstCapture + number_of_capture_registers); | |
634 Handle<FixedArray> latest_match_array(latest_match->elements()); | |
635 | |
636 for (int i = 0; i < number_of_capture_registers; i++) { | |
637 SetCapture(*latest_match_array, i, GetCapture(*matches_array, i)); | |
638 } | |
639 SetLastCaptureCount(*latest_match_array, number_of_capture_registers); | |
640 | |
641 SetElement(result, result_length, latest_match); | |
642 result_length++; | |
643 previous_index = GetCapture(*matches_array, 1); | |
644 if (GetCapture(*matches_array, 0) == previous_index) | |
543 previous_index++; | 645 previous_index++; |
544 } | 646 |
545 } else { | 647 } else { |
546 ASSERT(matches->IsNull()); | 648 ASSERT(matches->IsNull()); |
547 return result; | 649 return result; |
548 } | 650 } |
549 } | 651 } |
550 } | 652 } |
551 } | 653 } |
552 | 654 |
553 | 655 |
554 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, | 656 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp, |
555 int num_captures, | 657 int number_of_capture_registers, |
658 Handle<JSArray> last_match_info, | |
556 Handle<String> subject, | 659 Handle<String> subject, |
557 int previous_index, | 660 int previous_index, |
558 int* offsets_vector, | 661 int* offsets_vector, |
559 int offsets_vector_length) { | 662 int offsets_vector_length) { |
560 ASSERT(subject->IsFlat(StringShape(*subject))); | 663 StringShape shape(*subject); |
664 ASSERT(subject->IsFlat(shape)); | |
665 bool is_ascii = shape.IsAsciiRepresentation(); | |
561 bool rc; | 666 bool rc; |
562 | 667 |
563 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); | 668 Handle<String> original_subject = subject; |
669 if (FLAG_regexp_native) { | |
670 #ifndef ARM | |
671 Handle<Code> code(IrregexpNativeCode(*regexp, is_ascii)); | |
564 | 672 |
565 switch (tag) { | 673 // Character offsets into string. |
566 case RegExpMacroAssembler::kIA32Implementation: { | 674 int start_offset = previous_index; |
567 #ifndef ARM | 675 int end_offset = subject->length(shape); |
568 Handle<Code> code = IrregexpNativeCode(irregexp); | |
569 | 676 |
570 StringShape shape(*subject); | 677 if (shape.IsCons()) { |
678 subject = Handle<String>(ConsString::cast(*subject)->first()); | |
679 } else if (shape.IsSliced()) { | |
680 SlicedString* slice = SlicedString::cast(*subject); | |
681 start_offset += slice->start(); | |
682 end_offset += slice->start(); | |
683 subject = Handle<String>(slice->buffer()); | |
684 } | |
571 | 685 |
572 // Character offsets into string. | 686 // String is now either Sequential or External |
573 int start_offset = previous_index; | 687 StringShape flatshape(*subject); |
574 int end_offset = subject->length(shape); | 688 bool is_ascii = flatshape.IsAsciiRepresentation(); |
689 int char_size_shift = is_ascii ? 0 : 1; | |
575 | 690 |
576 if (shape.IsCons()) { | 691 RegExpMacroAssemblerIA32::Result res; |
577 subject = Handle<String>(ConsString::cast(*subject)->first()); | 692 |
578 } else if (shape.IsSliced()) { | 693 if (flatshape.IsExternal()) { |
579 SlicedString* slice = SlicedString::cast(*subject); | 694 const byte* address; |
580 start_offset += slice->start(); | 695 if (is_ascii) { |
581 end_offset += slice->start(); | 696 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); |
582 subject = Handle<String>(slice->buffer()); | 697 address = reinterpret_cast<const byte*>(ext->resource()->data()); |
698 } else { | |
699 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); | |
700 address = reinterpret_cast<const byte*>(ext->resource()->data()); | |
583 } | 701 } |
702 res = RegExpMacroAssemblerIA32::Execute( | |
703 *code, | |
704 const_cast<Address*>(&address), | |
705 start_offset << char_size_shift, | |
706 end_offset << char_size_shift, | |
707 offsets_vector, | |
708 previous_index == 0); | |
709 } else { // Sequential string | |
710 ASSERT(StringShape(*subject).IsSequential()); | |
711 Address char_address = | |
712 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() | |
713 : SeqTwoByteString::cast(*subject)->GetCharsAddress(); | |
714 int byte_offset = char_address - reinterpret_cast<Address>(*subject); | |
715 res = RegExpMacroAssemblerIA32::Execute( | |
716 *code, | |
717 reinterpret_cast<Address*>(subject.location()), | |
718 byte_offset + (start_offset << char_size_shift), | |
719 byte_offset + (end_offset << char_size_shift), | |
720 offsets_vector, | |
721 previous_index == 0); | |
722 } | |
584 | 723 |
585 // String is now either Sequential or External | 724 if (res == RegExpMacroAssemblerIA32::EXCEPTION) { |
586 StringShape flatshape(*subject); | 725 ASSERT(Top::has_pending_exception()); |
587 bool is_ascii = flatshape.IsAsciiRepresentation(); | 726 return Handle<Object>::null(); |
588 int char_size_shift = is_ascii ? 0 : 1; | 727 } |
728 rc = (res == RegExpMacroAssemblerIA32::SUCCESS); | |
589 | 729 |
590 RegExpMacroAssemblerIA32::Result res; | 730 if (rc) { |
591 | 731 // Capture values are relative to start_offset only. |
592 if (flatshape.IsExternal()) { | 732 for (int i = 0; i < offsets_vector_length; i++) { |
593 const byte* address; | 733 if (offsets_vector[i] >= 0) { |
594 if (is_ascii) { | 734 offsets_vector[i] += previous_index; |
595 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); | |
596 address = reinterpret_cast<const byte*>(ext->resource()->data()); | |
597 } else { | |
598 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); | |
599 address = reinterpret_cast<const byte*>(ext->resource()->data()); | |
600 } | |
601 res = RegExpMacroAssemblerIA32::Execute( | |
602 *code, | |
603 const_cast<Address*>(&address), | |
604 start_offset << char_size_shift, | |
605 end_offset << char_size_shift, | |
606 offsets_vector, | |
607 previous_index == 0); | |
608 } else { // Sequential string | |
609 ASSERT(StringShape(*subject).IsSequential()); | |
610 Address char_address = | |
611 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() | |
612 : SeqTwoByteString::cast(*subject)->GetCharsAddress(); | |
613 int byte_offset = char_address - reinterpret_cast<Address>(*subject); | |
614 res = RegExpMacroAssemblerIA32::Execute( | |
615 *code, | |
616 reinterpret_cast<Address*>(subject.location()), | |
617 byte_offset + (start_offset << char_size_shift), | |
618 byte_offset + (end_offset << char_size_shift), | |
619 offsets_vector, | |
620 previous_index == 0); | |
621 } | |
622 | |
623 if (res == RegExpMacroAssemblerIA32::EXCEPTION) { | |
624 ASSERT(Top::has_pending_exception()); | |
625 return Handle<Object>::null(); | |
626 } | |
627 rc = (res == RegExpMacroAssemblerIA32::SUCCESS); | |
628 | |
629 if (rc) { | |
630 // Capture values are relative to start_offset only. | |
631 for (int i = 0; i < offsets_vector_length; i++) { | |
632 if (offsets_vector[i] >= 0) { | |
633 offsets_vector[i] += previous_index; | |
634 } | |
635 } | 735 } |
636 } | 736 } |
637 break; | 737 } |
738 } else { | |
638 #else | 739 #else |
639 UNIMPLEMENTED(); | 740 // Unimplemented on ARM, fall through to bytecode. |
Mads Ager (chromium)
2009/03/11 13:49:17
Auch, this is hard to read. Can we factor this di
Lasse Reichstein
2009/03/11 13:54:03
I'm all for factoring it differently, but I think
Erik Corry
2009/03/11 14:01:06
I'll leave it alone for now.
| |
640 rc = false; | 741 } |
641 break; | 742 { |
642 #endif | 743 #endif |
744 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | |
745 offsets_vector[i] = -1; | |
643 } | 746 } |
644 case RegExpMacroAssembler::kBytecodeImplementation: { | 747 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); |
645 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { | |
646 offsets_vector[i] = -1; | |
647 } | |
648 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp); | |
649 | 748 |
650 rc = IrregexpInterpreter::Match(byte_codes, | 749 rc = IrregexpInterpreter::Match(byte_codes, |
651 subject, | 750 subject, |
652 offsets_vector, | 751 offsets_vector, |
653 previous_index); | 752 previous_index); |
654 break; | |
655 } | |
656 case RegExpMacroAssembler::kARMImplementation: | |
657 default: | |
658 UNREACHABLE(); | |
659 rc = false; | |
660 break; | |
661 } | 753 } |
662 | 754 |
663 if (!rc) { | 755 if (!rc) { |
664 return Factory::null_value(); | 756 return Factory::null_value(); |
665 } | 757 } |
666 | 758 |
667 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); | 759 FixedArray* array = last_match_info->elements(); |
760 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); | |
668 // The captures come in (start, end+1) pairs. | 761 // The captures come in (start, end+1) pairs. |
669 for (int i = 0; i < 2 * (num_captures + 1); i += 2) { | 762 for (int i = 0; i < number_of_capture_registers; i += 2) { |
670 array->set(i, Smi::FromInt(offsets_vector[i])); | 763 SetCapture(array, i, offsets_vector[i]); |
671 array->set(i + 1, Smi::FromInt(offsets_vector[i + 1])); | 764 SetCapture(array, i + 1, offsets_vector[i + 1]); |
672 } | 765 } |
673 return Factory::NewJSArrayWithElements(array); | 766 SetLastCaptureCount(array, number_of_capture_registers); |
767 SetLastSubject(array, *original_subject); | |
768 SetLastInput(array, *original_subject); | |
769 return last_match_info; | |
674 } | 770 } |
675 | 771 |
676 | 772 |
677 // ------------------------------------------------------------------- | 773 // ------------------------------------------------------------------- |
678 // Implmentation of the Irregexp regular expression engine. | 774 // Implementation of the Irregexp regular expression engine. |
679 // | 775 // |
680 // The Irregexp regular expression engine is intended to be a complete | 776 // The Irregexp regular expression engine is intended to be a complete |
681 // implementation of ECMAScript regular expressions. It generates either | 777 // implementation of ECMAScript regular expressions. It generates either |
682 // bytecodes or native code. | 778 // bytecodes or native code. |
683 | 779 |
684 // The Irregexp regexp engine is structured in three steps. | 780 // The Irregexp regexp engine is structured in three steps. |
685 // 1) The parser generates an abstract syntax tree. See ast.cc. | 781 // 1) The parser generates an abstract syntax tree. See ast.cc. |
686 // 2) From the AST a node network is created. The nodes are all | 782 // 2) From the AST a node network is created. The nodes are all |
687 // subclasses of RegExpNode. The nodes represent states when | 783 // subclasses of RegExpNode. The nodes represent states when |
688 // executing a regular expression. Several optimizations are | 784 // executing a regular expression. Several optimizations are |
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
885 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); | 981 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); |
886 | 982 |
887 int AllocateRegister() { | 983 int AllocateRegister() { |
888 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { | 984 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { |
889 reg_exp_too_big_ = true; | 985 reg_exp_too_big_ = true; |
890 return next_register_; | 986 return next_register_; |
891 } | 987 } |
892 return next_register_++; | 988 return next_register_++; |
893 } | 989 } |
894 | 990 |
895 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, | 991 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, |
896 RegExpNode* start, | 992 RegExpNode* start, |
897 int capture_count, | 993 int capture_count, |
898 Handle<String> pattern); | 994 Handle<String> pattern); |
899 | 995 |
900 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 996 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
901 | 997 |
902 static const int kImplementationOffset = 0; | 998 static const int kImplementationOffset = 0; |
903 static const int kNumberOfRegistersOffset = 0; | 999 static const int kNumberOfRegistersOffset = 0; |
904 static const int kCodeOffset = 1; | 1000 static const int kCodeOffset = 1; |
905 | 1001 |
906 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } | 1002 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
907 EndNode* accept() { return accept_; } | 1003 EndNode* accept() { return accept_; } |
908 | 1004 |
(...skipping 24 matching lines...) Expand all Loading... | |
933 public: | 1029 public: |
934 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 1030 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
935 compiler->IncrementRecursionDepth(); | 1031 compiler->IncrementRecursionDepth(); |
936 } | 1032 } |
937 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 1033 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
938 private: | 1034 private: |
939 RegExpCompiler* compiler_; | 1035 RegExpCompiler* compiler_; |
940 }; | 1036 }; |
941 | 1037 |
942 | 1038 |
943 static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) { | 1039 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { |
944 Handle<JSArray> array = Factory::NewJSArray(2); | 1040 return RegExpEngine::CompilationResult("RegExp too big"); |
945 SetElement(array, 0, pattern); | |
946 const char* message = "RegExp too big"; | |
947 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message))); | |
948 Handle<Object> regexp_err = | |
949 Factory::NewSyntaxError("malformed_regexp", array); | |
950 Top::Throw(*regexp_err); | |
951 return Handle<FixedArray>(); | |
952 } | 1041 } |
953 | 1042 |
954 | 1043 |
955 // Attempts to compile the regexp using an Irregexp code generator. Returns | 1044 // Attempts to compile the regexp using an Irregexp code generator. Returns |
956 // a fixed array or a null handle depending on whether it succeeded. | 1045 // a fixed array or a null handle depending on whether it succeeded. |
957 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) | 1046 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) |
958 : next_register_(2 * (capture_count + 1)), | 1047 : next_register_(2 * (capture_count + 1)), |
959 work_list_(NULL), | 1048 work_list_(NULL), |
960 recursion_depth_(0), | 1049 recursion_depth_(0), |
961 ignore_case_(ignore_case), | 1050 ignore_case_(ignore_case), |
962 ascii_(ascii), | 1051 ascii_(ascii), |
963 reg_exp_too_big_(false) { | 1052 reg_exp_too_big_(false) { |
964 accept_ = new EndNode(EndNode::ACCEPT); | 1053 accept_ = new EndNode(EndNode::ACCEPT); |
965 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); | 1054 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); |
966 } | 1055 } |
967 | 1056 |
968 | 1057 |
969 Handle<FixedArray> RegExpCompiler::Assemble( | 1058 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
970 RegExpMacroAssembler* macro_assembler, | 1059 RegExpMacroAssembler* macro_assembler, |
971 RegExpNode* start, | 1060 RegExpNode* start, |
972 int capture_count, | 1061 int capture_count, |
973 Handle<String> pattern) { | 1062 Handle<String> pattern) { |
974 #ifdef DEBUG | 1063 #ifdef DEBUG |
975 if (FLAG_trace_regexp_assembler) | 1064 if (FLAG_trace_regexp_assembler) |
976 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); | 1065 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); |
977 else | 1066 else |
978 #endif | 1067 #endif |
979 macro_assembler_ = macro_assembler; | 1068 macro_assembler_ = macro_assembler; |
980 List <RegExpNode*> work_list(0); | 1069 List <RegExpNode*> work_list(0); |
981 work_list_ = &work_list; | 1070 work_list_ = &work_list; |
982 Label fail; | 1071 Label fail; |
983 macro_assembler_->PushBacktrack(&fail); | 1072 macro_assembler_->PushBacktrack(&fail); |
984 Trace new_trace; | 1073 Trace new_trace; |
985 start->Emit(this, &new_trace); | 1074 start->Emit(this, &new_trace); |
986 macro_assembler_->Bind(&fail); | 1075 macro_assembler_->Bind(&fail); |
987 macro_assembler_->Fail(); | 1076 macro_assembler_->Fail(); |
988 while (!work_list.is_empty()) { | 1077 while (!work_list.is_empty()) { |
989 work_list.RemoveLast()->Emit(this, &new_trace); | 1078 work_list.RemoveLast()->Emit(this, &new_trace); |
990 } | 1079 } |
991 if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern); | 1080 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); |
992 Handle<FixedArray> array = | 1081 |
993 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); | |
994 array->set(RegExpImpl::kIrregexpImplementationIndex, | |
995 Smi::FromInt(macro_assembler_->Implementation())); | |
996 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, | |
997 Smi::FromInt(next_register_)); | |
998 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, | |
999 Smi::FromInt(capture_count)); | |
1000 Handle<Object> code = macro_assembler_->GetCode(pattern); | 1082 Handle<Object> code = macro_assembler_->GetCode(pattern); |
1001 array->set(RegExpImpl::kIrregexpCodeIndex, *code); | 1083 |
1002 work_list_ = NULL; | 1084 work_list_ = NULL; |
1003 #ifdef DEBUG | 1085 #ifdef DEBUG |
1004 if (FLAG_trace_regexp_assembler) { | 1086 if (FLAG_trace_regexp_assembler) { |
1005 delete macro_assembler_; | 1087 delete macro_assembler_; |
1006 } | 1088 } |
1007 #endif | 1089 #endif |
1008 return array; | 1090 return RegExpEngine::CompilationResult(*code, next_register_); |
1009 } | 1091 } |
1010 | 1092 |
1011 | 1093 |
1012 bool Trace::DeferredAction::Mentions(int that) { | 1094 bool Trace::DeferredAction::Mentions(int that) { |
1013 if (type() == ActionNode::CLEAR_CAPTURES) { | 1095 if (type() == ActionNode::CLEAR_CAPTURES) { |
1014 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); | 1096 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); |
1015 return range.Contains(that); | 1097 return range.Contains(that); |
1016 } else { | 1098 } else { |
1017 return reg() == that; | 1099 return reg() == that; |
1018 } | 1100 } |
(...skipping 2697 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3716 // x{f, t} becomes this: | 3798 // x{f, t} becomes this: |
3717 // | 3799 // |
3718 // (r++)<-. | 3800 // (r++)<-. |
3719 // | ` | 3801 // | ` |
3720 // | (x) | 3802 // | (x) |
3721 // v ^ | 3803 // v ^ |
3722 // (r=0)-->(?)---/ [if r < t] | 3804 // (r=0)-->(?)---/ [if r < t] |
3723 // | | 3805 // | |
3724 // [if r >= f] \----> ... | 3806 // [if r >= f] \----> ... |
3725 // | 3807 // |
3726 // | |
3727 // TODO(someone): clear captures on repetition and handle empty | |
3728 // matches. | |
3729 | 3808 |
3730 // 15.10.2.5 RepeatMatcher algorithm. | 3809 // 15.10.2.5 RepeatMatcher algorithm. |
3731 // The parser has already eliminated the case where max is 0. In the case | 3810 // The parser has already eliminated the case where max is 0. In the case |
3732 // where max_match is zero the parser has removed the quantifier if min was | 3811 // where max_match is zero the parser has removed the quantifier if min was |
3733 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. | 3812 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. |
3734 | 3813 |
3735 // If we know that we cannot match zero length then things are a little | 3814 // If we know that we cannot match zero length then things are a little |
3736 // simpler since we don't need to make the special zero length match check | 3815 // simpler since we don't need to make the special zero length match check |
3737 // from step 2.1. If the min and max are small we can unroll a little in | 3816 // from step 2.1. If the min and max are small we can unroll a little in |
3738 // this case. | 3817 // this case. |
(...skipping 846 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4585 } | 4664 } |
4586 } | 4665 } |
4587 | 4666 |
4588 | 4667 |
4589 void DispatchTableConstructor::VisitAction(ActionNode* that) { | 4668 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
4590 RegExpNode* target = that->on_success(); | 4669 RegExpNode* target = that->on_success(); |
4591 target->Accept(this); | 4670 target->Accept(this); |
4592 } | 4671 } |
4593 | 4672 |
4594 | 4673 |
4595 Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data, | 4674 RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data, |
4596 bool ignore_case, | 4675 bool ignore_case, |
4597 bool is_multiline, | 4676 bool is_multiline, |
4598 Handle<String> pattern, | 4677 Handle<String> pattern, |
4599 bool is_ascii) { | 4678 bool is_ascii) { |
4600 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { | 4679 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
4601 return IrregexpRegExpTooBig(pattern); | 4680 return IrregexpRegExpTooBig(); |
4602 } | 4681 } |
4603 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); | 4682 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); |
4604 // Wrap the body of the regexp in capture #0. | 4683 // Wrap the body of the regexp in capture #0. |
4605 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, | 4684 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, |
4606 0, | 4685 0, |
4607 &compiler, | 4686 &compiler, |
4608 compiler.accept()); | 4687 compiler.accept()); |
4609 RegExpNode* node = captured_body; | 4688 RegExpNode* node = captured_body; |
4610 if (!data->tree->IsAnchored()) { | 4689 if (!data->tree->IsAnchored()) { |
4611 // Add a .*? at the beginning, outside the body capture, unless | 4690 // Add a .*? at the beginning, outside the body capture, unless |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4658 EmbeddedVector<byte, 1024> codes; | 4737 EmbeddedVector<byte, 1024> codes; |
4659 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4738 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
4660 return compiler.Assemble(&macro_assembler, | 4739 return compiler.Assemble(&macro_assembler, |
4661 node, | 4740 node, |
4662 data->capture_count, | 4741 data->capture_count, |
4663 pattern); | 4742 pattern); |
4664 } | 4743 } |
4665 | 4744 |
4666 | 4745 |
4667 }} // namespace v8::internal | 4746 }} // namespace v8::internal |
OLD | NEW |