OLD | NEW |
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
206 Handle<String> pattern, | 206 Handle<String> pattern, |
207 Handle<String> flag_str) { | 207 Handle<String> flag_str) { |
208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); | 208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); |
209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); | 209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); |
210 bool in_cache = !cached.is_null(); | 210 bool in_cache = !cached.is_null(); |
211 LOG(RegExpCompileEvent(re, in_cache)); | 211 LOG(RegExpCompileEvent(re, in_cache)); |
212 | 212 |
213 Handle<Object> result; | 213 Handle<Object> result; |
214 if (in_cache) { | 214 if (in_cache) { |
215 re->set_data(*cached); | 215 re->set_data(*cached); |
216 return re; | 216 result = re; |
217 } | 217 } else { |
218 FlattenString(pattern); | 218 FlattenString(pattern); |
219 ZoneScope zone_scope(DELETE_ON_EXIT); | 219 ZoneScope zone_scope(DELETE_ON_EXIT); |
220 RegExpCompileData parse_result; | 220 RegExpCompileData parse_result; |
221 FlatStringReader reader(pattern); | 221 FlatStringReader reader(pattern); |
222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { | 222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { |
223 // Throw an exception if we fail to parse the pattern. | 223 // Throw an exception if we fail to parse the pattern. |
224 ThrowRegExpException(re, | 224 ThrowRegExpException(re, |
225 pattern, | 225 pattern, |
226 parse_result.error, | 226 parse_result.error, |
227 "malformed_regexp"); | 227 "malformed_regexp"); |
228 return Handle<Object>::null(); | 228 return Handle<Object>::null(); |
| 229 } |
| 230 |
| 231 if (parse_result.simple && !flags.is_ignore_case()) { |
| 232 // Parse-tree is a single atom that is equal to the pattern. |
| 233 result = AtomCompile(re, pattern, flags, pattern); |
| 234 } else if (parse_result.tree->IsAtom() && |
| 235 !flags.is_ignore_case() && |
| 236 parse_result.capture_count == 0) { |
| 237 RegExpAtom* atom = parse_result.tree->AsAtom(); |
| 238 Vector<const uc16> atom_pattern = atom->data(); |
| 239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); |
| 240 result = AtomCompile(re, pattern, flags, atom_string); |
| 241 } else { |
| 242 result = IrregexpPrepare(re, pattern, flags); |
| 243 } |
| 244 Object* data = re->data(); |
| 245 if (data->IsFixedArray()) { |
| 246 // If compilation succeeded then the data is set on the regexp |
| 247 // and we can store it in the cache. |
| 248 Handle<FixedArray> data(FixedArray::cast(re->data())); |
| 249 CompilationCache::PutRegExp(pattern, flags, data); |
| 250 } |
229 } | 251 } |
230 | 252 |
231 if (parse_result.simple && !flags.is_ignore_case()) { | 253 return result; |
232 // Parse-tree is a single atom that is equal to the pattern. | |
233 AtomCompile(re, pattern, flags, pattern); | |
234 } else if (parse_result.tree->IsAtom() && | |
235 !flags.is_ignore_case() && | |
236 parse_result.capture_count == 0) { | |
237 RegExpAtom* atom = parse_result.tree->AsAtom(); | |
238 Vector<const uc16> atom_pattern = atom->data(); | |
239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); | |
240 AtomCompile(re, pattern, flags, atom_string); | |
241 } else { | |
242 IrregexpPrepare(re, pattern, flags, parse_result.capture_count); | |
243 } | |
244 ASSERT(re->data()->IsFixedArray()); | |
245 // Compilation succeeded so the data is set on the regexp | |
246 // and we can store it in the cache. | |
247 Handle<FixedArray> data(FixedArray::cast(re->data())); | |
248 CompilationCache::PutRegExp(pattern, flags, data); | |
249 | |
250 return re; | |
251 } | 254 } |
252 | 255 |
253 | 256 |
254 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, | 257 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
255 Handle<String> subject, | 258 Handle<String> subject, |
256 int index, | 259 Handle<Object> index) { |
257 Handle<JSArray> last_match_info) { | |
258 switch (regexp->TypeTag()) { | 260 switch (regexp->TypeTag()) { |
259 case JSRegExp::ATOM: | 261 case JSRegExp::ATOM: |
260 return AtomExec(regexp, subject, index, last_match_info); | 262 return AtomExec(regexp, subject, index); |
261 case JSRegExp::IRREGEXP: { | 263 case JSRegExp::IRREGEXP: { |
262 Handle<Object> result = | 264 Handle<Object> result = IrregexpExec(regexp, subject, index); |
263 IrregexpExec(regexp, subject, index, last_match_info); | |
264 ASSERT(!result.is_null() || Top::has_pending_exception()); | 265 ASSERT(!result.is_null() || Top::has_pending_exception()); |
265 return result; | 266 return result; |
266 } | 267 } |
267 default: | 268 default: |
268 UNREACHABLE(); | 269 UNREACHABLE(); |
269 return Handle<Object>::null(); | 270 return Handle<Object>::null(); |
270 } | 271 } |
271 } | 272 } |
272 | 273 |
273 | 274 |
274 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, | 275 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, |
275 Handle<String> subject, | 276 Handle<String> subject) { |
276 Handle<JSArray> last_match_info) { | |
277 switch (regexp->TypeTag()) { | 277 switch (regexp->TypeTag()) { |
278 case JSRegExp::ATOM: | 278 case JSRegExp::ATOM: |
279 return AtomExecGlobal(regexp, subject, last_match_info); | 279 return AtomExecGlobal(regexp, subject); |
280 case JSRegExp::IRREGEXP: { | 280 case JSRegExp::IRREGEXP: { |
281 Handle<Object> result = | 281 Handle<Object> result = IrregexpExecGlobal(regexp, subject); |
282 IrregexpExecGlobal(regexp, subject, last_match_info); | |
283 ASSERT(!result.is_null() || Top::has_pending_exception()); | 282 ASSERT(!result.is_null() || Top::has_pending_exception()); |
284 return result; | 283 return result; |
285 } | 284 } |
286 default: | 285 default: |
287 UNREACHABLE(); | 286 UNREACHABLE(); |
288 return Handle<Object>::null(); | 287 return Handle<Object>::null(); |
289 } | 288 } |
290 } | 289 } |
291 | 290 |
292 | 291 |
293 // RegExp Atom implementation: Simple string search using indexOf. | 292 // RegExp Atom implementation: Simple string search using indexOf. |
294 | 293 |
295 | 294 |
296 void RegExpImpl::AtomCompile(Handle<JSRegExp> re, | 295 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, |
297 Handle<String> pattern, | 296 Handle<String> pattern, |
298 JSRegExp::Flags flags, | 297 JSRegExp::Flags flags, |
299 Handle<String> match_pattern) { | 298 Handle<String> match_pattern) { |
300 Factory::SetRegExpAtomData(re, | 299 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern); |
301 JSRegExp::ATOM, | 300 return re; |
302 pattern, | |
303 flags, | |
304 match_pattern); | |
305 } | |
306 | |
307 | |
308 static void SetAtomLastCapture(FixedArray* array, | |
309 String* subject, | |
310 int from, | |
311 int to) { | |
312 NoHandleAllocation no_handles; | |
313 RegExpImpl::SetLastCaptureCount(array, 2); | |
314 RegExpImpl::SetLastSubject(array, subject); | |
315 RegExpImpl::SetLastInput(array, subject); | |
316 RegExpImpl::SetCapture(array, 0, from); | |
317 RegExpImpl::SetCapture(array, 1, to); | |
318 } | 301 } |
319 | 302 |
320 | 303 |
321 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, | 304 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
322 Handle<String> subject, | 305 Handle<String> subject, |
323 int index, | 306 Handle<Object> index) { |
324 Handle<JSArray> last_match_info) { | |
325 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); | 307 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
326 | 308 |
327 uint32_t start_index = index; | 309 uint32_t start_index; |
| 310 if (!Array::IndexFromObject(*index, &start_index)) { |
| 311 return Handle<Smi>(Smi::FromInt(-1)); |
| 312 } |
328 | 313 |
329 int value = Runtime::StringMatch(subject, needle, start_index); | 314 int value = Runtime::StringMatch(subject, needle, start_index); |
330 if (value == -1) return Factory::null_value(); | 315 if (value == -1) return Factory::null_value(); |
331 ASSERT(last_match_info->HasFastElements()); | |
332 | 316 |
333 { | 317 Handle<FixedArray> array = Factory::NewFixedArray(2); |
334 NoHandleAllocation no_handles; | 318 array->set(0, Smi::FromInt(value)); |
335 FixedArray* array = last_match_info->elements(); | 319 array->set(1, Smi::FromInt(value + needle->length())); |
336 SetAtomLastCapture(array, *subject, value, value + needle->length()); | 320 return Factory::NewJSArrayWithElements(array); |
337 } | |
338 return last_match_info; | |
339 } | 321 } |
340 | 322 |
341 | 323 |
342 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, | 324 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, |
343 Handle<String> subject, | 325 Handle<String> subject) { |
344 Handle<JSArray> last_match_info) { | |
345 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); | 326 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
346 ASSERT(last_match_info->HasFastElements()); | |
347 Handle<JSArray> result = Factory::NewJSArray(1); | 327 Handle<JSArray> result = Factory::NewJSArray(1); |
348 int index = 0; | 328 int index = 0; |
349 int match_count = 0; | 329 int match_count = 0; |
350 int subject_length = subject->length(); | 330 int subject_length = subject->length(); |
351 int needle_length = needle->length(); | 331 int needle_length = needle->length(); |
352 int last_value = -1; | |
353 while (true) { | 332 while (true) { |
354 HandleScope scope; | |
355 int value = -1; | 333 int value = -1; |
356 if (index + needle_length <= subject_length) { | 334 if (index + needle_length <= subject_length) { |
357 value = Runtime::StringMatch(subject, needle, index); | 335 value = Runtime::StringMatch(subject, needle, index); |
358 } | 336 } |
359 if (value == -1) { | 337 if (value == -1) break; |
360 if (last_value != -1) { | 338 HandleScope scope; |
361 Handle<FixedArray> array(last_match_info->elements()); | |
362 SetAtomLastCapture(*array, | |
363 *subject, | |
364 last_value, | |
365 last_value + needle->length()); | |
366 } | |
367 break; | |
368 } | |
369 | |
370 int end = value + needle_length; | 339 int end = value + needle_length; |
371 | 340 |
372 // Create an array that looks like the static last_match_info array | 341 Handle<FixedArray> array = Factory::NewFixedArray(2); |
373 // that is attached to the global RegExp object. We will be returning | 342 array->set(0, Smi::FromInt(value)); |
374 // an array of these. | 343 array->set(1, Smi::FromInt(end)); |
375 Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2); | |
376 SetCapture(*array, 0, value); | |
377 SetCapture(*array, 1, end); | |
378 SetLastCaptureCount(*array, 2); | |
379 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); | 344 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); |
380 SetElement(result, match_count, pair); | 345 SetElement(result, match_count, pair); |
381 match_count++; | 346 match_count++; |
382 index = end; | 347 index = end; |
383 if (needle_length == 0) index++; | 348 if (needle_length == 0) index++; |
384 last_value = value; | |
385 } | 349 } |
386 return result; | 350 return result; |
387 } | 351 } |
388 | 352 |
389 | 353 |
390 // Irregexp implementation. | 354 // Irregexp implementation. |
391 | 355 |
392 | 356 |
393 // Ensures that the regexp object contains a compiled version of the | 357 // Retrieves a compiled version of the regexp for either ASCII or non-ASCII |
394 // source for either ASCII or non-ASCII strings. | 358 // strings. If the compiled version doesn't already exist, it is compiled |
395 // If the compiled version doesn't already exist, it is compiled | |
396 // from the source pattern. | 359 // from the source pattern. |
397 // If compilation fails, an exception is thrown and this function | 360 // Irregexp is not feature complete yet. If there is something in the |
398 // returns false. | 361 // regexp that the compiler cannot currently handle, an empty |
399 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, | 362 // handle is returned, but no exception is thrown. |
400 bool is_ascii) { | 363 static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re, |
401 int index; | 364 bool is_ascii) { |
402 if (is_ascii) { | 365 ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); |
403 index = JSRegExp::kIrregexpASCIICodeIndex; | 366 Handle<FixedArray> alternatives( |
404 } else { | 367 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex))); |
405 index = JSRegExp::kIrregexpUC16CodeIndex; | 368 ASSERT_EQ(2, alternatives->length()); |
406 } | 369 |
407 Object* entry = re->DataAt(index); | 370 int index = is_ascii ? 0 : 1; |
408 if (!entry->IsTheHole()) { | 371 Object* entry = alternatives->get(index); |
409 // A value has already been compiled. | 372 if (!entry->IsNull()) { |
410 if (entry->IsJSObject()) { | 373 return Handle<FixedArray>(FixedArray::cast(entry)); |
411 // If it's a JS value, it's an error. | |
412 Top::Throw(entry); | |
413 return false; | |
414 } | |
415 return true; | |
416 } | 374 } |
417 | 375 |
418 // Compile the RegExp. | 376 // Compile the RegExp. |
419 ZoneScope zone_scope(DELETE_ON_EXIT); | 377 ZoneScope zone_scope(DELETE_ON_EXIT); |
420 | 378 |
421 JSRegExp::Flags flags = re->GetFlags(); | 379 JSRegExp::Flags flags = re->GetFlags(); |
422 | 380 |
423 Handle<String> pattern(re->Pattern()); | 381 Handle<String> pattern(re->Pattern()); |
424 if (!pattern->IsFlat(StringShape(*pattern))) { | 382 if (!pattern->IsFlat(StringShape(*pattern))) { |
425 FlattenString(pattern); | 383 FlattenString(pattern); |
426 } | 384 } |
427 | 385 |
428 RegExpCompileData compile_data; | 386 RegExpCompileData compile_data; |
429 FlatStringReader reader(pattern); | 387 FlatStringReader reader(pattern); |
430 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { | 388 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { |
431 // Throw an exception if we fail to parse the pattern. | 389 // Throw an exception if we fail to parse the pattern. |
432 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. | 390 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. |
433 ThrowRegExpException(re, | 391 ThrowRegExpException(re, |
434 pattern, | 392 pattern, |
435 compile_data.error, | 393 compile_data.error, |
436 "malformed_regexp"); | 394 "malformed_regexp"); |
437 return false; | 395 return Handle<FixedArray>::null(); |
438 } | 396 } |
439 RegExpEngine::CompilationResult result = | 397 Handle<FixedArray> compiled_entry = |
440 RegExpEngine::Compile(&compile_data, | 398 RegExpEngine::Compile(&compile_data, |
441 flags.is_ignore_case(), | 399 flags.is_ignore_case(), |
442 flags.is_multiline(), | 400 flags.is_multiline(), |
443 pattern, | 401 pattern, |
444 is_ascii); | 402 is_ascii); |
445 if (result.error_message != NULL) { | 403 if (!compiled_entry.is_null()) { |
446 // Unable to compile regexp. | 404 alternatives->set(index, *compiled_entry); |
447 Handle<JSArray> array = Factory::NewJSArray(2); | |
448 SetElement(array, 0, pattern); | |
449 SetElement(array, | |
450 1, | |
451 Factory::NewStringFromUtf8(CStrVector(result.error_message))); | |
452 Handle<Object> regexp_err = | |
453 Factory::NewSyntaxError("malformed_regexp", array); | |
454 Top::Throw(*regexp_err); | |
455 re->SetDataAt(index, *regexp_err); | |
456 return false; | |
457 } | 405 } |
458 | 406 return compiled_entry; |
459 NoHandleAllocation no_handles; | |
460 | |
461 FixedArray* data = FixedArray::cast(re->data()); | |
462 data->set(index, result.code); | |
463 int register_max = IrregexpMaxRegisterCount(data); | |
464 if (result.num_registers > register_max) { | |
465 SetIrregexpMaxRegisterCount(data, result.num_registers); | |
466 } | |
467 | |
468 return true; | |
469 } | 407 } |
470 | 408 |
471 | 409 |
472 int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) { | 410 int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) { |
473 return Smi::cast( | 411 return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value(); |
474 re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); | |
475 } | 412 } |
476 | 413 |
477 | 414 |
478 void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) { | 415 int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) { |
479 re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value)); | 416 return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value(); |
480 } | 417 } |
481 | 418 |
482 | 419 |
483 int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) { | 420 Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) { |
484 return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value(); | 421 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() |
| 422 == RegExpMacroAssembler::kBytecodeImplementation); |
| 423 return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex))); |
485 } | 424 } |
486 | 425 |
487 | 426 |
488 int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) { | 427 Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) { |
489 return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); | 428 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() |
| 429 != RegExpMacroAssembler::kBytecodeImplementation); |
| 430 return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex))); |
490 } | 431 } |
491 | 432 |
492 | 433 |
493 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { | 434 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, |
494 int index; | 435 Handle<String> pattern, |
495 if (is_ascii) { | 436 JSRegExp::Flags flags) { |
496 index = JSRegExp::kIrregexpASCIICodeIndex; | 437 // Make space for ASCII and UC16 versions. |
497 } else { | 438 Handle<FixedArray> alternatives = Factory::NewFixedArray(2); |
498 index = JSRegExp::kIrregexpUC16CodeIndex; | 439 alternatives->set_null(0); |
499 } | 440 alternatives->set_null(1); |
500 return ByteArray::cast(re->get(index)); | 441 Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives); |
501 } | 442 return re; |
502 | |
503 | |
504 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { | |
505 int index; | |
506 if (is_ascii) { | |
507 index = JSRegExp::kIrregexpASCIICodeIndex; | |
508 } else { | |
509 index = JSRegExp::kIrregexpUC16CodeIndex; | |
510 } | |
511 return Code::cast(re->get(index)); | |
512 } | |
513 | |
514 | |
515 void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, | |
516 Handle<String> pattern, | |
517 JSRegExp::Flags flags, | |
518 int capture_count) { | |
519 // Initialize compiled code entries to null. | |
520 Factory::SetRegExpIrregexpData(re, | |
521 JSRegExp::IRREGEXP, | |
522 pattern, | |
523 flags, | |
524 capture_count); | |
525 } | 443 } |
526 | 444 |
527 | 445 |
528 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, | 446 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, |
529 Handle<String> subject, | 447 Handle<String> subject, |
530 int index, | 448 Handle<Object> index) { |
531 Handle<JSArray> last_match_info) { | |
532 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | 449 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| 450 ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); |
533 | 451 |
534 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); | 452 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); |
535 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { | 453 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); |
| 454 if (irregexp.is_null()) { |
| 455 // We can't handle the RegExp with IRRegExp. |
536 return Handle<Object>::null(); | 456 return Handle<Object>::null(); |
537 } | 457 } |
538 | 458 |
539 // Prepare space for the return values. | 459 // Prepare space for the return values. |
540 Handle<FixedArray> re_data(FixedArray::cast(regexp->data())); | 460 int number_of_registers = IrregexpNumberOfRegisters(irregexp); |
541 int number_of_capture_registers = | 461 OffsetsVector offsets(number_of_registers); |
542 (IrregexpNumberOfCaptures(*re_data) + 1) * 2; | |
543 OffsetsVector offsets(number_of_capture_registers); | |
544 | 462 |
545 int previous_index = index; | 463 int num_captures = IrregexpNumberOfCaptures(irregexp); |
| 464 |
| 465 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); |
546 | 466 |
547 #ifdef DEBUG | 467 #ifdef DEBUG |
548 if (FLAG_trace_regexp_bytecodes) { | 468 if (FLAG_trace_regexp_bytecodes) { |
549 String* pattern = regexp->Pattern(); | 469 String* pattern = regexp->Pattern(); |
550 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 470 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
551 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 471 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
552 } | 472 } |
553 #endif | 473 #endif |
554 | 474 |
555 if (!subject->IsFlat(StringShape(*subject))) { | 475 if (!subject->IsFlat(StringShape(*subject))) { |
556 FlattenString(subject); | 476 FlattenString(subject); |
557 } | 477 } |
558 | 478 |
559 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); | 479 return IrregexpExecOnce(irregexp, |
560 | 480 num_captures, |
561 return IrregexpExecOnce(re_data, | |
562 number_of_capture_registers, | |
563 last_match_info, | |
564 subject, | 481 subject, |
565 previous_index, | 482 previous_index, |
566 offsets.vector(), | 483 offsets.vector(), |
567 offsets.length()); | 484 offsets.length()); |
568 } | 485 } |
569 | 486 |
570 | 487 |
571 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, | 488 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, |
572 Handle<String> subject, | 489 Handle<String> subject) { |
573 Handle<JSArray> last_match_info) { | |
574 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | 490 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
575 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data())); | |
576 | 491 |
577 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); | 492 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); |
578 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { | 493 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); |
| 494 if (irregexp.is_null()) { |
579 return Handle<Object>::null(); | 495 return Handle<Object>::null(); |
580 } | 496 } |
581 | 497 |
582 // Prepare space for the return values. | 498 // Prepare space for the return values. |
583 int number_of_capture_registers = | 499 int number_of_registers = IrregexpNumberOfRegisters(irregexp); |
584 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; | 500 OffsetsVector offsets(number_of_registers); |
585 OffsetsVector offsets(number_of_capture_registers); | |
586 | 501 |
587 int previous_index = 0; | 502 int previous_index = 0; |
588 | 503 |
589 Handle<JSArray> result = Factory::NewJSArray(0); | 504 Handle<JSArray> result = Factory::NewJSArray(0); |
590 int result_length = 0; | 505 int i = 0; |
591 Handle<Object> matches; | 506 Handle<Object> matches; |
592 | 507 |
593 if (!subject->IsFlat(StringShape(*subject))) { | 508 if (!subject->IsFlat(StringShape(*subject))) { |
594 FlattenString(subject); | 509 FlattenString(subject); |
595 } | 510 } |
596 | 511 |
597 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); | |
598 | |
599 while (true) { | 512 while (true) { |
600 if (previous_index > subject->length() || previous_index < 0) { | 513 if (previous_index > subject->length() || previous_index < 0) { |
601 // Per ECMA-262 15.10.6.2, if the previous index is greater than the | 514 // Per ECMA-262 15.10.6.2, if the previous index is greater than the |
602 // string length, there is no match. | 515 // string length, there is no match. |
603 matches = Factory::null_value(); | 516 matches = Factory::null_value(); |
604 return result; | 517 return result; |
605 } else { | 518 } else { |
606 #ifdef DEBUG | 519 #ifdef DEBUG |
607 if (FLAG_trace_regexp_bytecodes) { | 520 if (FLAG_trace_regexp_bytecodes) { |
608 String* pattern = regexp->Pattern(); | 521 String* pattern = regexp->Pattern(); |
609 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 522 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
610 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 523 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
611 } | 524 } |
612 #endif | 525 #endif |
613 HandleScope scope; | |
614 matches = IrregexpExecOnce(irregexp, | 526 matches = IrregexpExecOnce(irregexp, |
615 number_of_capture_registers, | 527 IrregexpNumberOfCaptures(irregexp), |
616 last_match_info, | |
617 subject, | 528 subject, |
618 previous_index, | 529 previous_index, |
619 offsets.vector(), | 530 offsets.vector(), |
620 offsets.length()); | 531 offsets.length()); |
621 | 532 |
622 if (matches.is_null()) { | 533 if (matches.is_null()) { |
623 ASSERT(Top::has_pending_exception()); | 534 ASSERT(Top::has_pending_exception()); |
624 return matches; | 535 return matches; |
625 } | 536 } |
626 | 537 |
627 if (matches->IsJSArray()) { | 538 if (matches->IsJSArray()) { |
628 // Create an array that looks like the static last_match_info array | 539 SetElement(result, i, matches); |
629 // that is attached to the global RegExp object. We will be returning | 540 i++; |
630 // an array of these. | 541 previous_index = offsets.vector()[1]; |
631 Handle<FixedArray> matches_array(JSArray::cast(*matches)->elements()); | 542 if (offsets.vector()[0] == offsets.vector()[1]) { |
632 Handle<JSArray> latest_match = | 543 previous_index++; |
633 Factory::NewJSArray(kFirstCapture + number_of_capture_registers); | |
634 Handle<FixedArray> latest_match_array(latest_match->elements()); | |
635 | |
636 for (int i = 0; i < number_of_capture_registers; i++) { | |
637 SetCapture(*latest_match_array, i, GetCapture(*matches_array, i)); | |
638 } | 544 } |
639 SetLastCaptureCount(*latest_match_array, number_of_capture_registers); | |
640 | |
641 SetElement(result, result_length, latest_match); | |
642 result_length++; | |
643 previous_index = GetCapture(*matches_array, 1); | |
644 if (GetCapture(*matches_array, 0) == previous_index) | |
645 previous_index++; | |
646 | |
647 } else { | 545 } else { |
648 ASSERT(matches->IsNull()); | 546 ASSERT(matches->IsNull()); |
649 return result; | 547 return result; |
650 } | 548 } |
651 } | 549 } |
652 } | 550 } |
653 } | 551 } |
654 | 552 |
655 | 553 |
656 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp, | 554 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, |
657 int number_of_capture_registers, | 555 int num_captures, |
658 Handle<JSArray> last_match_info, | |
659 Handle<String> subject, | 556 Handle<String> subject, |
660 int previous_index, | 557 int previous_index, |
661 int* offsets_vector, | 558 int* offsets_vector, |
662 int offsets_vector_length) { | 559 int offsets_vector_length) { |
663 StringShape shape(*subject); | 560 ASSERT(subject->IsFlat(StringShape(*subject))); |
664 ASSERT(subject->IsFlat(shape)); | |
665 bool is_ascii = shape.IsAsciiRepresentation(); | |
666 bool rc; | 561 bool rc; |
667 | 562 |
668 if (FLAG_regexp_native) { | 563 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); |
| 564 |
| 565 switch (tag) { |
| 566 case RegExpMacroAssembler::kIA32Implementation: { |
669 #ifndef ARM | 567 #ifndef ARM |
670 Handle<Code> code(IrregexpNativeCode(*regexp, is_ascii)); | 568 Handle<Code> code = IrregexpNativeCode(irregexp); |
671 | 569 |
672 // Character offsets into string. | 570 StringShape shape(*subject); |
673 int start_offset = previous_index; | |
674 int end_offset = subject->length(shape); | |
675 | 571 |
676 if (shape.IsCons()) { | 572 // Character offsets into string. |
677 subject = Handle<String>(ConsString::cast(*subject)->first()); | 573 int start_offset = previous_index; |
678 } else if (shape.IsSliced()) { | 574 int end_offset = subject->length(shape); |
679 SlicedString* slice = SlicedString::cast(*subject); | |
680 start_offset += slice->start(); | |
681 end_offset += slice->start(); | |
682 subject = Handle<String>(slice->buffer()); | |
683 } | |
684 | 575 |
685 // String is now either Sequential or External | 576 if (shape.IsCons()) { |
686 StringShape flatshape(*subject); | 577 subject = Handle<String>(ConsString::cast(*subject)->first()); |
687 bool is_ascii = flatshape.IsAsciiRepresentation(); | 578 } else if (shape.IsSliced()) { |
688 int char_size_shift = is_ascii ? 0 : 1; | 579 SlicedString* slice = SlicedString::cast(*subject); |
| 580 start_offset += slice->start(); |
| 581 end_offset += slice->start(); |
| 582 subject = Handle<String>(slice->buffer()); |
| 583 } |
689 | 584 |
690 RegExpMacroAssemblerIA32::Result res; | 585 // String is now either Sequential or External |
| 586 StringShape flatshape(*subject); |
| 587 bool is_ascii = flatshape.IsAsciiRepresentation(); |
| 588 int char_size_shift = is_ascii ? 0 : 1; |
691 | 589 |
692 if (flatshape.IsExternal()) { | 590 RegExpMacroAssemblerIA32::Result res; |
693 const byte* address; | 591 |
694 if (is_ascii) { | 592 if (flatshape.IsExternal()) { |
695 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); | 593 const byte* address; |
696 address = reinterpret_cast<const byte*>(ext->resource()->data()); | 594 if (is_ascii) { |
697 } else { | 595 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); |
698 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); | 596 address = reinterpret_cast<const byte*>(ext->resource()->data()); |
699 address = reinterpret_cast<const byte*>(ext->resource()->data()); | 597 } else { |
| 598 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); |
| 599 address = reinterpret_cast<const byte*>(ext->resource()->data()); |
| 600 } |
| 601 res = RegExpMacroAssemblerIA32::Execute( |
| 602 *code, |
| 603 const_cast<Address*>(&address), |
| 604 start_offset << char_size_shift, |
| 605 end_offset << char_size_shift, |
| 606 offsets_vector, |
| 607 previous_index == 0); |
| 608 } else { // Sequential string |
| 609 ASSERT(StringShape(*subject).IsSequential()); |
| 610 Address char_address = |
| 611 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() |
| 612 : SeqTwoByteString::cast(*subject)->GetCharsAddress(); |
| 613 int byte_offset = char_address - reinterpret_cast<Address>(*subject); |
| 614 res = RegExpMacroAssemblerIA32::Execute( |
| 615 *code, |
| 616 reinterpret_cast<Address*>(subject.location()), |
| 617 byte_offset + (start_offset << char_size_shift), |
| 618 byte_offset + (end_offset << char_size_shift), |
| 619 offsets_vector, |
| 620 previous_index == 0); |
700 } | 621 } |
701 res = RegExpMacroAssemblerIA32::Execute( | |
702 *code, | |
703 const_cast<Address*>(&address), | |
704 start_offset << char_size_shift, | |
705 end_offset << char_size_shift, | |
706 offsets_vector, | |
707 previous_index == 0); | |
708 } else { // Sequential string | |
709 ASSERT(StringShape(*subject).IsSequential()); | |
710 Address char_address = | |
711 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() | |
712 : SeqTwoByteString::cast(*subject)->GetCharsAddress(); | |
713 int byte_offset = char_address - reinterpret_cast<Address>(*subject); | |
714 res = RegExpMacroAssemblerIA32::Execute( | |
715 *code, | |
716 reinterpret_cast<Address*>(subject.location()), | |
717 byte_offset + (start_offset << char_size_shift), | |
718 byte_offset + (end_offset << char_size_shift), | |
719 offsets_vector, | |
720 previous_index == 0); | |
721 } | |
722 | 622 |
723 if (res == RegExpMacroAssemblerIA32::EXCEPTION) { | 623 if (res == RegExpMacroAssemblerIA32::EXCEPTION) { |
724 ASSERT(Top::has_pending_exception()); | 624 ASSERT(Top::has_pending_exception()); |
725 return Handle<Object>::null(); | 625 return Handle<Object>::null(); |
726 } | 626 } |
727 rc = (res == RegExpMacroAssemblerIA32::SUCCESS); | 627 rc = (res == RegExpMacroAssemblerIA32::SUCCESS); |
728 | 628 |
729 if (rc) { | 629 if (rc) { |
730 // Capture values are relative to start_offset only. | 630 // Capture values are relative to start_offset only. |
731 for (int i = 0; i < offsets_vector_length; i++) { | 631 for (int i = 0; i < offsets_vector_length; i++) { |
732 if (offsets_vector[i] >= 0) { | 632 if (offsets_vector[i] >= 0) { |
733 offsets_vector[i] += previous_index; | 633 offsets_vector[i] += previous_index; |
| 634 } |
734 } | 635 } |
735 } | 636 } |
| 637 break; |
| 638 #else |
| 639 UNIMPLEMENTED(); |
| 640 rc = false; |
| 641 break; |
| 642 #endif |
736 } | 643 } |
737 } else { | 644 case RegExpMacroAssembler::kBytecodeImplementation: { |
738 #else | 645 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { |
739 // Unimplemented on ARM, fall through to bytecode. | 646 offsets_vector[i] = -1; |
740 } | 647 } |
741 { | 648 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp); |
742 #endif | 649 |
743 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | 650 rc = IrregexpInterpreter::Match(byte_codes, |
744 offsets_vector[i] = -1; | 651 subject, |
| 652 offsets_vector, |
| 653 previous_index); |
| 654 break; |
745 } | 655 } |
746 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); | 656 case RegExpMacroAssembler::kARMImplementation: |
747 | 657 default: |
748 rc = IrregexpInterpreter::Match(byte_codes, | 658 UNREACHABLE(); |
749 subject, | 659 rc = false; |
750 offsets_vector, | 660 break; |
751 previous_index); | |
752 } | 661 } |
753 | 662 |
754 if (!rc) { | 663 if (!rc) { |
755 return Factory::null_value(); | 664 return Factory::null_value(); |
756 } | 665 } |
757 | 666 |
758 FixedArray* array = last_match_info->elements(); | 667 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); |
759 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); | |
760 // The captures come in (start, end+1) pairs. | 668 // The captures come in (start, end+1) pairs. |
761 for (int i = 0; i < number_of_capture_registers; i += 2) { | 669 for (int i = 0; i < 2 * (num_captures + 1); i += 2) { |
762 SetCapture(array, i, offsets_vector[i]); | 670 array->set(i, Smi::FromInt(offsets_vector[i])); |
763 SetCapture(array, i + 1, offsets_vector[i + 1]); | 671 array->set(i + 1, Smi::FromInt(offsets_vector[i + 1])); |
764 } | 672 } |
765 SetLastCaptureCount(array, number_of_capture_registers); | 673 return Factory::NewJSArrayWithElements(array); |
766 SetLastSubject(array, *subject); | |
767 SetLastInput(array, *subject); | |
768 return last_match_info; | |
769 } | 674 } |
770 | 675 |
771 | 676 |
772 // ------------------------------------------------------------------- | 677 // ------------------------------------------------------------------- |
773 // Implementation of the Irregexp regular expression engine. | 678 // Implementation of the Irregexp regular expression engine. |
774 // | 679 // |
775 // The Irregexp regular expression engine is intended to be a complete | 680 // The Irregexp regular expression engine is intended to be a complete |
776 // implementation of ECMAScript regular expressions. It generates either | 681 // implementation of ECMAScript regular expressions. It generates either |
777 // bytecodes or native code. | 682 // bytecodes or native code. |
778 | 683 |
779 // The Irregexp regexp engine is structured in three steps. | 684 // The Irregexp regexp engine is structured in three steps. |
780 // 1) The parser generates an abstract syntax tree. See ast.cc. | 685 // 1) The parser generates an abstract syntax tree. See ast.cc. |
781 // 2) From the AST a node network is created. The nodes are all | 686 // 2) From the AST a node network is created. The nodes are all |
782 // subclasses of RegExpNode. The nodes represent states when | 687 // subclasses of RegExpNode. The nodes represent states when |
783 // executing a regular expression. Several optimizations are | 688 // executing a regular expression. Several optimizations are |
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
980 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); | 885 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); |
981 | 886 |
982 int AllocateRegister() { | 887 int AllocateRegister() { |
983 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { | 888 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { |
984 reg_exp_too_big_ = true; | 889 reg_exp_too_big_ = true; |
985 return next_register_; | 890 return next_register_; |
986 } | 891 } |
987 return next_register_++; | 892 return next_register_++; |
988 } | 893 } |
989 | 894 |
990 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, | 895 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, |
991 RegExpNode* start, | 896 RegExpNode* start, |
992 int capture_count, | 897 int capture_count, |
993 Handle<String> pattern); | 898 Handle<String> pattern); |
994 | 899 |
995 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 900 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
996 | 901 |
997 static const int kImplementationOffset = 0; | 902 static const int kImplementationOffset = 0; |
998 static const int kNumberOfRegistersOffset = 0; | 903 static const int kNumberOfRegistersOffset = 0; |
999 static const int kCodeOffset = 1; | 904 static const int kCodeOffset = 1; |
1000 | 905 |
1001 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } | 906 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
1002 EndNode* accept() { return accept_; } | 907 EndNode* accept() { return accept_; } |
1003 | 908 |
(...skipping 24 matching lines...) Expand all Loading... |
1028 public: | 933 public: |
1029 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 934 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
1030 compiler->IncrementRecursionDepth(); | 935 compiler->IncrementRecursionDepth(); |
1031 } | 936 } |
1032 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 937 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
1033 private: | 938 private: |
1034 RegExpCompiler* compiler_; | 939 RegExpCompiler* compiler_; |
1035 }; | 940 }; |
1036 | 941 |
1037 | 942 |
1038 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { | 943 static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) { |
1039 return RegExpEngine::CompilationResult("RegExp too big"); | 944 Handle<JSArray> array = Factory::NewJSArray(2); |
| 945 SetElement(array, 0, pattern); |
| 946 const char* message = "RegExp too big"; |
| 947 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message))); |
| 948 Handle<Object> regexp_err = |
| 949 Factory::NewSyntaxError("malformed_regexp", array); |
| 950 Top::Throw(*regexp_err); |
| 951 return Handle<FixedArray>(); |
1040 } | 952 } |
1041 | 953 |
1042 | 954 |
1043 // Attempts to compile the regexp using an Irregexp code generator. Returns | 955 // Attempts to compile the regexp using an Irregexp code generator. Returns |
1044 // a fixed array or a null handle depending on whether it succeeded. | 956 // a fixed array or a null handle depending on whether it succeeded. |
1045 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) | 957 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) |
1046 : next_register_(2 * (capture_count + 1)), | 958 : next_register_(2 * (capture_count + 1)), |
1047 work_list_(NULL), | 959 work_list_(NULL), |
1048 recursion_depth_(0), | 960 recursion_depth_(0), |
1049 ignore_case_(ignore_case), | 961 ignore_case_(ignore_case), |
1050 ascii_(ascii), | 962 ascii_(ascii), |
1051 reg_exp_too_big_(false) { | 963 reg_exp_too_big_(false) { |
1052 accept_ = new EndNode(EndNode::ACCEPT); | 964 accept_ = new EndNode(EndNode::ACCEPT); |
1053 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); | 965 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); |
1054 } | 966 } |
1055 | 967 |
1056 | 968 |
1057 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 969 Handle<FixedArray> RegExpCompiler::Assemble( |
1058 RegExpMacroAssembler* macro_assembler, | 970 RegExpMacroAssembler* macro_assembler, |
1059 RegExpNode* start, | 971 RegExpNode* start, |
1060 int capture_count, | 972 int capture_count, |
1061 Handle<String> pattern) { | 973 Handle<String> pattern) { |
1062 #ifdef DEBUG | 974 #ifdef DEBUG |
1063 if (FLAG_trace_regexp_assembler) | 975 if (FLAG_trace_regexp_assembler) |
1064 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); | 976 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); |
1065 else | 977 else |
1066 #endif | 978 #endif |
1067 macro_assembler_ = macro_assembler; | 979 macro_assembler_ = macro_assembler; |
1068 List <RegExpNode*> work_list(0); | 980 List <RegExpNode*> work_list(0); |
1069 work_list_ = &work_list; | 981 work_list_ = &work_list; |
1070 Label fail; | 982 Label fail; |
1071 macro_assembler_->PushBacktrack(&fail); | 983 macro_assembler_->PushBacktrack(&fail); |
1072 Trace new_trace; | 984 Trace new_trace; |
1073 start->Emit(this, &new_trace); | 985 start->Emit(this, &new_trace); |
1074 macro_assembler_->Bind(&fail); | 986 macro_assembler_->Bind(&fail); |
1075 macro_assembler_->Fail(); | 987 macro_assembler_->Fail(); |
1076 while (!work_list.is_empty()) { | 988 while (!work_list.is_empty()) { |
1077 work_list.RemoveLast()->Emit(this, &new_trace); | 989 work_list.RemoveLast()->Emit(this, &new_trace); |
1078 } | 990 } |
1079 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); | 991 if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern); |
1080 | 992 Handle<FixedArray> array = |
| 993 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); |
| 994 array->set(RegExpImpl::kIrregexpImplementationIndex, |
| 995 Smi::FromInt(macro_assembler_->Implementation())); |
| 996 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, |
| 997 Smi::FromInt(next_register_)); |
| 998 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, |
| 999 Smi::FromInt(capture_count)); |
1081 Handle<Object> code = macro_assembler_->GetCode(pattern); | 1000 Handle<Object> code = macro_assembler_->GetCode(pattern); |
1082 | 1001 array->set(RegExpImpl::kIrregexpCodeIndex, *code); |
1083 work_list_ = NULL; | 1002 work_list_ = NULL; |
1084 #ifdef DEBUG | 1003 #ifdef DEBUG |
1085 if (FLAG_trace_regexp_assembler) { | 1004 if (FLAG_trace_regexp_assembler) { |
1086 delete macro_assembler_; | 1005 delete macro_assembler_; |
1087 } | 1006 } |
1088 #endif | 1007 #endif |
1089 return RegExpEngine::CompilationResult(*code, next_register_); | 1008 return array; |
1090 } | 1009 } |
1091 | 1010 |
1092 | 1011 |
1093 bool Trace::DeferredAction::Mentions(int that) { | 1012 bool Trace::DeferredAction::Mentions(int that) { |
1094 if (type() == ActionNode::CLEAR_CAPTURES) { | 1013 if (type() == ActionNode::CLEAR_CAPTURES) { |
1095 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); | 1014 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); |
1096 return range.Contains(that); | 1015 return range.Contains(that); |
1097 } else { | 1016 } else { |
1098 return reg() == that; | 1017 return reg() == that; |
1099 } | 1018 } |
(...skipping 2697 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3797 // x{f, t} becomes this: | 3716 // x{f, t} becomes this: |
3798 // | 3717 // |
3799 // (r++)<-. | 3718 // (r++)<-. |
3800 // | ` | 3719 // | ` |
3801 // | (x) | 3720 // | (x) |
3802 // v ^ | 3721 // v ^ |
3803 // (r=0)-->(?)---/ [if r < t] | 3722 // (r=0)-->(?)---/ [if r < t] |
3804 // | | 3723 // | |
3805 // [if r >= f] \----> ... | 3724 // [if r >= f] \----> ... |
3806 // | 3725 // |
| 3726 // |
| 3727 // TODO(someone): clear captures on repetition and handle empty |
| 3728 // matches. |
3807 | 3729 |
3808 // 15.10.2.5 RepeatMatcher algorithm. | 3730 // 15.10.2.5 RepeatMatcher algorithm. |
3809 // The parser has already eliminated the case where max is 0. In the case | 3731 // The parser has already eliminated the case where max is 0. In the case |
3810 // where max_match is zero the parser has removed the quantifier if min was | 3732 // where max_match is zero the parser has removed the quantifier if min was |
3811 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. | 3733 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. |
3812 | 3734 |
3813 // If we know that we cannot match zero length then things are a little | 3735 // If we know that we cannot match zero length then things are a little |
3814 // simpler since we don't need to make the special zero length match check | 3736 // simpler since we don't need to make the special zero length match check |
3815 // from step 2.1. If the min and max are small we can unroll a little in | 3737 // from step 2.1. If the min and max are small we can unroll a little in |
3816 // this case. | 3738 // this case. |
(...skipping 846 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4663 } | 4585 } |
4664 } | 4586 } |
4665 | 4587 |
4666 | 4588 |
4667 void DispatchTableConstructor::VisitAction(ActionNode* that) { | 4589 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
4668 RegExpNode* target = that->on_success(); | 4590 RegExpNode* target = that->on_success(); |
4669 target->Accept(this); | 4591 target->Accept(this); |
4670 } | 4592 } |
4671 | 4593 |
4672 | 4594 |
4673 RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data, | 4595 Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data, |
4674 bool ignore_case, | 4596 bool ignore_case, |
4675 bool is_multiline, | 4597 bool is_multiline, |
4676 Handle<String> pattern, | 4598 Handle<String> pattern, |
4677 bool is_ascii) { | 4599 bool is_ascii) { |
4678 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { | 4600 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
4679 return IrregexpRegExpTooBig(); | 4601 return IrregexpRegExpTooBig(pattern); |
4680 } | 4602 } |
4681 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); | 4603 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); |
4682 // Wrap the body of the regexp in capture #0. | 4604 // Wrap the body of the regexp in capture #0. |
4683 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, | 4605 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, |
4684 0, | 4606 0, |
4685 &compiler, | 4607 &compiler, |
4686 compiler.accept()); | 4608 compiler.accept()); |
4687 RegExpNode* node = captured_body; | 4609 RegExpNode* node = captured_body; |
4688 if (!data->tree->IsAnchored()) { | 4610 if (!data->tree->IsAnchored()) { |
4689 // Add a .*? at the beginning, outside the body capture, unless | 4611 // Add a .*? at the beginning, outside the body capture, unless |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4736 EmbeddedVector<byte, 1024> codes; | 4658 EmbeddedVector<byte, 1024> codes; |
4737 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4659 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
4738 return compiler.Assemble(&macro_assembler, | 4660 return compiler.Assemble(&macro_assembler, |
4739 node, | 4661 node, |
4740 data->capture_count, | 4662 data->capture_count, |
4741 pattern); | 4663 pattern); |
4742 } | 4664 } |
4743 | 4665 |
4744 | 4666 |
4745 }} // namespace v8::internal | 4667 }} // namespace v8::internal |
OLD | NEW |