| OLD | NEW |
| 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 206 Handle<String> pattern, | 206 Handle<String> pattern, |
| 207 Handle<String> flag_str) { | 207 Handle<String> flag_str) { |
| 208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); | 208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); |
| 209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); | 209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); |
| 210 bool in_cache = !cached.is_null(); | 210 bool in_cache = !cached.is_null(); |
| 211 LOG(RegExpCompileEvent(re, in_cache)); | 211 LOG(RegExpCompileEvent(re, in_cache)); |
| 212 | 212 |
| 213 Handle<Object> result; | 213 Handle<Object> result; |
| 214 if (in_cache) { | 214 if (in_cache) { |
| 215 re->set_data(*cached); | 215 re->set_data(*cached); |
| 216 return re; | 216 result = re; |
| 217 } | 217 } else { |
| 218 FlattenString(pattern); | 218 FlattenString(pattern); |
| 219 ZoneScope zone_scope(DELETE_ON_EXIT); | 219 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 220 RegExpCompileData parse_result; | 220 RegExpCompileData parse_result; |
| 221 FlatStringReader reader(pattern); | 221 FlatStringReader reader(pattern); |
| 222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { | 222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { |
| 223 // Throw an exception if we fail to parse the pattern. | 223 // Throw an exception if we fail to parse the pattern. |
| 224 ThrowRegExpException(re, | 224 ThrowRegExpException(re, |
| 225 pattern, | 225 pattern, |
| 226 parse_result.error, | 226 parse_result.error, |
| 227 "malformed_regexp"); | 227 "malformed_regexp"); |
| 228 return Handle<Object>::null(); | 228 return Handle<Object>::null(); |
| 229 } |
| 230 |
| 231 if (parse_result.simple && !flags.is_ignore_case()) { |
| 232 // Parse-tree is a single atom that is equal to the pattern. |
| 233 result = AtomCompile(re, pattern, flags, pattern); |
| 234 } else if (parse_result.tree->IsAtom() && |
| 235 !flags.is_ignore_case() && |
| 236 parse_result.capture_count == 0) { |
| 237 RegExpAtom* atom = parse_result.tree->AsAtom(); |
| 238 Vector<const uc16> atom_pattern = atom->data(); |
| 239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); |
| 240 result = AtomCompile(re, pattern, flags, atom_string); |
| 241 } else { |
| 242 result = IrregexpPrepare(re, pattern, flags); |
| 243 } |
| 244 Object* data = re->data(); |
| 245 if (data->IsFixedArray()) { |
| 246 // If compilation succeeded then the data is set on the regexp |
| 247 // and we can store it in the cache. |
| 248 Handle<FixedArray> data(FixedArray::cast(re->data())); |
| 249 CompilationCache::PutRegExp(pattern, flags, data); |
| 250 } |
| 229 } | 251 } |
| 230 | 252 |
| 231 if (parse_result.simple && !flags.is_ignore_case()) { | 253 return result; |
| 232 // Parse-tree is a single atom that is equal to the pattern. | |
| 233 AtomCompile(re, pattern, flags, pattern); | |
| 234 } else if (parse_result.tree->IsAtom() && | |
| 235 !flags.is_ignore_case() && | |
| 236 parse_result.capture_count == 0) { | |
| 237 RegExpAtom* atom = parse_result.tree->AsAtom(); | |
| 238 Vector<const uc16> atom_pattern = atom->data(); | |
| 239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); | |
| 240 AtomCompile(re, pattern, flags, atom_string); | |
| 241 } else { | |
| 242 IrregexpPrepare(re, pattern, flags, parse_result.capture_count); | |
| 243 } | |
| 244 ASSERT(re->data()->IsFixedArray()); | |
| 245 // Compilation succeeded so the data is set on the regexp | |
| 246 // and we can store it in the cache. | |
| 247 Handle<FixedArray> data(FixedArray::cast(re->data())); | |
| 248 CompilationCache::PutRegExp(pattern, flags, data); | |
| 249 | |
| 250 return re; | |
| 251 } | 254 } |
| 252 | 255 |
| 253 | 256 |
| 254 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, | 257 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
| 255 Handle<String> subject, | 258 Handle<String> subject, |
| 256 int index, | 259 Handle<Object> index) { |
| 257 Handle<JSArray> last_match_info) { | |
| 258 switch (regexp->TypeTag()) { | 260 switch (regexp->TypeTag()) { |
| 259 case JSRegExp::ATOM: | 261 case JSRegExp::ATOM: |
| 260 return AtomExec(regexp, subject, index, last_match_info); | 262 return AtomExec(regexp, subject, index); |
| 261 case JSRegExp::IRREGEXP: { | 263 case JSRegExp::IRREGEXP: { |
| 262 Handle<Object> result = | 264 Handle<Object> result = IrregexpExec(regexp, subject, index); |
| 263 IrregexpExec(regexp, subject, index, last_match_info); | |
| 264 ASSERT(!result.is_null() || Top::has_pending_exception()); | 265 ASSERT(!result.is_null() || Top::has_pending_exception()); |
| 265 return result; | 266 return result; |
| 266 } | 267 } |
| 267 default: | 268 default: |
| 268 UNREACHABLE(); | 269 UNREACHABLE(); |
| 269 return Handle<Object>::null(); | 270 return Handle<Object>::null(); |
| 270 } | 271 } |
| 271 } | 272 } |
| 272 | 273 |
| 273 | 274 |
| 274 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, | 275 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, |
| 275 Handle<String> subject, | 276 Handle<String> subject) { |
| 276 Handle<JSArray> last_match_info) { | |
| 277 switch (regexp->TypeTag()) { | 277 switch (regexp->TypeTag()) { |
| 278 case JSRegExp::ATOM: | 278 case JSRegExp::ATOM: |
| 279 return AtomExecGlobal(regexp, subject, last_match_info); | 279 return AtomExecGlobal(regexp, subject); |
| 280 case JSRegExp::IRREGEXP: { | 280 case JSRegExp::IRREGEXP: { |
| 281 Handle<Object> result = | 281 Handle<Object> result = IrregexpExecGlobal(regexp, subject); |
| 282 IrregexpExecGlobal(regexp, subject, last_match_info); | |
| 283 ASSERT(!result.is_null() || Top::has_pending_exception()); | 282 ASSERT(!result.is_null() || Top::has_pending_exception()); |
| 284 return result; | 283 return result; |
| 285 } | 284 } |
| 286 default: | 285 default: |
| 287 UNREACHABLE(); | 286 UNREACHABLE(); |
| 288 return Handle<Object>::null(); | 287 return Handle<Object>::null(); |
| 289 } | 288 } |
| 290 } | 289 } |
| 291 | 290 |
| 292 | 291 |
| 293 // RegExp Atom implementation: Simple string search using indexOf. | 292 // RegExp Atom implementation: Simple string search using indexOf. |
| 294 | 293 |
| 295 | 294 |
| 296 void RegExpImpl::AtomCompile(Handle<JSRegExp> re, | 295 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, |
| 297 Handle<String> pattern, | 296 Handle<String> pattern, |
| 298 JSRegExp::Flags flags, | 297 JSRegExp::Flags flags, |
| 299 Handle<String> match_pattern) { | 298 Handle<String> match_pattern) { |
| 300 Factory::SetRegExpAtomData(re, | 299 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern); |
| 301 JSRegExp::ATOM, | 300 return re; |
| 302 pattern, | |
| 303 flags, | |
| 304 match_pattern); | |
| 305 } | |
| 306 | |
| 307 | |
| 308 static void SetAtomLastCapture(FixedArray* array, | |
| 309 String* subject, | |
| 310 int from, | |
| 311 int to) { | |
| 312 NoHandleAllocation no_handles; | |
| 313 RegExpImpl::SetLastCaptureCount(array, 2); | |
| 314 RegExpImpl::SetLastSubject(array, subject); | |
| 315 RegExpImpl::SetLastInput(array, subject); | |
| 316 RegExpImpl::SetCapture(array, 0, from); | |
| 317 RegExpImpl::SetCapture(array, 1, to); | |
| 318 } | 301 } |
| 319 | 302 |
| 320 | 303 |
| 321 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, | 304 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
| 322 Handle<String> subject, | 305 Handle<String> subject, |
| 323 int index, | 306 Handle<Object> index) { |
| 324 Handle<JSArray> last_match_info) { | |
| 325 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); | 307 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
| 326 | 308 |
| 327 uint32_t start_index = index; | 309 uint32_t start_index; |
| 310 if (!Array::IndexFromObject(*index, &start_index)) { |
| 311 return Handle<Smi>(Smi::FromInt(-1)); |
| 312 } |
| 328 | 313 |
| 329 int value = Runtime::StringMatch(subject, needle, start_index); | 314 int value = Runtime::StringMatch(subject, needle, start_index); |
| 330 if (value == -1) return Factory::null_value(); | 315 if (value == -1) return Factory::null_value(); |
| 331 ASSERT(last_match_info->HasFastElements()); | |
| 332 | 316 |
| 333 { | 317 Handle<FixedArray> array = Factory::NewFixedArray(2); |
| 334 NoHandleAllocation no_handles; | 318 array->set(0, Smi::FromInt(value)); |
| 335 FixedArray* array = last_match_info->elements(); | 319 array->set(1, Smi::FromInt(value + needle->length())); |
| 336 SetAtomLastCapture(array, *subject, value, value + needle->length()); | 320 return Factory::NewJSArrayWithElements(array); |
| 337 } | |
| 338 return last_match_info; | |
| 339 } | 321 } |
| 340 | 322 |
| 341 | 323 |
| 342 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, | 324 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, |
| 343 Handle<String> subject, | 325 Handle<String> subject) { |
| 344 Handle<JSArray> last_match_info) { | |
| 345 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); | 326 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
| 346 ASSERT(last_match_info->HasFastElements()); | |
| 347 Handle<JSArray> result = Factory::NewJSArray(1); | 327 Handle<JSArray> result = Factory::NewJSArray(1); |
| 348 int index = 0; | 328 int index = 0; |
| 349 int match_count = 0; | 329 int match_count = 0; |
| 350 int subject_length = subject->length(); | 330 int subject_length = subject->length(); |
| 351 int needle_length = needle->length(); | 331 int needle_length = needle->length(); |
| 352 int last_value = -1; | |
| 353 while (true) { | 332 while (true) { |
| 354 HandleScope scope; | |
| 355 int value = -1; | 333 int value = -1; |
| 356 if (index + needle_length <= subject_length) { | 334 if (index + needle_length <= subject_length) { |
| 357 value = Runtime::StringMatch(subject, needle, index); | 335 value = Runtime::StringMatch(subject, needle, index); |
| 358 } | 336 } |
| 359 if (value == -1) { | 337 if (value == -1) break; |
| 360 if (last_value != -1) { | 338 HandleScope scope; |
| 361 Handle<FixedArray> array(last_match_info->elements()); | |
| 362 SetAtomLastCapture(*array, | |
| 363 *subject, | |
| 364 last_value, | |
| 365 last_value + needle->length()); | |
| 366 } | |
| 367 break; | |
| 368 } | |
| 369 | |
| 370 int end = value + needle_length; | 339 int end = value + needle_length; |
| 371 | 340 |
| 372 // Create an array that looks like the static last_match_info array | 341 Handle<FixedArray> array = Factory::NewFixedArray(2); |
| 373 // that is attached to the global RegExp object. We will be returning | 342 array->set(0, Smi::FromInt(value)); |
| 374 // an array of these. | 343 array->set(1, Smi::FromInt(end)); |
| 375 Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2); | |
| 376 SetCapture(*array, 0, value); | |
| 377 SetCapture(*array, 1, end); | |
| 378 SetLastCaptureCount(*array, 2); | |
| 379 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); | 344 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); |
| 380 SetElement(result, match_count, pair); | 345 SetElement(result, match_count, pair); |
| 381 match_count++; | 346 match_count++; |
| 382 index = end; | 347 index = end; |
| 383 if (needle_length == 0) index++; | 348 if (needle_length == 0) index++; |
| 384 last_value = value; | |
| 385 } | 349 } |
| 386 return result; | 350 return result; |
| 387 } | 351 } |
| 388 | 352 |
| 389 | 353 |
| 390 // Irregexp implementation. | 354 // Irregexp implementation. |
| 391 | 355 |
| 392 | 356 |
| 393 // Ensures that the regexp object contains a compiled version of the | 357 // Retrieves a compiled version of the regexp for either ASCII or non-ASCII |
| 394 // source for either ASCII or non-ASCII strings. | 358 // strings. If the compiled version doesn't already exist, it is compiled |
| 395 // If the compiled version doesn't already exist, it is compiled | |
| 396 // from the source pattern. | 359 // from the source pattern. |
| 397 // If compilation fails, an exception is thrown and this function | 360 // Irregexp is not feature complete yet. If there is something in the |
| 398 // returns false. | 361 // regexp that the compiler cannot currently handle, an empty |
| 399 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, | 362 // handle is returned, but no exception is thrown. |
| 400 bool is_ascii) { | 363 static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re, |
| 401 int index; | 364 bool is_ascii) { |
| 402 if (is_ascii) { | 365 ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); |
| 403 index = JSRegExp::kIrregexpASCIICodeIndex; | 366 Handle<FixedArray> alternatives( |
| 404 } else { | 367 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex))); |
| 405 index = JSRegExp::kIrregexpUC16CodeIndex; | 368 ASSERT_EQ(2, alternatives->length()); |
| 406 } | 369 |
| 407 Object* entry = re->DataAt(index); | 370 int index = is_ascii ? 0 : 1; |
| 408 if (!entry->IsTheHole()) { | 371 Object* entry = alternatives->get(index); |
| 409 // A value has already been compiled. | 372 if (!entry->IsNull()) { |
| 410 if (entry->IsJSObject()) { | 373 return Handle<FixedArray>(FixedArray::cast(entry)); |
| 411 // If it's a JS value, it's an error. | |
| 412 Top::Throw(entry); | |
| 413 return false; | |
| 414 } | |
| 415 return true; | |
| 416 } | 374 } |
| 417 | 375 |
| 418 // Compile the RegExp. | 376 // Compile the RegExp. |
| 419 ZoneScope zone_scope(DELETE_ON_EXIT); | 377 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 420 | 378 |
| 421 JSRegExp::Flags flags = re->GetFlags(); | 379 JSRegExp::Flags flags = re->GetFlags(); |
| 422 | 380 |
| 423 Handle<String> pattern(re->Pattern()); | 381 Handle<String> pattern(re->Pattern()); |
| 424 if (!pattern->IsFlat(StringShape(*pattern))) { | 382 if (!pattern->IsFlat(StringShape(*pattern))) { |
| 425 FlattenString(pattern); | 383 FlattenString(pattern); |
| 426 } | 384 } |
| 427 | 385 |
| 428 RegExpCompileData compile_data; | 386 RegExpCompileData compile_data; |
| 429 FlatStringReader reader(pattern); | 387 FlatStringReader reader(pattern); |
| 430 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { | 388 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { |
| 431 // Throw an exception if we fail to parse the pattern. | 389 // Throw an exception if we fail to parse the pattern. |
| 432 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. | 390 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. |
| 433 ThrowRegExpException(re, | 391 ThrowRegExpException(re, |
| 434 pattern, | 392 pattern, |
| 435 compile_data.error, | 393 compile_data.error, |
| 436 "malformed_regexp"); | 394 "malformed_regexp"); |
| 437 return false; | 395 return Handle<FixedArray>::null(); |
| 438 } | 396 } |
| 439 RegExpEngine::CompilationResult result = | 397 Handle<FixedArray> compiled_entry = |
| 440 RegExpEngine::Compile(&compile_data, | 398 RegExpEngine::Compile(&compile_data, |
| 441 flags.is_ignore_case(), | 399 flags.is_ignore_case(), |
| 442 flags.is_multiline(), | 400 flags.is_multiline(), |
| 443 pattern, | 401 pattern, |
| 444 is_ascii); | 402 is_ascii); |
| 445 if (result.error_message != NULL) { | 403 if (!compiled_entry.is_null()) { |
| 446 // Unable to compile regexp. | 404 alternatives->set(index, *compiled_entry); |
| 447 Handle<JSArray> array = Factory::NewJSArray(2); | |
| 448 SetElement(array, 0, pattern); | |
| 449 SetElement(array, | |
| 450 1, | |
| 451 Factory::NewStringFromUtf8(CStrVector(result.error_message))); | |
| 452 Handle<Object> regexp_err = | |
| 453 Factory::NewSyntaxError("malformed_regexp", array); | |
| 454 Top::Throw(*regexp_err); | |
| 455 re->SetDataAt(index, *regexp_err); | |
| 456 return false; | |
| 457 } | 405 } |
| 458 | 406 return compiled_entry; |
| 459 NoHandleAllocation no_handles; | |
| 460 | |
| 461 FixedArray* data = FixedArray::cast(re->data()); | |
| 462 data->set(index, result.code); | |
| 463 int register_max = IrregexpMaxRegisterCount(data); | |
| 464 if (result.num_registers > register_max) { | |
| 465 SetIrregexpMaxRegisterCount(data, result.num_registers); | |
| 466 } | |
| 467 | |
| 468 return true; | |
| 469 } | 407 } |
| 470 | 408 |
| 471 | 409 |
| 472 int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) { | 410 int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) { |
| 473 return Smi::cast( | 411 return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value(); |
| 474 re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); | |
| 475 } | 412 } |
| 476 | 413 |
| 477 | 414 |
| 478 void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) { | 415 int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) { |
| 479 re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value)); | 416 return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value(); |
| 480 } | 417 } |
| 481 | 418 |
| 482 | 419 |
| 483 int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) { | 420 Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) { |
| 484 return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value(); | 421 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() |
| 422 == RegExpMacroAssembler::kBytecodeImplementation); |
| 423 return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex))); |
| 485 } | 424 } |
| 486 | 425 |
| 487 | 426 |
| 488 int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) { | 427 Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) { |
| 489 return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); | 428 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() |
| 429 != RegExpMacroAssembler::kBytecodeImplementation); |
| 430 return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex))); |
| 490 } | 431 } |
| 491 | 432 |
| 492 | 433 |
| 493 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { | 434 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, |
| 494 int index; | 435 Handle<String> pattern, |
| 495 if (is_ascii) { | 436 JSRegExp::Flags flags) { |
| 496 index = JSRegExp::kIrregexpASCIICodeIndex; | 437 // Make space for ASCII and UC16 versions. |
| 497 } else { | 438 Handle<FixedArray> alternatives = Factory::NewFixedArray(2); |
| 498 index = JSRegExp::kIrregexpUC16CodeIndex; | 439 alternatives->set_null(0); |
| 499 } | 440 alternatives->set_null(1); |
| 500 return ByteArray::cast(re->get(index)); | 441 Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives); |
| 501 } | 442 return re; |
| 502 | |
| 503 | |
| 504 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { | |
| 505 int index; | |
| 506 if (is_ascii) { | |
| 507 index = JSRegExp::kIrregexpASCIICodeIndex; | |
| 508 } else { | |
| 509 index = JSRegExp::kIrregexpUC16CodeIndex; | |
| 510 } | |
| 511 return Code::cast(re->get(index)); | |
| 512 } | |
| 513 | |
| 514 | |
| 515 void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, | |
| 516 Handle<String> pattern, | |
| 517 JSRegExp::Flags flags, | |
| 518 int capture_count) { | |
| 519 // Initialize compiled code entries to null. | |
| 520 Factory::SetRegExpIrregexpData(re, | |
| 521 JSRegExp::IRREGEXP, | |
| 522 pattern, | |
| 523 flags, | |
| 524 capture_count); | |
| 525 } | 443 } |
| 526 | 444 |
| 527 | 445 |
| 528 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, | 446 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, |
| 529 Handle<String> subject, | 447 Handle<String> subject, |
| 530 int index, | 448 Handle<Object> index) { |
| 531 Handle<JSArray> last_match_info) { | |
| 532 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | 449 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| 450 ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); |
| 533 | 451 |
| 534 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); | 452 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); |
| 535 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { | 453 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); |
| 454 if (irregexp.is_null()) { |
| 455 // We can't handle the RegExp with IRRegExp. |
| 536 return Handle<Object>::null(); | 456 return Handle<Object>::null(); |
| 537 } | 457 } |
| 538 | 458 |
| 539 // Prepare space for the return values. | 459 // Prepare space for the return values. |
| 540 Handle<FixedArray> re_data(FixedArray::cast(regexp->data())); | 460 int number_of_registers = IrregexpNumberOfRegisters(irregexp); |
| 541 int number_of_capture_registers = | 461 OffsetsVector offsets(number_of_registers); |
| 542 (IrregexpNumberOfCaptures(*re_data) + 1) * 2; | |
| 543 OffsetsVector offsets(number_of_capture_registers); | |
| 544 | 462 |
| 545 int previous_index = index; | 463 int num_captures = IrregexpNumberOfCaptures(irregexp); |
| 464 |
| 465 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); |
| 546 | 466 |
| 547 #ifdef DEBUG | 467 #ifdef DEBUG |
| 548 if (FLAG_trace_regexp_bytecodes) { | 468 if (FLAG_trace_regexp_bytecodes) { |
| 549 String* pattern = regexp->Pattern(); | 469 String* pattern = regexp->Pattern(); |
| 550 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 470 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| 551 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 471 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| 552 } | 472 } |
| 553 #endif | 473 #endif |
| 554 | 474 |
| 555 if (!subject->IsFlat(StringShape(*subject))) { | 475 if (!subject->IsFlat(StringShape(*subject))) { |
| 556 FlattenString(subject); | 476 FlattenString(subject); |
| 557 } | 477 } |
| 558 | 478 |
| 559 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); | 479 return IrregexpExecOnce(irregexp, |
| 560 | 480 num_captures, |
| 561 return IrregexpExecOnce(re_data, | |
| 562 number_of_capture_registers, | |
| 563 last_match_info, | |
| 564 subject, | 481 subject, |
| 565 previous_index, | 482 previous_index, |
| 566 offsets.vector(), | 483 offsets.vector(), |
| 567 offsets.length()); | 484 offsets.length()); |
| 568 } | 485 } |
| 569 | 486 |
| 570 | 487 |
| 571 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, | 488 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, |
| 572 Handle<String> subject, | 489 Handle<String> subject) { |
| 573 Handle<JSArray> last_match_info) { | |
| 574 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | 490 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| 575 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data())); | |
| 576 | 491 |
| 577 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); | 492 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); |
| 578 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { | 493 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); |
| 494 if (irregexp.is_null()) { |
| 579 return Handle<Object>::null(); | 495 return Handle<Object>::null(); |
| 580 } | 496 } |
| 581 | 497 |
| 582 // Prepare space for the return values. | 498 // Prepare space for the return values. |
| 583 int number_of_capture_registers = | 499 int number_of_registers = IrregexpNumberOfRegisters(irregexp); |
| 584 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; | 500 OffsetsVector offsets(number_of_registers); |
| 585 OffsetsVector offsets(number_of_capture_registers); | |
| 586 | 501 |
| 587 int previous_index = 0; | 502 int previous_index = 0; |
| 588 | 503 |
| 589 Handle<JSArray> result = Factory::NewJSArray(0); | 504 Handle<JSArray> result = Factory::NewJSArray(0); |
| 590 int result_length = 0; | 505 int i = 0; |
| 591 Handle<Object> matches; | 506 Handle<Object> matches; |
| 592 | 507 |
| 593 if (!subject->IsFlat(StringShape(*subject))) { | 508 if (!subject->IsFlat(StringShape(*subject))) { |
| 594 FlattenString(subject); | 509 FlattenString(subject); |
| 595 } | 510 } |
| 596 | 511 |
| 597 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); | |
| 598 | |
| 599 while (true) { | 512 while (true) { |
| 600 if (previous_index > subject->length() || previous_index < 0) { | 513 if (previous_index > subject->length() || previous_index < 0) { |
| 601 // Per ECMA-262 15.10.6.2, if the previous index is greater than the | 514 // Per ECMA-262 15.10.6.2, if the previous index is greater than the |
| 602 // string length, there is no match. | 515 // string length, there is no match. |
| 603 matches = Factory::null_value(); | 516 matches = Factory::null_value(); |
| 604 return result; | 517 return result; |
| 605 } else { | 518 } else { |
| 606 #ifdef DEBUG | 519 #ifdef DEBUG |
| 607 if (FLAG_trace_regexp_bytecodes) { | 520 if (FLAG_trace_regexp_bytecodes) { |
| 608 String* pattern = regexp->Pattern(); | 521 String* pattern = regexp->Pattern(); |
| 609 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 522 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| 610 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 523 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| 611 } | 524 } |
| 612 #endif | 525 #endif |
| 613 HandleScope scope; | |
| 614 matches = IrregexpExecOnce(irregexp, | 526 matches = IrregexpExecOnce(irregexp, |
| 615 number_of_capture_registers, | 527 IrregexpNumberOfCaptures(irregexp), |
| 616 last_match_info, | |
| 617 subject, | 528 subject, |
| 618 previous_index, | 529 previous_index, |
| 619 offsets.vector(), | 530 offsets.vector(), |
| 620 offsets.length()); | 531 offsets.length()); |
| 621 | 532 |
| 622 if (matches.is_null()) { | 533 if (matches.is_null()) { |
| 623 ASSERT(Top::has_pending_exception()); | 534 ASSERT(Top::has_pending_exception()); |
| 624 return matches; | 535 return matches; |
| 625 } | 536 } |
| 626 | 537 |
| 627 if (matches->IsJSArray()) { | 538 if (matches->IsJSArray()) { |
| 628 // Create an array that looks like the static last_match_info array | 539 SetElement(result, i, matches); |
| 629 // that is attached to the global RegExp object. We will be returning | 540 i++; |
| 630 // an array of these. | 541 previous_index = offsets.vector()[1]; |
| 631 Handle<FixedArray> matches_array(JSArray::cast(*matches)->elements()); | 542 if (offsets.vector()[0] == offsets.vector()[1]) { |
| 632 Handle<JSArray> latest_match = | 543 previous_index++; |
| 633 Factory::NewJSArray(kFirstCapture + number_of_capture_registers); | |
| 634 Handle<FixedArray> latest_match_array(latest_match->elements()); | |
| 635 | |
| 636 for (int i = 0; i < number_of_capture_registers; i++) { | |
| 637 SetCapture(*latest_match_array, i, GetCapture(*matches_array, i)); | |
| 638 } | 544 } |
| 639 SetLastCaptureCount(*latest_match_array, number_of_capture_registers); | |
| 640 | |
| 641 SetElement(result, result_length, latest_match); | |
| 642 result_length++; | |
| 643 previous_index = GetCapture(*matches_array, 1); | |
| 644 if (GetCapture(*matches_array, 0) == previous_index) | |
| 645 previous_index++; | |
| 646 | |
| 647 } else { | 545 } else { |
| 648 ASSERT(matches->IsNull()); | 546 ASSERT(matches->IsNull()); |
| 649 return result; | 547 return result; |
| 650 } | 548 } |
| 651 } | 549 } |
| 652 } | 550 } |
| 653 } | 551 } |
| 654 | 552 |
| 655 | 553 |
| 656 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp, | 554 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, |
| 657 int number_of_capture_registers, | 555 int num_captures, |
| 658 Handle<JSArray> last_match_info, | |
| 659 Handle<String> subject, | 556 Handle<String> subject, |
| 660 int previous_index, | 557 int previous_index, |
| 661 int* offsets_vector, | 558 int* offsets_vector, |
| 662 int offsets_vector_length) { | 559 int offsets_vector_length) { |
| 663 StringShape shape(*subject); | 560 ASSERT(subject->IsFlat(StringShape(*subject))); |
| 664 ASSERT(subject->IsFlat(shape)); | |
| 665 bool is_ascii = shape.IsAsciiRepresentation(); | |
| 666 bool rc; | 561 bool rc; |
| 667 | 562 |
| 668 if (FLAG_regexp_native) { | 563 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); |
| 564 |
| 565 switch (tag) { |
| 566 case RegExpMacroAssembler::kIA32Implementation: { |
| 669 #ifndef ARM | 567 #ifndef ARM |
| 670 Handle<Code> code(IrregexpNativeCode(*regexp, is_ascii)); | 568 Handle<Code> code = IrregexpNativeCode(irregexp); |
| 671 | 569 |
| 672 // Character offsets into string. | 570 StringShape shape(*subject); |
| 673 int start_offset = previous_index; | |
| 674 int end_offset = subject->length(shape); | |
| 675 | 571 |
| 676 if (shape.IsCons()) { | 572 // Character offsets into string. |
| 677 subject = Handle<String>(ConsString::cast(*subject)->first()); | 573 int start_offset = previous_index; |
| 678 } else if (shape.IsSliced()) { | 574 int end_offset = subject->length(shape); |
| 679 SlicedString* slice = SlicedString::cast(*subject); | |
| 680 start_offset += slice->start(); | |
| 681 end_offset += slice->start(); | |
| 682 subject = Handle<String>(slice->buffer()); | |
| 683 } | |
| 684 | 575 |
| 685 // String is now either Sequential or External | 576 if (shape.IsCons()) { |
| 686 StringShape flatshape(*subject); | 577 subject = Handle<String>(ConsString::cast(*subject)->first()); |
| 687 bool is_ascii = flatshape.IsAsciiRepresentation(); | 578 } else if (shape.IsSliced()) { |
| 688 int char_size_shift = is_ascii ? 0 : 1; | 579 SlicedString* slice = SlicedString::cast(*subject); |
| 580 start_offset += slice->start(); |
| 581 end_offset += slice->start(); |
| 582 subject = Handle<String>(slice->buffer()); |
| 583 } |
| 689 | 584 |
| 690 RegExpMacroAssemblerIA32::Result res; | 585 // String is now either Sequential or External |
| 586 StringShape flatshape(*subject); |
| 587 bool is_ascii = flatshape.IsAsciiRepresentation(); |
| 588 int char_size_shift = is_ascii ? 0 : 1; |
| 691 | 589 |
| 692 if (flatshape.IsExternal()) { | 590 RegExpMacroAssemblerIA32::Result res; |
| 693 const byte* address; | 591 |
| 694 if (is_ascii) { | 592 if (flatshape.IsExternal()) { |
| 695 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); | 593 const byte* address; |
| 696 address = reinterpret_cast<const byte*>(ext->resource()->data()); | 594 if (is_ascii) { |
| 697 } else { | 595 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); |
| 698 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); | 596 address = reinterpret_cast<const byte*>(ext->resource()->data()); |
| 699 address = reinterpret_cast<const byte*>(ext->resource()->data()); | 597 } else { |
| 598 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); |
| 599 address = reinterpret_cast<const byte*>(ext->resource()->data()); |
| 600 } |
| 601 res = RegExpMacroAssemblerIA32::Execute( |
| 602 *code, |
| 603 const_cast<Address*>(&address), |
| 604 start_offset << char_size_shift, |
| 605 end_offset << char_size_shift, |
| 606 offsets_vector, |
| 607 previous_index == 0); |
| 608 } else { // Sequential string |
| 609 ASSERT(StringShape(*subject).IsSequential()); |
| 610 Address char_address = |
| 611 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() |
| 612 : SeqTwoByteString::cast(*subject)->GetCharsAddress(); |
| 613 int byte_offset = char_address - reinterpret_cast<Address>(*subject); |
| 614 res = RegExpMacroAssemblerIA32::Execute( |
| 615 *code, |
| 616 reinterpret_cast<Address*>(subject.location()), |
| 617 byte_offset + (start_offset << char_size_shift), |
| 618 byte_offset + (end_offset << char_size_shift), |
| 619 offsets_vector, |
| 620 previous_index == 0); |
| 700 } | 621 } |
| 701 res = RegExpMacroAssemblerIA32::Execute( | |
| 702 *code, | |
| 703 const_cast<Address*>(&address), | |
| 704 start_offset << char_size_shift, | |
| 705 end_offset << char_size_shift, | |
| 706 offsets_vector, | |
| 707 previous_index == 0); | |
| 708 } else { // Sequential string | |
| 709 ASSERT(StringShape(*subject).IsSequential()); | |
| 710 Address char_address = | |
| 711 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() | |
| 712 : SeqTwoByteString::cast(*subject)->GetCharsAddress(); | |
| 713 int byte_offset = char_address - reinterpret_cast<Address>(*subject); | |
| 714 res = RegExpMacroAssemblerIA32::Execute( | |
| 715 *code, | |
| 716 reinterpret_cast<Address*>(subject.location()), | |
| 717 byte_offset + (start_offset << char_size_shift), | |
| 718 byte_offset + (end_offset << char_size_shift), | |
| 719 offsets_vector, | |
| 720 previous_index == 0); | |
| 721 } | |
| 722 | 622 |
| 723 if (res == RegExpMacroAssemblerIA32::EXCEPTION) { | 623 if (res == RegExpMacroAssemblerIA32::EXCEPTION) { |
| 724 ASSERT(Top::has_pending_exception()); | 624 ASSERT(Top::has_pending_exception()); |
| 725 return Handle<Object>::null(); | 625 return Handle<Object>::null(); |
| 726 } | 626 } |
| 727 rc = (res == RegExpMacroAssemblerIA32::SUCCESS); | 627 rc = (res == RegExpMacroAssemblerIA32::SUCCESS); |
| 728 | 628 |
| 729 if (rc) { | 629 if (rc) { |
| 730 // Capture values are relative to start_offset only. | 630 // Capture values are relative to start_offset only. |
| 731 for (int i = 0; i < offsets_vector_length; i++) { | 631 for (int i = 0; i < offsets_vector_length; i++) { |
| 732 if (offsets_vector[i] >= 0) { | 632 if (offsets_vector[i] >= 0) { |
| 733 offsets_vector[i] += previous_index; | 633 offsets_vector[i] += previous_index; |
| 634 } |
| 734 } | 635 } |
| 735 } | 636 } |
| 637 break; |
| 638 #else |
| 639 UNIMPLEMENTED(); |
| 640 rc = false; |
| 641 break; |
| 642 #endif |
| 736 } | 643 } |
| 737 } else { | 644 case RegExpMacroAssembler::kBytecodeImplementation: { |
| 738 #else | 645 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { |
| 739 // Unimplemented on ARM, fall through to bytecode. | 646 offsets_vector[i] = -1; |
| 740 } | 647 } |
| 741 { | 648 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp); |
| 742 #endif | 649 |
| 743 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | 650 rc = IrregexpInterpreter::Match(byte_codes, |
| 744 offsets_vector[i] = -1; | 651 subject, |
| 652 offsets_vector, |
| 653 previous_index); |
| 654 break; |
| 745 } | 655 } |
| 746 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); | 656 case RegExpMacroAssembler::kARMImplementation: |
| 747 | 657 default: |
| 748 rc = IrregexpInterpreter::Match(byte_codes, | 658 UNREACHABLE(); |
| 749 subject, | 659 rc = false; |
| 750 offsets_vector, | 660 break; |
| 751 previous_index); | |
| 752 } | 661 } |
| 753 | 662 |
| 754 if (!rc) { | 663 if (!rc) { |
| 755 return Factory::null_value(); | 664 return Factory::null_value(); |
| 756 } | 665 } |
| 757 | 666 |
| 758 FixedArray* array = last_match_info->elements(); | 667 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); |
| 759 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); | |
| 760 // The captures come in (start, end+1) pairs. | 668 // The captures come in (start, end+1) pairs. |
| 761 for (int i = 0; i < number_of_capture_registers; i += 2) { | 669 for (int i = 0; i < 2 * (num_captures + 1); i += 2) { |
| 762 SetCapture(array, i, offsets_vector[i]); | 670 array->set(i, Smi::FromInt(offsets_vector[i])); |
| 763 SetCapture(array, i + 1, offsets_vector[i + 1]); | 671 array->set(i + 1, Smi::FromInt(offsets_vector[i + 1])); |
| 764 } | 672 } |
| 765 SetLastCaptureCount(array, number_of_capture_registers); | 673 return Factory::NewJSArrayWithElements(array); |
| 766 SetLastSubject(array, *subject); | |
| 767 SetLastInput(array, *subject); | |
| 768 return last_match_info; | |
| 769 } | 674 } |
| 770 | 675 |
| 771 | 676 |
| 772 // ------------------------------------------------------------------- | 677 // ------------------------------------------------------------------- |
| 773 // Implementation of the Irregexp regular expression engine. | 678 // Implementation of the Irregexp regular expression engine. |
| 774 // | 679 // |
| 775 // The Irregexp regular expression engine is intended to be a complete | 680 // The Irregexp regular expression engine is intended to be a complete |
| 776 // implementation of ECMAScript regular expressions. It generates either | 681 // implementation of ECMAScript regular expressions. It generates either |
| 777 // bytecodes or native code. | 682 // bytecodes or native code. |
| 778 | 683 |
| 779 // The Irregexp regexp engine is structured in three steps. | 684 // The Irregexp regexp engine is structured in three steps. |
| 780 // 1) The parser generates an abstract syntax tree. See ast.cc. | 685 // 1) The parser generates an abstract syntax tree. See ast.cc. |
| 781 // 2) From the AST a node network is created. The nodes are all | 686 // 2) From the AST a node network is created. The nodes are all |
| 782 // subclasses of RegExpNode. The nodes represent states when | 687 // subclasses of RegExpNode. The nodes represent states when |
| 783 // executing a regular expression. Several optimizations are | 688 // executing a regular expression. Several optimizations are |
| (...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 980 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); | 885 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); |
| 981 | 886 |
| 982 int AllocateRegister() { | 887 int AllocateRegister() { |
| 983 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { | 888 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { |
| 984 reg_exp_too_big_ = true; | 889 reg_exp_too_big_ = true; |
| 985 return next_register_; | 890 return next_register_; |
| 986 } | 891 } |
| 987 return next_register_++; | 892 return next_register_++; |
| 988 } | 893 } |
| 989 | 894 |
| 990 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, | 895 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, |
| 991 RegExpNode* start, | 896 RegExpNode* start, |
| 992 int capture_count, | 897 int capture_count, |
| 993 Handle<String> pattern); | 898 Handle<String> pattern); |
| 994 | 899 |
| 995 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 900 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
| 996 | 901 |
| 997 static const int kImplementationOffset = 0; | 902 static const int kImplementationOffset = 0; |
| 998 static const int kNumberOfRegistersOffset = 0; | 903 static const int kNumberOfRegistersOffset = 0; |
| 999 static const int kCodeOffset = 1; | 904 static const int kCodeOffset = 1; |
| 1000 | 905 |
| 1001 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } | 906 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
| 1002 EndNode* accept() { return accept_; } | 907 EndNode* accept() { return accept_; } |
| 1003 | 908 |
| (...skipping 24 matching lines...) Expand all Loading... |
| 1028 public: | 933 public: |
| 1029 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 934 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
| 1030 compiler->IncrementRecursionDepth(); | 935 compiler->IncrementRecursionDepth(); |
| 1031 } | 936 } |
| 1032 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 937 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
| 1033 private: | 938 private: |
| 1034 RegExpCompiler* compiler_; | 939 RegExpCompiler* compiler_; |
| 1035 }; | 940 }; |
| 1036 | 941 |
| 1037 | 942 |
| 1038 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { | 943 static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) { |
| 1039 return RegExpEngine::CompilationResult("RegExp too big"); | 944 Handle<JSArray> array = Factory::NewJSArray(2); |
| 945 SetElement(array, 0, pattern); |
| 946 const char* message = "RegExp too big"; |
| 947 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message))); |
| 948 Handle<Object> regexp_err = |
| 949 Factory::NewSyntaxError("malformed_regexp", array); |
| 950 Top::Throw(*regexp_err); |
| 951 return Handle<FixedArray>(); |
| 1040 } | 952 } |
| 1041 | 953 |
| 1042 | 954 |
| 1043 // Attempts to compile the regexp using an Irregexp code generator. Returns | 955 // Attempts to compile the regexp using an Irregexp code generator. Returns |
| 1044 // a fixed array or a null handle depending on whether it succeeded. | 956 // a fixed array or a null handle depending on whether it succeeded. |
| 1045 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) | 957 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) |
| 1046 : next_register_(2 * (capture_count + 1)), | 958 : next_register_(2 * (capture_count + 1)), |
| 1047 work_list_(NULL), | 959 work_list_(NULL), |
| 1048 recursion_depth_(0), | 960 recursion_depth_(0), |
| 1049 ignore_case_(ignore_case), | 961 ignore_case_(ignore_case), |
| 1050 ascii_(ascii), | 962 ascii_(ascii), |
| 1051 reg_exp_too_big_(false) { | 963 reg_exp_too_big_(false) { |
| 1052 accept_ = new EndNode(EndNode::ACCEPT); | 964 accept_ = new EndNode(EndNode::ACCEPT); |
| 1053 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); | 965 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); |
| 1054 } | 966 } |
| 1055 | 967 |
| 1056 | 968 |
| 1057 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 969 Handle<FixedArray> RegExpCompiler::Assemble( |
| 1058 RegExpMacroAssembler* macro_assembler, | 970 RegExpMacroAssembler* macro_assembler, |
| 1059 RegExpNode* start, | 971 RegExpNode* start, |
| 1060 int capture_count, | 972 int capture_count, |
| 1061 Handle<String> pattern) { | 973 Handle<String> pattern) { |
| 1062 #ifdef DEBUG | 974 #ifdef DEBUG |
| 1063 if (FLAG_trace_regexp_assembler) | 975 if (FLAG_trace_regexp_assembler) |
| 1064 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); | 976 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); |
| 1065 else | 977 else |
| 1066 #endif | 978 #endif |
| 1067 macro_assembler_ = macro_assembler; | 979 macro_assembler_ = macro_assembler; |
| 1068 List <RegExpNode*> work_list(0); | 980 List <RegExpNode*> work_list(0); |
| 1069 work_list_ = &work_list; | 981 work_list_ = &work_list; |
| 1070 Label fail; | 982 Label fail; |
| 1071 macro_assembler_->PushBacktrack(&fail); | 983 macro_assembler_->PushBacktrack(&fail); |
| 1072 Trace new_trace; | 984 Trace new_trace; |
| 1073 start->Emit(this, &new_trace); | 985 start->Emit(this, &new_trace); |
| 1074 macro_assembler_->Bind(&fail); | 986 macro_assembler_->Bind(&fail); |
| 1075 macro_assembler_->Fail(); | 987 macro_assembler_->Fail(); |
| 1076 while (!work_list.is_empty()) { | 988 while (!work_list.is_empty()) { |
| 1077 work_list.RemoveLast()->Emit(this, &new_trace); | 989 work_list.RemoveLast()->Emit(this, &new_trace); |
| 1078 } | 990 } |
| 1079 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); | 991 if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern); |
| 1080 | 992 Handle<FixedArray> array = |
| 993 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); |
| 994 array->set(RegExpImpl::kIrregexpImplementationIndex, |
| 995 Smi::FromInt(macro_assembler_->Implementation())); |
| 996 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, |
| 997 Smi::FromInt(next_register_)); |
| 998 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, |
| 999 Smi::FromInt(capture_count)); |
| 1081 Handle<Object> code = macro_assembler_->GetCode(pattern); | 1000 Handle<Object> code = macro_assembler_->GetCode(pattern); |
| 1082 | 1001 array->set(RegExpImpl::kIrregexpCodeIndex, *code); |
| 1083 work_list_ = NULL; | 1002 work_list_ = NULL; |
| 1084 #ifdef DEBUG | 1003 #ifdef DEBUG |
| 1085 if (FLAG_trace_regexp_assembler) { | 1004 if (FLAG_trace_regexp_assembler) { |
| 1086 delete macro_assembler_; | 1005 delete macro_assembler_; |
| 1087 } | 1006 } |
| 1088 #endif | 1007 #endif |
| 1089 return RegExpEngine::CompilationResult(*code, next_register_); | 1008 return array; |
| 1090 } | 1009 } |
| 1091 | 1010 |
| 1092 | 1011 |
| 1093 bool Trace::DeferredAction::Mentions(int that) { | 1012 bool Trace::DeferredAction::Mentions(int that) { |
| 1094 if (type() == ActionNode::CLEAR_CAPTURES) { | 1013 if (type() == ActionNode::CLEAR_CAPTURES) { |
| 1095 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); | 1014 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); |
| 1096 return range.Contains(that); | 1015 return range.Contains(that); |
| 1097 } else { | 1016 } else { |
| 1098 return reg() == that; | 1017 return reg() == that; |
| 1099 } | 1018 } |
| (...skipping 2697 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3797 // x{f, t} becomes this: | 3716 // x{f, t} becomes this: |
| 3798 // | 3717 // |
| 3799 // (r++)<-. | 3718 // (r++)<-. |
| 3800 // | ` | 3719 // | ` |
| 3801 // | (x) | 3720 // | (x) |
| 3802 // v ^ | 3721 // v ^ |
| 3803 // (r=0)-->(?)---/ [if r < t] | 3722 // (r=0)-->(?)---/ [if r < t] |
| 3804 // | | 3723 // | |
| 3805 // [if r >= f] \----> ... | 3724 // [if r >= f] \----> ... |
| 3806 // | 3725 // |
| 3726 // |
| 3727 // TODO(someone): clear captures on repetition and handle empty |
| 3728 // matches. |
| 3807 | 3729 |
| 3808 // 15.10.2.5 RepeatMatcher algorithm. | 3730 // 15.10.2.5 RepeatMatcher algorithm. |
| 3809 // The parser has already eliminated the case where max is 0. In the case | 3731 // The parser has already eliminated the case where max is 0. In the case |
| 3810 // where max_match is zero the parser has removed the quantifier if min was | 3732 // where max_match is zero the parser has removed the quantifier if min was |
| 3811 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. | 3733 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. |
| 3812 | 3734 |
| 3813 // If we know that we cannot match zero length then things are a little | 3735 // If we know that we cannot match zero length then things are a little |
| 3814 // simpler since we don't need to make the special zero length match check | 3736 // simpler since we don't need to make the special zero length match check |
| 3815 // from step 2.1. If the min and max are small we can unroll a little in | 3737 // from step 2.1. If the min and max are small we can unroll a little in |
| 3816 // this case. | 3738 // this case. |
| (...skipping 846 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4663 } | 4585 } |
| 4664 } | 4586 } |
| 4665 | 4587 |
| 4666 | 4588 |
| 4667 void DispatchTableConstructor::VisitAction(ActionNode* that) { | 4589 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
| 4668 RegExpNode* target = that->on_success(); | 4590 RegExpNode* target = that->on_success(); |
| 4669 target->Accept(this); | 4591 target->Accept(this); |
| 4670 } | 4592 } |
| 4671 | 4593 |
| 4672 | 4594 |
| 4673 RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data, | 4595 Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data, |
| 4674 bool ignore_case, | 4596 bool ignore_case, |
| 4675 bool is_multiline, | 4597 bool is_multiline, |
| 4676 Handle<String> pattern, | 4598 Handle<String> pattern, |
| 4677 bool is_ascii) { | 4599 bool is_ascii) { |
| 4678 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { | 4600 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
| 4679 return IrregexpRegExpTooBig(); | 4601 return IrregexpRegExpTooBig(pattern); |
| 4680 } | 4602 } |
| 4681 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); | 4603 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); |
| 4682 // Wrap the body of the regexp in capture #0. | 4604 // Wrap the body of the regexp in capture #0. |
| 4683 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, | 4605 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, |
| 4684 0, | 4606 0, |
| 4685 &compiler, | 4607 &compiler, |
| 4686 compiler.accept()); | 4608 compiler.accept()); |
| 4687 RegExpNode* node = captured_body; | 4609 RegExpNode* node = captured_body; |
| 4688 if (!data->tree->IsAnchored()) { | 4610 if (!data->tree->IsAnchored()) { |
| 4689 // Add a .*? at the beginning, outside the body capture, unless | 4611 // Add a .*? at the beginning, outside the body capture, unless |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4736 EmbeddedVector<byte, 1024> codes; | 4658 EmbeddedVector<byte, 1024> codes; |
| 4737 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4659 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
| 4738 return compiler.Assemble(&macro_assembler, | 4660 return compiler.Assemble(&macro_assembler, |
| 4739 node, | 4661 node, |
| 4740 data->capture_count, | 4662 data->capture_count, |
| 4741 pattern); | 4663 pattern); |
| 4742 } | 4664 } |
| 4743 | 4665 |
| 4744 | 4666 |
| 4745 }} // namespace v8::internal | 4667 }} // namespace v8::internal |
| OLD | NEW |