Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 195 matching lines...) | |
| 206 Handle<String> pattern, | 206 Handle<String> pattern, |
| 207 Handle<String> flag_str) { | 207 Handle<String> flag_str) { |
| 208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); | 208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); |
| 209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); | 209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); |
| 210 bool in_cache = !cached.is_null(); | 210 bool in_cache = !cached.is_null(); |
| 211 LOG(RegExpCompileEvent(re, in_cache)); | 211 LOG(RegExpCompileEvent(re, in_cache)); |
| 212 | 212 |
| 213 Handle<Object> result; | 213 Handle<Object> result; |
| 214 if (in_cache) { | 214 if (in_cache) { |
| 215 re->set_data(*cached); | 215 re->set_data(*cached); |
| 216 result = re; | 216 return re; |
| 217 } else { | 217 } |
| 218 FlattenString(pattern); | 218 FlattenString(pattern); |
| 219 ZoneScope zone_scope(DELETE_ON_EXIT); | 219 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 220 RegExpCompileData parse_result; | 220 RegExpCompileData parse_result; |
| 221 FlatStringReader reader(pattern); | 221 FlatStringReader reader(pattern); |
| 222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { | 222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { |
| 223 // Throw an exception if we fail to parse the pattern. | 223 // Throw an exception if we fail to parse the pattern. |
| 224 ThrowRegExpException(re, | 224 ThrowRegExpException(re, |
| 225 pattern, | 225 pattern, |
| 226 parse_result.error, | 226 parse_result.error, |
| 227 "malformed_regexp"); | 227 "malformed_regexp"); |
| 228 return Handle<Object>::null(); | 228 return Handle<Object>::null(); |
| 229 } | |
| 230 | |
| 231 if (parse_result.simple && !flags.is_ignore_case()) { | |
| 232 // Parse-tree is a single atom that is equal to the pattern. | |
| 233 result = AtomCompile(re, pattern, flags, pattern); | |
| 234 } else if (parse_result.tree->IsAtom() && | |
| 235 !flags.is_ignore_case() && | |
| 236 parse_result.capture_count == 0) { | |
| 237 RegExpAtom* atom = parse_result.tree->AsAtom(); | |
| 238 Vector<const uc16> atom_pattern = atom->data(); | |
| 239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); | |
| 240 result = AtomCompile(re, pattern, flags, atom_string); | |
| 241 } else { | |
| 242 result = IrregexpPrepare(re, pattern, flags); | |
| 243 } | |
| 244 Object* data = re->data(); | |
| 245 if (data->IsFixedArray()) { | |
| 246 // If compilation succeeded then the data is set on the regexp | |
| 247 // and we can store it in the cache. | |
| 248 Handle<FixedArray> data(FixedArray::cast(re->data())); | |
| 249 CompilationCache::PutRegExp(pattern, flags, data); | |
| 250 } | |
| 251 } | 229 } |
| 252 | 230 |
| 253 return result; | 231 if (parse_result.simple && !flags.is_ignore_case()) { |
| 232 // Parse-tree is a single atom that is equal to the pattern. | |
| 233 AtomCompile(re, pattern, flags, pattern); | |
| 234 } else if (parse_result.tree->IsAtom() && | |
| 235 !flags.is_ignore_case() && | |
| 236 parse_result.capture_count == 0) { | |
| 237 RegExpAtom* atom = parse_result.tree->AsAtom(); | |
| 238 Vector<const uc16> atom_pattern = atom->data(); | |
| 239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); | |
| 240 AtomCompile(re, pattern, flags, atom_string); | |
| 241 } else { | |
| 242 IrregexpPrepare(re, pattern, flags, parse_result.capture_count); | |
| 243 } | |
| 244 ASSERT(re->data()->IsFixedArray()); | |
| 245 // Compilation succeeded so the data is set on the regexp | |
| 246 // and we can store it in the cache. | |
| 247 Handle<FixedArray> data(FixedArray::cast(re->data())); | |
| 248 CompilationCache::PutRegExp(pattern, flags, data); | |
| 249 | |
| 250 return re; | |
| 254 } | 251 } |
| 255 | 252 |
| 256 | 253 |
| 257 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, | 254 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
| 258 Handle<String> subject, | 255 Handle<String> subject, |
| 259 Handle<Object> index) { | 256 int index, |
| 257 Handle<JSArray> last_match_info) { | |
| 260 switch (regexp->TypeTag()) { | 258 switch (regexp->TypeTag()) { |
| 261 case JSRegExp::ATOM: | 259 case JSRegExp::ATOM: |
| 262 return AtomExec(regexp, subject, index); | 260 return AtomExec(regexp, subject, index, last_match_info); |
| 263 case JSRegExp::IRREGEXP: { | 261 case JSRegExp::IRREGEXP: { |
| 264 Handle<Object> result = IrregexpExec(regexp, subject, index); | 262 Handle<Object> result = |
| 263 IrregexpExec(regexp, subject, index, last_match_info); | |
| 265 ASSERT(!result.is_null() || Top::has_pending_exception()); | 264 ASSERT(!result.is_null() || Top::has_pending_exception()); |
| 266 return result; | 265 return result; |
| 267 } | 266 } |
| 268 default: | 267 default: |
| 269 UNREACHABLE(); | 268 UNREACHABLE(); |
| 270 return Handle<Object>::null(); | 269 return Handle<Object>::null(); |
| 271 } | 270 } |
| 272 } | 271 } |
| 273 | 272 |
| 274 | 273 |
| 275 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, | 274 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, |
| 276 Handle<String> subject) { | 275 Handle<String> subject, |
| 276 Handle<JSArray> last_match_info) { | |
| 277 switch (regexp->TypeTag()) { | 277 switch (regexp->TypeTag()) { |
| 278 case JSRegExp::ATOM: | 278 case JSRegExp::ATOM: |
| 279 return AtomExecGlobal(regexp, subject); | 279 return AtomExecGlobal(regexp, subject, last_match_info); |
| 280 case JSRegExp::IRREGEXP: { | 280 case JSRegExp::IRREGEXP: { |
| 281 Handle<Object> result = IrregexpExecGlobal(regexp, subject); | 281 Handle<Object> result = |
| 282 IrregexpExecGlobal(regexp, subject, last_match_info); | |
| 282 ASSERT(!result.is_null() || Top::has_pending_exception()); | 283 ASSERT(!result.is_null() || Top::has_pending_exception()); |
| 283 return result; | 284 return result; |
| 284 } | 285 } |
| 285 default: | 286 default: |
| 286 UNREACHABLE(); | 287 UNREACHABLE(); |
| 287 return Handle<Object>::null(); | 288 return Handle<Object>::null(); |
| 288 } | 289 } |
| 289 } | 290 } |
| 290 | 291 |
| 291 | 292 |
| 292 // RegExp Atom implementation: Simple string search using indexOf. | 293 // RegExp Atom implementation: Simple string search using indexOf. |
| 293 | 294 |
| 294 | 295 |
| 295 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, | 296 void RegExpImpl::AtomCompile(Handle<JSRegExp> re, |
| 296 Handle<String> pattern, | 297 Handle<String> pattern, |
| 297 JSRegExp::Flags flags, | 298 JSRegExp::Flags flags, |
| 298 Handle<String> match_pattern) { | 299 Handle<String> match_pattern) { |
| 299 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern); | 300 Factory::SetRegExpAtomData(re, |
| 300 return re; | 301 JSRegExp::ATOM, |
| 302 pattern, | |
| 303 flags, | |
| 304 match_pattern); | |
| 305 } | |
| 306 | |
| 307 | |
| 308 static void SetAtomLastCapture(FixedArray* array, | |
| 309 String* subject, | |
| 310 int from, | |
| 311 int to) { | |
| 312 NoHandleAllocation no_handles; | |
| 313 RegExpImpl::SetLastCaptureCount(array, 2); | |
| 314 RegExpImpl::SetLastSubject(array, subject); | |
| 315 RegExpImpl::SetLastInput(array, subject); | |
| 316 RegExpImpl::SetCapture(array, 0, from); | |
| 317 RegExpImpl::SetCapture(array, 1, to); | |
| 301 } | 318 } |
| 302 | 319 |
| 303 | 320 |
| 304 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, | 321 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
| 305 Handle<String> subject, | 322 Handle<String> subject, |
| 306 Handle<Object> index) { | 323 int index, |
| 324 Handle<JSArray> last_match_info) { | |
| 307 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); | 325 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
| 308 | 326 |
| 309 uint32_t start_index; | 327 uint32_t start_index = index; |
| 310 if (!Array::IndexFromObject(*index, &start_index)) { | |
| 311 return Handle<Smi>(Smi::FromInt(-1)); | |
| 312 } | |
| 313 | 328 |
| 314 int value = Runtime::StringMatch(subject, needle, start_index); | 329 int value = Runtime::StringMatch(subject, needle, start_index); |
| 315 if (value == -1) return Factory::null_value(); | 330 if (value == -1) return Factory::null_value(); |
| 331 ASSERT(last_match_info->HasFastElements()); | |
| 316 | 332 |
| 317 Handle<FixedArray> array = Factory::NewFixedArray(2); | 333 { |
| 318 array->set(0, Smi::FromInt(value)); | 334 NoHandleAllocation no_handles; |
| 319 array->set(1, Smi::FromInt(value + needle->length())); | 335 FixedArray* array = last_match_info->elements(); |
| 320 return Factory::NewJSArrayWithElements(array); | 336 SetAtomLastCapture(array, *subject, value, value + needle->length()); |
| 337 } | |
| 338 return last_match_info; | |
| 321 } | 339 } |
| 322 | 340 |
| 323 | 341 |
| 324 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, | 342 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, |
| 325 Handle<String> subject) { | 343 Handle<String> subject, |
| 344 Handle<JSArray> last_match_info) { | |
| 326 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); | 345 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
| 346 ASSERT(last_match_info->HasFastElements()); | |
| 327 Handle<JSArray> result = Factory::NewJSArray(1); | 347 Handle<JSArray> result = Factory::NewJSArray(1); |
| 328 int index = 0; | 348 int index = 0; |
| 329 int match_count = 0; | 349 int match_count = 0; |
| 330 int subject_length = subject->length(); | 350 int subject_length = subject->length(); |
| 331 int needle_length = needle->length(); | 351 int needle_length = needle->length(); |
| 352 int last_value = -1; | |
| 332 while (true) { | 353 while (true) { |
| 354 HandleScope scope; | |
| 333 int value = -1; | 355 int value = -1; |
| 334 if (index + needle_length <= subject_length) { | 356 if (index + needle_length <= subject_length) { |
| 335 value = Runtime::StringMatch(subject, needle, index); | 357 value = Runtime::StringMatch(subject, needle, index); |
| 336 } | 358 } |
| 337 if (value == -1) break; | 359 if (value == -1) { |
| 338 HandleScope scope; | 360 if (last_value != -1) { |
| 361 Handle<FixedArray> array(last_match_info->elements()); | |
| 362 SetAtomLastCapture(*array, | |
| 363 *subject, | |
| 364 last_value, | |
| 365 last_value + needle->length()); | |
| 366 } | |
| 367 break; | |
| 368 } | |
| 369 | |
| 339 int end = value + needle_length; | 370 int end = value + needle_length; |
| 340 | 371 |
| 341 Handle<FixedArray> array = Factory::NewFixedArray(2); | 372 // Create an array that looks like the static last_match_info array |
| 342 array->set(0, Smi::FromInt(value)); | 373 // that is attached to the global RegExp object. We will be returning |
| 343 array->set(1, Smi::FromInt(end)); | 374 // an array of these. |
| 375 Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2); | |
| 376 SetCapture(*array, 0, value); | |
| 377 SetCapture(*array, 1, end); | |
| 378 SetLastCaptureCount(*array, 2); | |
| 344 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); | 379 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); |
| 345 SetElement(result, match_count, pair); | 380 SetElement(result, match_count, pair); |
| 346 match_count++; | 381 match_count++; |
| 347 index = end; | 382 index = end; |
| 348 if (needle_length == 0) index++; | 383 if (needle_length == 0) index++; |
| 384 last_value = value; | |
| 349 } | 385 } |
| 350 return result; | 386 return result; |
| 351 } | 387 } |
| 352 | 388 |
| 353 | 389 |
| 354 // Irregexp implementation. | 390 // Irregexp implementation. |
| 355 | 391 |
| 356 | 392 |
| 357 // Retrieves a compiled version of the regexp for either ASCII or non-ASCII | 393 // Ensures that the regexp object contains a compiled version of the |
| 358 // strings. If the compiled version doesn't already exist, it is compiled | 394 // source for either ASCII or non-ASCII strings. |
| 395 // If the compiled version doesn't already exist, it is compiled | |
| 359 // from the source pattern. | 396 // from the source pattern. |
| 360 // Irregexp is not feature complete yet. If there is something in the | 397 // If compilation fails, an exception is thrown and this function |
| 361 // regexp that the compiler cannot currently handle, an empty | 398 // returns false. |
| 362 // handle is returned, but no exception is thrown. | 399 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, |
| 363 static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re, | 400 bool is_ascii) { |
| 364 bool is_ascii) { | 401 int index; |
| 365 ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); | 402 if (is_ascii) { |
| 366 Handle<FixedArray> alternatives( | 403 index = JSRegExp::kIrregexpASCIICodeIndex; |
| 367 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex))); | 404 } else { |
| 368 ASSERT_EQ(2, alternatives->length()); | 405 index = JSRegExp::kIrregexpUC16CodeIndex; |
| 369 | 406 } |
| 370 int index = is_ascii ? 0 : 1; | 407 Object* entry = re->DataAt(index); |
| 371 Object* entry = alternatives->get(index); | 408 if (!entry->IsTheHole()) { |
| 372 if (!entry->IsNull()) { | 409 // A value has already been compiled. |
| 373 return Handle<FixedArray>(FixedArray::cast(entry)); | 410 if (entry->IsJSObject()) { |
| 411 // If it's a JS value, it's an error. | |
| 412 Top::Throw(entry); | |
| 413 return false; | |
| 414 } | |
| 415 return true; | |
| 374 } | 416 } |
| 375 | 417 |
| 376 // Compile the RegExp. | 418 // Compile the RegExp. |
| 377 ZoneScope zone_scope(DELETE_ON_EXIT); | 419 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 378 | 420 |
| 379 JSRegExp::Flags flags = re->GetFlags(); | 421 JSRegExp::Flags flags = re->GetFlags(); |
| 380 | 422 |
| 381 Handle<String> pattern(re->Pattern()); | 423 Handle<String> pattern(re->Pattern()); |
| 382 if (!pattern->IsFlat(StringShape(*pattern))) { | 424 if (!pattern->IsFlat(StringShape(*pattern))) { |
| 383 FlattenString(pattern); | 425 FlattenString(pattern); |
| 384 } | 426 } |
| 385 | 427 |
| 386 RegExpCompileData compile_data; | 428 RegExpCompileData compile_data; |
| 387 FlatStringReader reader(pattern); | 429 FlatStringReader reader(pattern); |
| 388 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { | 430 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { |
| 389 // Throw an exception if we fail to parse the pattern. | 431 // Throw an exception if we fail to parse the pattern. |
| 390 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. | 432 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. |
| 391 ThrowRegExpException(re, | 433 ThrowRegExpException(re, |
| 392 pattern, | 434 pattern, |
| 393 compile_data.error, | 435 compile_data.error, |
| 394 "malformed_regexp"); | 436 "malformed_regexp"); |
| 395 return Handle<FixedArray>::null(); | 437 return false; |
| 396 } | 438 } |
| 397 Handle<FixedArray> compiled_entry = | 439 RegExpEngine::CompilationResult result = |
| 398 RegExpEngine::Compile(&compile_data, | 440 RegExpEngine::Compile(&compile_data, |
| 399 flags.is_ignore_case(), | 441 flags.is_ignore_case(), |
| 400 flags.is_multiline(), | 442 flags.is_multiline(), |
| 401 pattern, | 443 pattern, |
| 402 is_ascii); | 444 is_ascii); |
| 403 if (!compiled_entry.is_null()) { | 445 if (result.error_message != NULL) { |
| 404 alternatives->set(index, *compiled_entry); | 446 // Unable to compile regexp. |
| 447 Handle<JSArray> array = Factory::NewJSArray(2); | |
| 448 SetElement(array, 0, pattern); | |
| 449 SetElement(array, | |
| 450 1, | |
| 451 Factory::NewStringFromUtf8(CStrVector(result.error_message))); | |
| 452 Handle<Object> regexp_err = | |
| 453 Factory::NewSyntaxError("malformed_regexp", array); | |
| 454 Top::Throw(*regexp_err); | |
| 455 re->SetDataAt(index, *regexp_err); | |
| 456 return false; | |
| 405 } | 457 } |
| 406 return compiled_entry; | 458 |
| 459 NoHandleAllocation no_handles; | |
| 460 | |
| 461 FixedArray* data = FixedArray::cast(re->data()); | |
| 462 data->set(index, result.code); | |
| 463 int register_max = IrregexpMaxRegisterCount(data); | |
| 464 if (result.num_registers > register_max) { | |
| 465 SetIrregexpMaxRegisterCount(data, result.num_registers); | |
| 466 } | |
| 467 | |
| 468 return true; | |
| 407 } | 469 } |
| 408 | 470 |
| 409 | 471 |
| 410 int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) { | 472 int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) { |
| 411 return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value(); | 473 return Smi::cast( |
| 474 re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); | |
| 412 } | 475 } |
| 413 | 476 |
| 414 | 477 |
| 415 int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) { | 478 void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) { |
| 416 return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value(); | 479 re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value)); |
| 417 } | 480 } |
| 418 | 481 |
| 419 | 482 |
| 420 Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) { | 483 int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) { |
| 421 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() | 484 return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value(); |
| 422 == RegExpMacroAssembler::kBytecodeImplementation); | |
| 423 return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex))); | |
| 424 } | 485 } |
| 425 | 486 |
| 426 | 487 |
| 427 Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) { | 488 int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) { |
| 428 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() | 489 return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); |
| 429 != RegExpMacroAssembler::kBytecodeImplementation); | |
| 430 return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex))); | |
| 431 } | 490 } |
| 432 | 491 |
| 433 | 492 |
| 434 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, | 493 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { |
| 435 Handle<String> pattern, | 494 int index; |
| 436 JSRegExp::Flags flags) { | 495 if (is_ascii) { |
| 437 // Make space for ASCII and UC16 versions. | 496 index = JSRegExp::kIrregexpASCIICodeIndex; |
| 438 Handle<FixedArray> alternatives = Factory::NewFixedArray(2); | 497 } else { |
| 439 alternatives->set_null(0); | 498 index = JSRegExp::kIrregexpUC16CodeIndex; |
| 440 alternatives->set_null(1); | 499 } |
| 441 Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives); | 500 return ByteArray::cast(re->get(index)); |
| 442 return re; | 501 } |
| 502 | |
| 503 | |
| 504 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { | |
| 505 int index; | |
| 506 if (is_ascii) { | |
| 507 index = JSRegExp::kIrregexpASCIICodeIndex; | |
| 508 } else { | |
| 509 index = JSRegExp::kIrregexpUC16CodeIndex; | |
| 510 } | |
| 511 return Code::cast(re->get(index)); | |
| 512 } | |
| 513 | |
| 514 | |
| 515 void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, | |
| 516 Handle<String> pattern, | |
| 517 JSRegExp::Flags flags, | |
| 518 int capture_count) { | |
| 519 // Initialize compiled code entries to null. | |
| 520 Factory::SetRegExpIrregexpData(re, | |
| 521 JSRegExp::IRREGEXP, | |
| 522 pattern, | |
| 523 flags, | |
| 524 capture_count); | |
| 443 } | 525 } |
| 444 | 526 |
| 445 | 527 |
| 446 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, | 528 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, |
| 447 Handle<String> subject, | 529 Handle<String> subject, |
| 448 Handle<Object> index) { | 530 int index, |
| 531 Handle<JSArray> last_match_info) { | |
| 449 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | 532 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| 450 ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); | |
| 451 | 533 |
| 452 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); | 534 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); |
| 453 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); | 535 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { |
| 454 if (irregexp.is_null()) { | |
| 455 // We can't handle the RegExp with IRRegExp. | |
| 456 return Handle<Object>::null(); | 536 return Handle<Object>::null(); |
| 457 } | 537 } |
| 458 | 538 |
| 459 // Prepare space for the return values. | 539 // Prepare space for the return values. |
| 460 int number_of_registers = IrregexpNumberOfRegisters(irregexp); | 540 Handle<FixedArray> re_data(FixedArray::cast(regexp->data())); |
| 461 OffsetsVector offsets(number_of_registers); | 541 int number_of_capture_registers = |
| 542 (IrregexpNumberOfCaptures(*re_data) + 1) * 2; | |
| 543 OffsetsVector offsets(number_of_capture_registers); | |
| 462 | 544 |
| 463 int num_captures = IrregexpNumberOfCaptures(irregexp); | 545 int previous_index = index; |
| 464 | |
| 465 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); | |
| 466 | 546 |
| 467 #ifdef DEBUG | 547 #ifdef DEBUG |
| 468 if (FLAG_trace_regexp_bytecodes) { | 548 if (FLAG_trace_regexp_bytecodes) { |
| 469 String* pattern = regexp->Pattern(); | 549 String* pattern = regexp->Pattern(); |
| 470 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 550 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| 471 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 551 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| 472 } | 552 } |
| 473 #endif | 553 #endif |
| 474 | 554 |
| 475 if (!subject->IsFlat(StringShape(*subject))) { | 555 if (!subject->IsFlat(StringShape(*subject))) { |
| 476 FlattenString(subject); | 556 FlattenString(subject); |
| 477 } | 557 } |
| 478 | 558 |
| 479 return IrregexpExecOnce(irregexp, | 559 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); |
| 480 num_captures, | 560 |
| 561 return IrregexpExecOnce(re_data, | |
| 562 number_of_capture_registers, | |
| 563 last_match_info, | |
| 481 subject, | 564 subject, |
| 482 previous_index, | 565 previous_index, |
| 483 offsets.vector(), | 566 offsets.vector(), |
| 484 offsets.length()); | 567 offsets.length()); |
| 485 } | 568 } |
| 486 | 569 |
| 487 | 570 |
| 488 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, | 571 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, |
| 489 Handle<String> subject) { | 572 Handle<String> subject, |
| 573 Handle<JSArray> last_match_info) { | |
| 490 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | 574 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| 575 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data())); | |
| 491 | 576 |
| 492 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); | 577 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); |
| 493 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); | 578 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { |
| 494 if (irregexp.is_null()) { | |
| 495 return Handle<Object>::null(); | 579 return Handle<Object>::null(); |
| 496 } | 580 } |
| 497 | 581 |
| 498 // Prepare space for the return values. | 582 // Prepare space for the return values. |
| 499 int number_of_registers = IrregexpNumberOfRegisters(irregexp); | 583 int number_of_capture_registers = |
| 500 OffsetsVector offsets(number_of_registers); | 584 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
| 585 OffsetsVector offsets(number_of_capture_registers); | |
| 501 | 586 |
| 502 int previous_index = 0; | 587 int previous_index = 0; |
| 503 | 588 |
| 504 Handle<JSArray> result = Factory::NewJSArray(0); | 589 Handle<JSArray> result = Factory::NewJSArray(0); |
| 505 int i = 0; | 590 int result_length = 0; |
| 506 Handle<Object> matches; | 591 Handle<Object> matches; |
| 507 | 592 |
| 508 if (!subject->IsFlat(StringShape(*subject))) { | 593 if (!subject->IsFlat(StringShape(*subject))) { |
| 509 FlattenString(subject); | 594 FlattenString(subject); |
| 510 } | 595 } |
| 511 | 596 |
| 597 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); | |
| 598 | |
| 512 while (true) { | 599 while (true) { |
| 513 if (previous_index > subject->length() || previous_index < 0) { | 600 if (previous_index > subject->length() || previous_index < 0) { |
| 514 // Per ECMA-262 15.10.6.2, if the previous index is greater than the | 601 // Per ECMA-262 15.10.6.2, if the previous index is greater than the |
| 515 // string length, there is no match. | 602 // string length, there is no match. |
| 516 matches = Factory::null_value(); | 603 matches = Factory::null_value(); |
|
> **Mads Ager (chromium)** (2009/03/11 13:49:17): I know this is not your code, but why is there an [… comment truncated in this capture]
>
> **Erik Corry** (2009/03/11 14:01:06): Lasse has a patch waiting that also fixes this.

| 517 return result; | 604 return result; |
| 518 } else { | 605 } else { |
| 519 #ifdef DEBUG | 606 #ifdef DEBUG |
| 520 if (FLAG_trace_regexp_bytecodes) { | 607 if (FLAG_trace_regexp_bytecodes) { |
| 521 String* pattern = regexp->Pattern(); | 608 String* pattern = regexp->Pattern(); |
| 522 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 609 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| 523 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 610 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| 524 } | 611 } |
| 525 #endif | 612 #endif |
| 613 HandleScope scope; | |
| 526 matches = IrregexpExecOnce(irregexp, | 614 matches = IrregexpExecOnce(irregexp, |
| 527 IrregexpNumberOfCaptures(irregexp), | 615 number_of_capture_registers, |
| 616 last_match_info, | |
| 528 subject, | 617 subject, |
| 529 previous_index, | 618 previous_index, |
| 530 offsets.vector(), | 619 offsets.vector(), |
| 531 offsets.length()); | 620 offsets.length()); |
| 532 | 621 |
| 533 if (matches.is_null()) { | 622 if (matches.is_null()) { |
| 534 ASSERT(Top::has_pending_exception()); | 623 ASSERT(Top::has_pending_exception()); |
| 535 return matches; | 624 return matches; |
| 536 } | 625 } |
| 537 | 626 |
| 538 if (matches->IsJSArray()) { | 627 if (matches->IsJSArray()) { |
| 539 SetElement(result, i, matches); | 628 // Create an array that looks like the static last_match_info array |
| 540 i++; | 629 // that is attached to the global RegExp object. We will be returning |
| 541 previous_index = offsets.vector()[1]; | 630 // an array of these. |
| 542 if (offsets.vector()[0] == offsets.vector()[1]) { | 631 Handle<FixedArray> matches_array(JSArray::cast(*matches)->elements()); |
| 632 Handle<JSArray> latest_match = | |
| 633 Factory::NewJSArray(kFirstCapture + number_of_capture_registers); | |
| 634 Handle<FixedArray> latest_match_array(latest_match->elements()); | |
| 635 | |
| 636 for (int i = 0; i < number_of_capture_registers; i++) { | |
| 637 SetCapture(*latest_match_array, i, GetCapture(*matches_array, i)); | |
| 638 } | |
| 639 SetLastCaptureCount(*latest_match_array, number_of_capture_registers); | |
| 640 | |
| 641 SetElement(result, result_length, latest_match); | |
| 642 result_length++; | |
| 643 previous_index = GetCapture(*matches_array, 1); | |
| 644 if (GetCapture(*matches_array, 0) == previous_index) | |
| 543 previous_index++; | 645 previous_index++; |
| 544 } | 646 |
| 545 } else { | 647 } else { |
| 546 ASSERT(matches->IsNull()); | 648 ASSERT(matches->IsNull()); |
| 547 return result; | 649 return result; |
| 548 } | 650 } |
| 549 } | 651 } |
| 550 } | 652 } |
| 551 } | 653 } |
| 552 | 654 |
| 553 | 655 |
| 554 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, | 656 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp, |
| 555 int num_captures, | 657 int number_of_capture_registers, |
| 658 Handle<JSArray> last_match_info, | |
| 556 Handle<String> subject, | 659 Handle<String> subject, |
| 557 int previous_index, | 660 int previous_index, |
| 558 int* offsets_vector, | 661 int* offsets_vector, |
| 559 int offsets_vector_length) { | 662 int offsets_vector_length) { |
| 560 ASSERT(subject->IsFlat(StringShape(*subject))); | 663 StringShape shape(*subject); |
| 664 ASSERT(subject->IsFlat(shape)); | |
| 665 bool is_ascii = shape.IsAsciiRepresentation(); | |
| 561 bool rc; | 666 bool rc; |
| 562 | 667 |
| 563 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); | 668 Handle<String> original_subject = subject; |
| 669 if (FLAG_regexp_native) { | |
| 670 #ifndef ARM | |
| 671 Handle<Code> code(IrregexpNativeCode(*regexp, is_ascii)); | |
| 564 | 672 |
| 565 switch (tag) { | 673 // Character offsets into string. |
| 566 case RegExpMacroAssembler::kIA32Implementation: { | 674 int start_offset = previous_index; |
| 567 #ifndef ARM | 675 int end_offset = subject->length(shape); |
| 568 Handle<Code> code = IrregexpNativeCode(irregexp); | |
| 569 | 676 |
| 570 StringShape shape(*subject); | 677 if (shape.IsCons()) { |
| 678 subject = Handle<String>(ConsString::cast(*subject)->first()); | |
| 679 } else if (shape.IsSliced()) { | |
| 680 SlicedString* slice = SlicedString::cast(*subject); | |
| 681 start_offset += slice->start(); | |
| 682 end_offset += slice->start(); | |
| 683 subject = Handle<String>(slice->buffer()); | |
| 684 } | |
| 571 | 685 |
| 572 // Character offsets into string. | 686 // String is now either Sequential or External |
| 573 int start_offset = previous_index; | 687 StringShape flatshape(*subject); |
| 574 int end_offset = subject->length(shape); | 688 bool is_ascii = flatshape.IsAsciiRepresentation(); |
| 689 int char_size_shift = is_ascii ? 0 : 1; | |
| 575 | 690 |
| 576 if (shape.IsCons()) { | 691 RegExpMacroAssemblerIA32::Result res; |
| 577 subject = Handle<String>(ConsString::cast(*subject)->first()); | 692 |
| 578 } else if (shape.IsSliced()) { | 693 if (flatshape.IsExternal()) { |
| 579 SlicedString* slice = SlicedString::cast(*subject); | 694 const byte* address; |
| 580 start_offset += slice->start(); | 695 if (is_ascii) { |
| 581 end_offset += slice->start(); | 696 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); |
| 582 subject = Handle<String>(slice->buffer()); | 697 address = reinterpret_cast<const byte*>(ext->resource()->data()); |
| 698 } else { | |
| 699 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); | |
| 700 address = reinterpret_cast<const byte*>(ext->resource()->data()); | |
| 583 } | 701 } |
| 702 res = RegExpMacroAssemblerIA32::Execute( | |
| 703 *code, | |
| 704 const_cast<Address*>(&address), | |
| 705 start_offset << char_size_shift, | |
| 706 end_offset << char_size_shift, | |
| 707 offsets_vector, | |
| 708 previous_index == 0); | |
| 709 } else { // Sequential string | |
| 710 ASSERT(StringShape(*subject).IsSequential()); | |
| 711 Address char_address = | |
| 712 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() | |
| 713 : SeqTwoByteString::cast(*subject)->GetCharsAddress(); | |
| 714 int byte_offset = char_address - reinterpret_cast<Address>(*subject); | |
| 715 res = RegExpMacroAssemblerIA32::Execute( | |
| 716 *code, | |
| 717 reinterpret_cast<Address*>(subject.location()), | |
| 718 byte_offset + (start_offset << char_size_shift), | |
| 719 byte_offset + (end_offset << char_size_shift), | |
| 720 offsets_vector, | |
| 721 previous_index == 0); | |
| 722 } | |
| 584 | 723 |
| 585 // String is now either Sequential or External | 724 if (res == RegExpMacroAssemblerIA32::EXCEPTION) { |
| 586 StringShape flatshape(*subject); | 725 ASSERT(Top::has_pending_exception()); |
| 587 bool is_ascii = flatshape.IsAsciiRepresentation(); | 726 return Handle<Object>::null(); |
| 588 int char_size_shift = is_ascii ? 0 : 1; | 727 } |
| 728 rc = (res == RegExpMacroAssemblerIA32::SUCCESS); | |
| 589 | 729 |
| 590 RegExpMacroAssemblerIA32::Result res; | 730 if (rc) { |
| 591 | 731 // Capture values are relative to start_offset only. |
| 592 if (flatshape.IsExternal()) { | 732 for (int i = 0; i < offsets_vector_length; i++) { |
| 593 const byte* address; | 733 if (offsets_vector[i] >= 0) { |
| 594 if (is_ascii) { | 734 offsets_vector[i] += previous_index; |
| 595 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); | |
| 596 address = reinterpret_cast<const byte*>(ext->resource()->data()); | |
| 597 } else { | |
| 598 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); | |
| 599 address = reinterpret_cast<const byte*>(ext->resource()->data()); | |
| 600 } | |
| 601 res = RegExpMacroAssemblerIA32::Execute( | |
| 602 *code, | |
| 603 const_cast<Address*>(&address), | |
| 604 start_offset << char_size_shift, | |
| 605 end_offset << char_size_shift, | |
| 606 offsets_vector, | |
| 607 previous_index == 0); | |
| 608 } else { // Sequential string | |
| 609 ASSERT(StringShape(*subject).IsSequential()); | |
| 610 Address char_address = | |
| 611 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() | |
| 612 : SeqTwoByteString::cast(*subject)->GetCharsAddress(); | |
| 613 int byte_offset = char_address - reinterpret_cast<Address>(*subject); | |
| 614 res = RegExpMacroAssemblerIA32::Execute( | |
| 615 *code, | |
| 616 reinterpret_cast<Address*>(subject.location()), | |
| 617 byte_offset + (start_offset << char_size_shift), | |
| 618 byte_offset + (end_offset << char_size_shift), | |
| 619 offsets_vector, | |
| 620 previous_index == 0); | |
| 621 } | |
| 622 | |
| 623 if (res == RegExpMacroAssemblerIA32::EXCEPTION) { | |
| 624 ASSERT(Top::has_pending_exception()); | |
| 625 return Handle<Object>::null(); | |
| 626 } | |
| 627 rc = (res == RegExpMacroAssemblerIA32::SUCCESS); | |
| 628 | |
| 629 if (rc) { | |
| 630 // Capture values are relative to start_offset only. | |
| 631 for (int i = 0; i < offsets_vector_length; i++) { | |
| 632 if (offsets_vector[i] >= 0) { | |
| 633 offsets_vector[i] += previous_index; | |
| 634 } | |
| 635 } | 735 } |
| 636 } | 736 } |
| 637 break; | 737 } |
| 738 } else { | |
| 638 #else | 739 #else |
| 639 UNIMPLEMENTED(); | 740 // Unimplemented on ARM, fall through to bytecode. |
|
Mads Ager (chromium)
2009/03/11 13:49:17
Auch, this is hard to read. Can we factor this di
Lasse Reichstein
2009/03/11 13:54:03
I'm all for factoring it differently, but I think
Erik Corry
2009/03/11 14:01:06
I'll leave it alone for now.
| |
| 640 rc = false; | 741 } |
| 641 break; | 742 { |
| 642 #endif | 743 #endif |
| 744 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | |
| 745 offsets_vector[i] = -1; | |
| 643 } | 746 } |
| 644 case RegExpMacroAssembler::kBytecodeImplementation: { | 747 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); |
| 645 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { | |
| 646 offsets_vector[i] = -1; | |
| 647 } | |
| 648 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp); | |
| 649 | 748 |
| 650 rc = IrregexpInterpreter::Match(byte_codes, | 749 rc = IrregexpInterpreter::Match(byte_codes, |
| 651 subject, | 750 subject, |
| 652 offsets_vector, | 751 offsets_vector, |
| 653 previous_index); | 752 previous_index); |
| 654 break; | |
| 655 } | |
| 656 case RegExpMacroAssembler::kARMImplementation: | |
| 657 default: | |
| 658 UNREACHABLE(); | |
| 659 rc = false; | |
| 660 break; | |
| 661 } | 753 } |
| 662 | 754 |
| 663 if (!rc) { | 755 if (!rc) { |
| 664 return Factory::null_value(); | 756 return Factory::null_value(); |
| 665 } | 757 } |
| 666 | 758 |
| 667 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); | 759 FixedArray* array = last_match_info->elements(); |
| 760 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); | |
| 668 // The captures come in (start, end+1) pairs. | 761 // The captures come in (start, end+1) pairs. |
| 669 for (int i = 0; i < 2 * (num_captures + 1); i += 2) { | 762 for (int i = 0; i < number_of_capture_registers; i += 2) { |
| 670 array->set(i, Smi::FromInt(offsets_vector[i])); | 763 SetCapture(array, i, offsets_vector[i]); |
| 671 array->set(i + 1, Smi::FromInt(offsets_vector[i + 1])); | 764 SetCapture(array, i + 1, offsets_vector[i + 1]); |
| 672 } | 765 } |
| 673 return Factory::NewJSArrayWithElements(array); | 766 SetLastCaptureCount(array, number_of_capture_registers); |
| 767 SetLastSubject(array, *original_subject); | |
| 768 SetLastInput(array, *original_subject); | |
| 769 return last_match_info; | |
| 674 } | 770 } |
| 675 | 771 |
| 676 | 772 |
| 677 // ------------------------------------------------------------------- | 773 // ------------------------------------------------------------------- |
| 678 // Implmentation of the Irregexp regular expression engine. | 774 // Implementation of the Irregexp regular expression engine. |
| 679 // | 775 // |
| 680 // The Irregexp regular expression engine is intended to be a complete | 776 // The Irregexp regular expression engine is intended to be a complete |
| 681 // implementation of ECMAScript regular expressions. It generates either | 777 // implementation of ECMAScript regular expressions. It generates either |
| 682 // bytecodes or native code. | 778 // bytecodes or native code. |
| 683 | 779 |
| 684 // The Irregexp regexp engine is structured in three steps. | 780 // The Irregexp regexp engine is structured in three steps. |
| 685 // 1) The parser generates an abstract syntax tree. See ast.cc. | 781 // 1) The parser generates an abstract syntax tree. See ast.cc. |
| 686 // 2) From the AST a node network is created. The nodes are all | 782 // 2) From the AST a node network is created. The nodes are all |
| 687 // subclasses of RegExpNode. The nodes represent states when | 783 // subclasses of RegExpNode. The nodes represent states when |
| 688 // executing a regular expression. Several optimizations are | 784 // executing a regular expression. Several optimizations are |
| (...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 885 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); | 981 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); |
| 886 | 982 |
| 887 int AllocateRegister() { | 983 int AllocateRegister() { |
| 888 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { | 984 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { |
| 889 reg_exp_too_big_ = true; | 985 reg_exp_too_big_ = true; |
| 890 return next_register_; | 986 return next_register_; |
| 891 } | 987 } |
| 892 return next_register_++; | 988 return next_register_++; |
| 893 } | 989 } |
| 894 | 990 |
| 895 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, | 991 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, |
| 896 RegExpNode* start, | 992 RegExpNode* start, |
| 897 int capture_count, | 993 int capture_count, |
| 898 Handle<String> pattern); | 994 Handle<String> pattern); |
| 899 | 995 |
| 900 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 996 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
| 901 | 997 |
| 902 static const int kImplementationOffset = 0; | 998 static const int kImplementationOffset = 0; |
| 903 static const int kNumberOfRegistersOffset = 0; | 999 static const int kNumberOfRegistersOffset = 0; |
| 904 static const int kCodeOffset = 1; | 1000 static const int kCodeOffset = 1; |
| 905 | 1001 |
| 906 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } | 1002 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
| 907 EndNode* accept() { return accept_; } | 1003 EndNode* accept() { return accept_; } |
| 908 | 1004 |
| (...skipping 24 matching lines...) Expand all Loading... | |
| 933 public: | 1029 public: |
| 934 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 1030 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
| 935 compiler->IncrementRecursionDepth(); | 1031 compiler->IncrementRecursionDepth(); |
| 936 } | 1032 } |
| 937 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 1033 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
| 938 private: | 1034 private: |
| 939 RegExpCompiler* compiler_; | 1035 RegExpCompiler* compiler_; |
| 940 }; | 1036 }; |
| 941 | 1037 |
| 942 | 1038 |
| 943 static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) { | 1039 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { |
| 944 Handle<JSArray> array = Factory::NewJSArray(2); | 1040 return RegExpEngine::CompilationResult("RegExp too big"); |
| 945 SetElement(array, 0, pattern); | |
| 946 const char* message = "RegExp too big"; | |
| 947 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message))); | |
| 948 Handle<Object> regexp_err = | |
| 949 Factory::NewSyntaxError("malformed_regexp", array); | |
| 950 Top::Throw(*regexp_err); | |
| 951 return Handle<FixedArray>(); | |
| 952 } | 1041 } |
| 953 | 1042 |
| 954 | 1043 |
| 955 // Attempts to compile the regexp using an Irregexp code generator. Returns | 1044 // Attempts to compile the regexp using an Irregexp code generator. Returns |
| 956 // a fixed array or a null handle depending on whether it succeeded. | 1045 // a fixed array or a null handle depending on whether it succeeded. |
| 957 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) | 1046 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) |
| 958 : next_register_(2 * (capture_count + 1)), | 1047 : next_register_(2 * (capture_count + 1)), |
| 959 work_list_(NULL), | 1048 work_list_(NULL), |
| 960 recursion_depth_(0), | 1049 recursion_depth_(0), |
| 961 ignore_case_(ignore_case), | 1050 ignore_case_(ignore_case), |
| 962 ascii_(ascii), | 1051 ascii_(ascii), |
| 963 reg_exp_too_big_(false) { | 1052 reg_exp_too_big_(false) { |
| 964 accept_ = new EndNode(EndNode::ACCEPT); | 1053 accept_ = new EndNode(EndNode::ACCEPT); |
| 965 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); | 1054 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); |
| 966 } | 1055 } |
| 967 | 1056 |
| 968 | 1057 |
| 969 Handle<FixedArray> RegExpCompiler::Assemble( | 1058 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
| 970 RegExpMacroAssembler* macro_assembler, | 1059 RegExpMacroAssembler* macro_assembler, |
| 971 RegExpNode* start, | 1060 RegExpNode* start, |
| 972 int capture_count, | 1061 int capture_count, |
| 973 Handle<String> pattern) { | 1062 Handle<String> pattern) { |
| 974 #ifdef DEBUG | 1063 #ifdef DEBUG |
| 975 if (FLAG_trace_regexp_assembler) | 1064 if (FLAG_trace_regexp_assembler) |
| 976 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); | 1065 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); |
| 977 else | 1066 else |
| 978 #endif | 1067 #endif |
| 979 macro_assembler_ = macro_assembler; | 1068 macro_assembler_ = macro_assembler; |
| 980 List <RegExpNode*> work_list(0); | 1069 List <RegExpNode*> work_list(0); |
| 981 work_list_ = &work_list; | 1070 work_list_ = &work_list; |
| 982 Label fail; | 1071 Label fail; |
| 983 macro_assembler_->PushBacktrack(&fail); | 1072 macro_assembler_->PushBacktrack(&fail); |
| 984 Trace new_trace; | 1073 Trace new_trace; |
| 985 start->Emit(this, &new_trace); | 1074 start->Emit(this, &new_trace); |
| 986 macro_assembler_->Bind(&fail); | 1075 macro_assembler_->Bind(&fail); |
| 987 macro_assembler_->Fail(); | 1076 macro_assembler_->Fail(); |
| 988 while (!work_list.is_empty()) { | 1077 while (!work_list.is_empty()) { |
| 989 work_list.RemoveLast()->Emit(this, &new_trace); | 1078 work_list.RemoveLast()->Emit(this, &new_trace); |
| 990 } | 1079 } |
| 991 if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern); | 1080 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); |
| 992 Handle<FixedArray> array = | 1081 |
| 993 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); | |
| 994 array->set(RegExpImpl::kIrregexpImplementationIndex, | |
| 995 Smi::FromInt(macro_assembler_->Implementation())); | |
| 996 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, | |
| 997 Smi::FromInt(next_register_)); | |
| 998 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, | |
| 999 Smi::FromInt(capture_count)); | |
| 1000 Handle<Object> code = macro_assembler_->GetCode(pattern); | 1082 Handle<Object> code = macro_assembler_->GetCode(pattern); |
| 1001 array->set(RegExpImpl::kIrregexpCodeIndex, *code); | 1083 |
| 1002 work_list_ = NULL; | 1084 work_list_ = NULL; |
| 1003 #ifdef DEBUG | 1085 #ifdef DEBUG |
| 1004 if (FLAG_trace_regexp_assembler) { | 1086 if (FLAG_trace_regexp_assembler) { |
| 1005 delete macro_assembler_; | 1087 delete macro_assembler_; |
| 1006 } | 1088 } |
| 1007 #endif | 1089 #endif |
| 1008 return array; | 1090 return RegExpEngine::CompilationResult(*code, next_register_); |
| 1009 } | 1091 } |
| 1010 | 1092 |
| 1011 | 1093 |
| 1012 bool Trace::DeferredAction::Mentions(int that) { | 1094 bool Trace::DeferredAction::Mentions(int that) { |
| 1013 if (type() == ActionNode::CLEAR_CAPTURES) { | 1095 if (type() == ActionNode::CLEAR_CAPTURES) { |
| 1014 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); | 1096 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); |
| 1015 return range.Contains(that); | 1097 return range.Contains(that); |
| 1016 } else { | 1098 } else { |
| 1017 return reg() == that; | 1099 return reg() == that; |
| 1018 } | 1100 } |
| (...skipping 2697 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3716 // x{f, t} becomes this: | 3798 // x{f, t} becomes this: |
| 3717 // | 3799 // |
| 3718 // (r++)<-. | 3800 // (r++)<-. |
| 3719 // | ` | 3801 // | ` |
| 3720 // | (x) | 3802 // | (x) |
| 3721 // v ^ | 3803 // v ^ |
| 3722 // (r=0)-->(?)---/ [if r < t] | 3804 // (r=0)-->(?)---/ [if r < t] |
| 3723 // | | 3805 // | |
| 3724 // [if r >= f] \----> ... | 3806 // [if r >= f] \----> ... |
| 3725 // | 3807 // |
| 3726 // | |
| 3727 // TODO(someone): clear captures on repetition and handle empty | |
| 3728 // matches. | |
| 3729 | 3808 |
| 3730 // 15.10.2.5 RepeatMatcher algorithm. | 3809 // 15.10.2.5 RepeatMatcher algorithm. |
| 3731 // The parser has already eliminated the case where max is 0. In the case | 3810 // The parser has already eliminated the case where max is 0. In the case |
| 3732 // where max_match is zero the parser has removed the quantifier if min was | 3811 // where max_match is zero the parser has removed the quantifier if min was |
| 3733 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. | 3812 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. |
| 3734 | 3813 |
| 3735 // If we know that we cannot match zero length then things are a little | 3814 // If we know that we cannot match zero length then things are a little |
| 3736 // simpler since we don't need to make the special zero length match check | 3815 // simpler since we don't need to make the special zero length match check |
| 3737 // from step 2.1. If the min and max are small we can unroll a little in | 3816 // from step 2.1. If the min and max are small we can unroll a little in |
| 3738 // this case. | 3817 // this case. |
| (...skipping 846 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4585 } | 4664 } |
| 4586 } | 4665 } |
| 4587 | 4666 |
| 4588 | 4667 |
| 4589 void DispatchTableConstructor::VisitAction(ActionNode* that) { | 4668 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
| 4590 RegExpNode* target = that->on_success(); | 4669 RegExpNode* target = that->on_success(); |
| 4591 target->Accept(this); | 4670 target->Accept(this); |
| 4592 } | 4671 } |
| 4593 | 4672 |
| 4594 | 4673 |
| 4595 Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data, | 4674 RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data, |
| 4596 bool ignore_case, | 4675 bool ignore_case, |
| 4597 bool is_multiline, | 4676 bool is_multiline, |
| 4598 Handle<String> pattern, | 4677 Handle<String> pattern, |
| 4599 bool is_ascii) { | 4678 bool is_ascii) { |
| 4600 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { | 4679 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
| 4601 return IrregexpRegExpTooBig(pattern); | 4680 return IrregexpRegExpTooBig(); |
| 4602 } | 4681 } |
| 4603 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); | 4682 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); |
| 4604 // Wrap the body of the regexp in capture #0. | 4683 // Wrap the body of the regexp in capture #0. |
| 4605 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, | 4684 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, |
| 4606 0, | 4685 0, |
| 4607 &compiler, | 4686 &compiler, |
| 4608 compiler.accept()); | 4687 compiler.accept()); |
| 4609 RegExpNode* node = captured_body; | 4688 RegExpNode* node = captured_body; |
| 4610 if (!data->tree->IsAnchored()) { | 4689 if (!data->tree->IsAnchored()) { |
| 4611 // Add a .*? at the beginning, outside the body capture, unless | 4690 // Add a .*? at the beginning, outside the body capture, unless |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4658 EmbeddedVector<byte, 1024> codes; | 4737 EmbeddedVector<byte, 1024> codes; |
| 4659 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4738 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
| 4660 return compiler.Assemble(&macro_assembler, | 4739 return compiler.Assemble(&macro_assembler, |
| 4661 node, | 4740 node, |
| 4662 data->capture_count, | 4741 data->capture_count, |
| 4663 pattern); | 4742 pattern); |
| 4664 } | 4743 } |
| 4665 | 4744 |
| 4666 | 4745 |
| 4667 }} // namespace v8::internal | 4746 }} // namespace v8::internal |
| OLD | NEW |