Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(218)

Side by Side Diff: src/jsregexp.cc

Issue 7477045: Tentative implementation of string slices (hidden under the flag --string-slices). (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Some more suggested changes. Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after
229 Handle<String> subject, 229 Handle<String> subject,
230 int index, 230 int index,
231 Handle<JSArray> last_match_info) { 231 Handle<JSArray> last_match_info) {
232 Isolate* isolate = re->GetIsolate(); 232 Isolate* isolate = re->GetIsolate();
233 233
234 ASSERT(0 <= index); 234 ASSERT(0 <= index);
235 ASSERT(index <= subject->length()); 235 ASSERT(index <= subject->length());
236 236
237 if (!subject->IsFlat()) FlattenString(subject); 237 if (!subject->IsFlat()) FlattenString(subject);
238 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid 238 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
239 // Extract flattened substrings of cons strings before determining asciiness.
240 String* seq_sub = *subject;
241 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
242 239
243 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); 240 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
241 ASSERT(StringShape(needle).IsSequential());
244 int needle_len = needle->length(); 242 int needle_len = needle->length();
245 243
246 if (needle_len != 0) { 244 if (needle_len != 0) {
247 if (index + needle_len > subject->length()) 245 if (index + needle_len > subject->length())
248 return isolate->factory()->null_value(); 246 return isolate->factory()->null_value();
249 247
250 // dispatch on type of strings 248 // dispatch on type of strings
251 index = (needle->IsAsciiRepresentation() 249 index = (needle->IsAsciiRepresentation()
252 ? (seq_sub->IsAsciiRepresentation() 250 ? (subject->IsAsciiRepresentationUnderneath()
253 ? SearchString(isolate, 251 ? SearchString(isolate,
254 seq_sub->ToAsciiVector(), 252 subject->ToAsciiVector(),
255 needle->ToAsciiVector(), 253 needle->ToAsciiVector(),
256 index) 254 index)
257 : SearchString(isolate, 255 : SearchString(isolate,
258 seq_sub->ToUC16Vector(), 256 subject->ToUC16Vector(),
259 needle->ToAsciiVector(), 257 needle->ToAsciiVector(),
260 index)) 258 index))
261 : (seq_sub->IsAsciiRepresentation() 259 : (subject->IsAsciiRepresentationUnderneath()
262 ? SearchString(isolate, 260 ? SearchString(isolate,
263 seq_sub->ToAsciiVector(), 261 subject->ToAsciiVector(),
264 needle->ToUC16Vector(), 262 needle->ToUC16Vector(),
265 index) 263 index)
266 : SearchString(isolate, 264 : SearchString(isolate,
267 seq_sub->ToUC16Vector(), 265 subject->ToUC16Vector(),
268 needle->ToUC16Vector(), 266 needle->ToUC16Vector(),
269 index))); 267 index)));
270 if (index == -1) return FACTORY->null_value(); 268 if (index == -1) return FACTORY->null_value();
271 } 269 }
272 ASSERT(last_match_info->HasFastElements()); 270 ASSERT(last_match_info->HasFastElements());
273 271
274 { 272 {
275 NoHandleAllocation no_handles; 273 NoHandleAllocation no_handles;
276 FixedArray* array = FixedArray::cast(last_match_info->elements()); 274 FixedArray* array = FixedArray::cast(last_match_info->elements());
277 SetAtomLastCapture(array, *subject, index, index + needle_len); 275 SetAtomLastCapture(array, *subject, index, index + needle_len);
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
348 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii)); 346 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
349 ASSERT(error_string->IsString()); 347 ASSERT(error_string->IsString());
350 Handle<String> error_message(String::cast(error_string)); 348 Handle<String> error_message(String::cast(error_string));
351 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); 349 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
352 return false; 350 return false;
353 } 351 }
354 352
355 JSRegExp::Flags flags = re->GetFlags(); 353 JSRegExp::Flags flags = re->GetFlags();
356 354
357 Handle<String> pattern(re->Pattern()); 355 Handle<String> pattern(re->Pattern());
358 if (!pattern->IsFlat()) { 356 if (!pattern->IsFlat()) FlattenString(pattern);
359 FlattenString(pattern);
360 }
361
362 RegExpCompileData compile_data; 357 RegExpCompileData compile_data;
363 FlatStringReader reader(isolate, pattern); 358 FlatStringReader reader(isolate, pattern);
364 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), 359 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
365 &compile_data)) { 360 &compile_data)) {
366 // Throw an exception if we fail to parse the pattern. 361 // Throw an exception if we fail to parse the pattern.
367 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. 362 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
368 ThrowRegExpException(re, 363 ThrowRegExpException(re,
369 pattern, 364 pattern,
370 compile_data.error, 365 compile_data.error,
371 "malformed_regexp"); 366 "malformed_regexp");
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
435 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, 430 re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
436 JSRegExp::IRREGEXP, 431 JSRegExp::IRREGEXP,
437 pattern, 432 pattern,
438 flags, 433 flags,
439 capture_count); 434 capture_count);
440 } 435 }
441 436
442 437
443 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, 438 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
444 Handle<String> subject) { 439 Handle<String> subject) {
445 if (!subject->IsFlat()) { 440 if (!subject->IsFlat()) FlattenString(subject);
446 FlattenString(subject); 441
447 }
448 // Check the asciiness of the underlying storage. 442 // Check the asciiness of the underlying storage.
449 bool is_ascii; 443 bool is_ascii = subject->IsAsciiRepresentationUnderneath();
450 { 444 if (!EnsureCompiledIrregexp(regexp, is_ascii)) return -1;
451 AssertNoAllocation no_gc; 445
452 String* sequential_string = *subject;
453 if (subject->IsConsString()) {
454 sequential_string = ConsString::cast(*subject)->first();
455 }
456 is_ascii = sequential_string->IsAsciiRepresentation();
457 }
458 if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
459 return -1;
460 }
461 #ifdef V8_INTERPRETED_REGEXP 446 #ifdef V8_INTERPRETED_REGEXP
462 // Byte-code regexp needs space allocated for all its registers. 447 // Byte-code regexp needs space allocated for all its registers.
463 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); 448 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
464 #else // V8_INTERPRETED_REGEXP 449 #else // V8_INTERPRETED_REGEXP
465 // Native regexp only needs room to output captures. Registers are handled 450 // Native regexp only needs room to output captures. Registers are handled
466 // internally. 451 // internally.
467 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; 452 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
468 #endif // V8_INTERPRETED_REGEXP 453 #endif // V8_INTERPRETED_REGEXP
469 } 454 }
470 455
471 456
472 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( 457 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
473 Handle<JSRegExp> regexp, 458 Handle<JSRegExp> regexp,
474 Handle<String> subject, 459 Handle<String> subject,
475 int index, 460 int index,
476 Vector<int> output) { 461 Vector<int> output) {
477 Isolate* isolate = regexp->GetIsolate(); 462 Isolate* isolate = regexp->GetIsolate();
478 463
479 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); 464 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
480 465
481 ASSERT(index >= 0); 466 ASSERT(index >= 0);
482 ASSERT(index <= subject->length()); 467 ASSERT(index <= subject->length());
483 ASSERT(subject->IsFlat()); 468 ASSERT(subject->IsFlat());
484 469
485 // A flat ASCII string might have a two-byte first part. 470 bool is_ascii = subject->IsAsciiRepresentationUnderneath();
486 if (subject->IsConsString()) {
487 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate);
488 }
489 471
490 #ifndef V8_INTERPRETED_REGEXP 472 #ifndef V8_INTERPRETED_REGEXP
491 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); 473 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
492 do { 474 do {
493 bool is_ascii = subject->IsAsciiRepresentation();
494 EnsureCompiledIrregexp(regexp, is_ascii); 475 EnsureCompiledIrregexp(regexp, is_ascii);
495 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); 476 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
496 NativeRegExpMacroAssembler::Result res = 477 NativeRegExpMacroAssembler::Result res =
497 NativeRegExpMacroAssembler::Match(code, 478 NativeRegExpMacroAssembler::Match(code,
498 subject, 479 subject,
499 output.start(), 480 output.start(),
500 output.length(), 481 output.length(),
501 index, 482 index,
502 isolate); 483 isolate);
503 if (res != NativeRegExpMacroAssembler::RETRY) { 484 if (res != NativeRegExpMacroAssembler::RETRY) {
504 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || 485 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||
505 isolate->has_pending_exception()); 486 isolate->has_pending_exception());
506 STATIC_ASSERT( 487 STATIC_ASSERT(
507 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); 488 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
508 STATIC_ASSERT( 489 STATIC_ASSERT(
509 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); 490 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
510 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) 491 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
511 == RE_EXCEPTION); 492 == RE_EXCEPTION);
512 return static_cast<IrregexpResult>(res); 493 return static_cast<IrregexpResult>(res);
513 } 494 }
514 // If result is RETRY, the string has changed representation, and we 495 // If result is RETRY, the string has changed representation, and we
515 // must restart from scratch. 496 // must restart from scratch.
516 // In this case, it means we must make sure we are prepared to handle 497 // In this case, it means we must make sure we are prepared to handle
517 // the, potentially, different subject (the string can switch between 498 // the, potentially, different subject (the string can switch between
518 // being internal and external, and even between being ASCII and UC16, 499 // being internal and external, and even between being ASCII and UC16,
519 // but the characters are always the same). 500 // but the characters are always the same).
520 IrregexpPrepare(regexp, subject); 501 IrregexpPrepare(regexp, subject);
502 is_ascii = subject->IsAsciiRepresentationUnderneath();
521 } while (true); 503 } while (true);
522 UNREACHABLE(); 504 UNREACHABLE();
523 return RE_EXCEPTION; 505 return RE_EXCEPTION;
524 #else // V8_INTERPRETED_REGEXP 506 #else // V8_INTERPRETED_REGEXP
525 507
526 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); 508 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
527 bool is_ascii = subject->IsAsciiRepresentation();
528 // We must have done EnsureCompiledIrregexp, so we can get the number of 509 // We must have done EnsureCompiledIrregexp, so we can get the number of
529 // registers. 510 // registers.
530 int* register_vector = output.start(); 511 int* register_vector = output.start();
531 int number_of_capture_registers = 512 int number_of_capture_registers =
532 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; 513 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
533 for (int i = number_of_capture_registers - 1; i >= 0; i--) { 514 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
534 register_vector[i] = -1; 515 register_vector[i] = -1;
535 } 516 }
536 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); 517 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
537 518
(...skipping 4819 matching lines...) Expand 10 before | Expand all | Expand 10 after
5357 } 5338 }
5358 5339
5359 return compiler.Assemble(&macro_assembler, 5340 return compiler.Assemble(&macro_assembler,
5360 node, 5341 node,
5361 data->capture_count, 5342 data->capture_count,
5362 pattern); 5343 pattern);
5363 } 5344 }
5364 5345
5365 5346
5366 }} // namespace v8::internal 5347 }} // namespace v8::internal
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698