Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(33)

Side by Side Diff: src/jsregexp.cc

Issue 7477045: Tentative implementation of string slices (hidden under the flag --string-slices). (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Implemented suggested changes. Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after
229 Handle<String> subject, 229 Handle<String> subject,
230 int index, 230 int index,
231 Handle<JSArray> last_match_info) { 231 Handle<JSArray> last_match_info) {
232 Isolate* isolate = re->GetIsolate(); 232 Isolate* isolate = re->GetIsolate();
233 233
234 ASSERT(0 <= index); 234 ASSERT(0 <= index);
235 ASSERT(index <= subject->length()); 235 ASSERT(index <= subject->length());
236 236
237 if (!subject->IsFlat()) FlattenString(subject); 237 if (!subject->IsFlat()) FlattenString(subject);
238 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid 238 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
239 // Extract flattened substrings of cons strings before determining asciiness.
240 String* seq_sub = *subject;
241 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
242 239
243 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); 240 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
Vitaly Repeshko 2011/08/19 16:27:40 We don't call needle->IsAsciiRepresentationUnderneath… [comment truncated]
244 int needle_len = needle->length(); 241 int needle_len = needle->length();
245 242
246 if (needle_len != 0) { 243 if (needle_len != 0) {
247 if (index + needle_len > subject->length()) 244 if (index + needle_len > subject->length())
248 return isolate->factory()->null_value(); 245 return isolate->factory()->null_value();
249 246
250 // dispatch on type of strings 247 // dispatch on type of strings
251 index = (needle->IsAsciiRepresentation() 248 index = (needle->IsAsciiRepresentation()
252 ? (seq_sub->IsAsciiRepresentation() 249 ? (subject->IsAsciiRepresentationUnderneath()
253 ? SearchString(isolate, 250 ? SearchString(isolate,
254 seq_sub->ToAsciiVector(), 251 subject->ToAsciiVector(),
255 needle->ToAsciiVector(), 252 needle->ToAsciiVector(),
256 index) 253 index)
257 : SearchString(isolate, 254 : SearchString(isolate,
258 seq_sub->ToUC16Vector(), 255 subject->ToUC16Vector(),
259 needle->ToAsciiVector(), 256 needle->ToAsciiVector(),
260 index)) 257 index))
261 : (seq_sub->IsAsciiRepresentation() 258 : (subject->IsAsciiRepresentationUnderneath()
262 ? SearchString(isolate, 259 ? SearchString(isolate,
263 seq_sub->ToAsciiVector(), 260 subject->ToAsciiVector(),
264 needle->ToUC16Vector(), 261 needle->ToUC16Vector(),
265 index) 262 index)
266 : SearchString(isolate, 263 : SearchString(isolate,
267 seq_sub->ToUC16Vector(), 264 subject->ToUC16Vector(),
268 needle->ToUC16Vector(), 265 needle->ToUC16Vector(),
269 index))); 266 index)));
270 if (index == -1) return FACTORY->null_value(); 267 if (index == -1) return FACTORY->null_value();
271 } 268 }
272 ASSERT(last_match_info->HasFastElements()); 269 ASSERT(last_match_info->HasFastElements());
273 270
274 { 271 {
275 NoHandleAllocation no_handles; 272 NoHandleAllocation no_handles;
276 FixedArray* array = FixedArray::cast(last_match_info->elements()); 273 FixedArray* array = FixedArray::cast(last_match_info->elements());
277 SetAtomLastCapture(array, *subject, index, index + needle_len); 274 SetAtomLastCapture(array, *subject, index, index + needle_len);
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
348 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii)); 345 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
349 ASSERT(error_string->IsString()); 346 ASSERT(error_string->IsString());
350 Handle<String> error_message(String::cast(error_string)); 347 Handle<String> error_message(String::cast(error_string));
351 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); 348 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
352 return false; 349 return false;
353 } 350 }
354 351
355 JSRegExp::Flags flags = re->GetFlags(); 352 JSRegExp::Flags flags = re->GetFlags();
356 353
357 Handle<String> pattern(re->Pattern()); 354 Handle<String> pattern(re->Pattern());
358 if (!pattern->IsFlat()) { 355 if (!pattern->IsFlat()) FlattenString(pattern);
359 FlattenString(pattern);
360 }
361
362 RegExpCompileData compile_data; 356 RegExpCompileData compile_data;
363 FlatStringReader reader(isolate, pattern); 357 FlatStringReader reader(isolate, pattern);
364 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), 358 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
365 &compile_data)) { 359 &compile_data)) {
366 // Throw an exception if we fail to parse the pattern. 360 // Throw an exception if we fail to parse the pattern.
367 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. 361 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
368 ThrowRegExpException(re, 362 ThrowRegExpException(re,
369 pattern, 363 pattern,
370 compile_data.error, 364 compile_data.error,
371 "malformed_regexp"); 365 "malformed_regexp");
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
435 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, 429 re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
436 JSRegExp::IRREGEXP, 430 JSRegExp::IRREGEXP,
437 pattern, 431 pattern,
438 flags, 432 flags,
439 capture_count); 433 capture_count);
440 } 434 }
441 435
442 436
443 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, 437 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
444 Handle<String> subject) { 438 Handle<String> subject) {
445 if (!subject->IsFlat()) { 439 if (!subject->IsFlat()) FlattenString(subject);
446 FlattenString(subject); 440
447 }
448 // Check the asciiness of the underlying storage. 441 // Check the asciiness of the underlying storage.
449 bool is_ascii; 442 bool is_ascii = subject->IsAsciiRepresentationUnderneath();
450 { 443 if (!EnsureCompiledIrregexp(regexp, is_ascii)) return -1;
451 AssertNoAllocation no_gc; 444
452 String* sequential_string = *subject;
453 if (subject->IsConsString()) {
454 sequential_string = ConsString::cast(*subject)->first();
455 }
456 is_ascii = sequential_string->IsAsciiRepresentation();
457 }
458 if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
459 return -1;
460 }
461 #ifdef V8_INTERPRETED_REGEXP 445 #ifdef V8_INTERPRETED_REGEXP
462 // Byte-code regexp needs space allocated for all its registers. 446 // Byte-code regexp needs space allocated for all its registers.
463 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); 447 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
464 #else // V8_INTERPRETED_REGEXP 448 #else // V8_INTERPRETED_REGEXP
465 // Native regexp only needs room to output captures. Registers are handled 449 // Native regexp only needs room to output captures. Registers are handled
466 // internally. 450 // internally.
467 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; 451 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
468 #endif // V8_INTERPRETED_REGEXP 452 #endif // V8_INTERPRETED_REGEXP
469 } 453 }
470 454
471 455
472 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( 456 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
473 Handle<JSRegExp> regexp, 457 Handle<JSRegExp> regexp,
474 Handle<String> subject, 458 Handle<String> subject,
475 int index, 459 int index,
476 Vector<int> output) { 460 Vector<int> output) {
477 Isolate* isolate = regexp->GetIsolate(); 461 Isolate* isolate = regexp->GetIsolate();
478 462
479 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); 463 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
480 464
481 ASSERT(index >= 0); 465 ASSERT(index >= 0);
482 ASSERT(index <= subject->length()); 466 ASSERT(index <= subject->length());
483 ASSERT(subject->IsFlat()); 467 ASSERT(subject->IsFlat());
484 468
485 // A flat ASCII string might have a two-byte first part. 469 bool is_ascii = subject->IsAsciiRepresentationUnderneath();
486 if (subject->IsConsString()) {
487 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate);
488 }
489 470
490 #ifndef V8_INTERPRETED_REGEXP 471 #ifndef V8_INTERPRETED_REGEXP
491 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); 472 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
492 do { 473 do {
493 bool is_ascii = subject->IsAsciiRepresentation();
494 EnsureCompiledIrregexp(regexp, is_ascii); 474 EnsureCompiledIrregexp(regexp, is_ascii);
495 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); 475 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
496 NativeRegExpMacroAssembler::Result res = 476 NativeRegExpMacroAssembler::Result res =
497 NativeRegExpMacroAssembler::Match(code, 477 NativeRegExpMacroAssembler::Match(code,
498 subject, 478 subject,
499 output.start(), 479 output.start(),
500 output.length(), 480 output.length(),
501 index, 481 index,
502 isolate); 482 isolate);
503 if (res != NativeRegExpMacroAssembler::RETRY) { 483 if (res != NativeRegExpMacroAssembler::RETRY) {
504 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || 484 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||
505 isolate->has_pending_exception()); 485 isolate->has_pending_exception());
506 STATIC_ASSERT( 486 STATIC_ASSERT(
507 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); 487 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
508 STATIC_ASSERT( 488 STATIC_ASSERT(
509 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); 489 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
510 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) 490 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
511 == RE_EXCEPTION); 491 == RE_EXCEPTION);
512 return static_cast<IrregexpResult>(res); 492 return static_cast<IrregexpResult>(res);
513 } 493 }
514 // If result is RETRY, the string has changed representation, and we 494 // If result is RETRY, the string has changed representation, and we
515 // must restart from scratch. 495 // must restart from scratch.
516 // In this case, it means we must make sure we are prepared to handle 496 // In this case, it means we must make sure we are prepared to handle
517 // the, potentially, different subject (the string can switch between 497 // the, potentially, different subject (the string can switch between
518 // being internal and external, and even between being ASCII and UC16, 498 // being internal and external, and even between being ASCII and UC16,
519 // but the characters are always the same). 499 // but the characters are always the same).
520 IrregexpPrepare(regexp, subject); 500 IrregexpPrepare(regexp, subject);
501 is_ascii = subject->IsAsciiRepresentationUnderneath();
521 } while (true); 502 } while (true);
522 UNREACHABLE(); 503 UNREACHABLE();
523 return RE_EXCEPTION; 504 return RE_EXCEPTION;
524 #else // V8_INTERPRETED_REGEXP 505 #else // V8_INTERPRETED_REGEXP
525 506
526 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); 507 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
527 bool is_ascii = subject->IsAsciiRepresentation();
528 // We must have done EnsureCompiledIrregexp, so we can get the number of 508 // We must have done EnsureCompiledIrregexp, so we can get the number of
529 // registers. 509 // registers.
530 int* register_vector = output.start(); 510 int* register_vector = output.start();
531 int number_of_capture_registers = 511 int number_of_capture_registers =
532 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; 512 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
533 for (int i = number_of_capture_registers - 1; i >= 0; i--) { 513 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
534 register_vector[i] = -1; 514 register_vector[i] = -1;
535 } 515 }
536 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); 516 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
537 517
(...skipping 4819 matching lines...) Expand 10 before | Expand all | Expand 10 after
5357 } 5337 }
5358 5338
5359 return compiler.Assemble(&macro_assembler, 5339 return compiler.Assemble(&macro_assembler,
5360 node, 5340 node,
5361 data->capture_count, 5341 data->capture_count,
5362 pattern); 5342 pattern);
5363 } 5343 }
5364 5344
5365 5345
5366 }} // namespace v8::internal 5346 }} // namespace v8::internal
OLD | NEW
« no previous file with comments | « src/interpreter-irregexp.cc ('k') | src/mark-compact.cc » ('j') | src/objects.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698