Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: src/jsregexp.cc

Issue 7477045: Tentative implementation of string slices (hidden under the flag --string-slices). (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Included Vitaly's suggestions. Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after
229 Handle<String> subject, 229 Handle<String> subject,
230 int index, 230 int index,
231 Handle<JSArray> last_match_info) { 231 Handle<JSArray> last_match_info) {
232 Isolate* isolate = re->GetIsolate(); 232 Isolate* isolate = re->GetIsolate();
233 233
234 ASSERT(0 <= index); 234 ASSERT(0 <= index);
235 ASSERT(index <= subject->length()); 235 ASSERT(index <= subject->length());
236 236
237 if (!subject->IsFlat()) FlattenString(subject); 237 if (!subject->IsFlat()) FlattenString(subject);
238 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid 238 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
239 // Extract flattened substrings of cons strings before determining asciiness. 239 // Extract flattened substrings of cons strings before determining asciiness.
Vitaly Repeshko 2011/08/17 19:20:23 Update the comment.
240 String* seq_sub = *subject; 240 String* seq_sub = StringShape(*subject).IsIndirect()
241 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); 241 ? subject->GetIndirect()
242 : *subject;
242 243
243 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); 244 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
244 int needle_len = needle->length(); 245 int needle_len = needle->length();
245 246
246 if (needle_len != 0) { 247 if (needle_len != 0) {
247 if (index + needle_len > subject->length()) 248 if (index + needle_len > subject->length())
248 return isolate->factory()->null_value(); 249 return isolate->factory()->null_value();
249 250
250 // dispatch on type of strings 251 // dispatch on type of strings
251 index = (needle->IsAsciiRepresentation() 252 index = (needle->IsAsciiRepresentation()
252 ? (seq_sub->IsAsciiRepresentation() 253 ? (seq_sub->IsAsciiRepresentation()
253 ? SearchString(isolate, 254 ? SearchString(isolate,
254 seq_sub->ToAsciiVector(), 255 subject->ToAsciiVector(),
Vitaly Repeshko 2011/08/17 19:20:23 To{Ascii,UC16}Vector won't work in case an indirec...
Yang 2011/08/18 12:17:32 I solved this by changing the assertion in both To...
255 needle->ToAsciiVector(), 256 needle->ToAsciiVector(),
256 index) 257 index)
257 : SearchString(isolate, 258 : SearchString(isolate,
258 seq_sub->ToUC16Vector(), 259 subject->ToUC16Vector(),
259 needle->ToAsciiVector(), 260 needle->ToAsciiVector(),
260 index)) 261 index))
261 : (seq_sub->IsAsciiRepresentation() 262 : (seq_sub->IsAsciiRepresentation()
262 ? SearchString(isolate, 263 ? SearchString(isolate,
263 seq_sub->ToAsciiVector(), 264 subject->ToAsciiVector(),
264 needle->ToUC16Vector(), 265 needle->ToUC16Vector(),
265 index) 266 index)
266 : SearchString(isolate, 267 : SearchString(isolate,
267 seq_sub->ToUC16Vector(), 268 subject->ToUC16Vector(),
268 needle->ToUC16Vector(), 269 needle->ToUC16Vector(),
269 index))); 270 index)));
270 if (index == -1) return FACTORY->null_value(); 271 if (index == -1) return FACTORY->null_value();
271 } 272 }
272 ASSERT(last_match_info->HasFastElements()); 273 ASSERT(last_match_info->HasFastElements());
273 274
274 { 275 {
275 NoHandleAllocation no_handles; 276 NoHandleAllocation no_handles;
276 FixedArray* array = FixedArray::cast(last_match_info->elements()); 277 FixedArray* array = FixedArray::cast(last_match_info->elements());
277 SetAtomLastCapture(array, *subject, index, index + needle_len); 278 SetAtomLastCapture(array, *subject, index, index + needle_len);
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
348 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii)); 349 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
349 ASSERT(error_string->IsString()); 350 ASSERT(error_string->IsString());
350 Handle<String> error_message(String::cast(error_string)); 351 Handle<String> error_message(String::cast(error_string));
351 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); 352 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
352 return false; 353 return false;
353 } 354 }
354 355
355 JSRegExp::Flags flags = re->GetFlags(); 356 JSRegExp::Flags flags = re->GetFlags();
356 357
357 Handle<String> pattern(re->Pattern()); 358 Handle<String> pattern(re->Pattern());
358 if (!pattern->IsFlat()) { 359 if (!pattern->IsFlat()) FlattenString(pattern);
359 FlattenString(pattern);
360 }
361
362 RegExpCompileData compile_data; 360 RegExpCompileData compile_data;
363 FlatStringReader reader(isolate, pattern); 361 FlatStringReader reader(isolate, pattern);
364 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), 362 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
365 &compile_data)) { 363 &compile_data)) {
366 // Throw an exception if we fail to parse the pattern. 364 // Throw an exception if we fail to parse the pattern.
367 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. 365 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
368 ThrowRegExpException(re, 366 ThrowRegExpException(re,
369 pattern, 367 pattern,
370 compile_data.error, 368 compile_data.error,
371 "malformed_regexp"); 369 "malformed_regexp");
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
435 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, 433 re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
436 JSRegExp::IRREGEXP, 434 JSRegExp::IRREGEXP,
437 pattern, 435 pattern,
438 flags, 436 flags,
439 capture_count); 437 capture_count);
440 } 438 }
441 439
442 440
443 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, 441 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
444 Handle<String> subject) { 442 Handle<String> subject) {
445 if (!subject->IsFlat()) { 443 if (!subject->IsFlat()) FlattenString(subject);
446 FlattenString(subject); 444
447 }
448 // Check the asciiness of the underlying storage. 445 // Check the asciiness of the underlying storage.
449 bool is_ascii; 446 bool is_ascii;
450 { 447 {
451 AssertNoAllocation no_gc; 448 AssertNoAllocation no_gc;
452 String* sequential_string = *subject; 449 String* sequential_string = StringShape(*subject).IsIndirect()
453 if (subject->IsConsString()) { 450 ? subject->GetIndirect()
454 sequential_string = ConsString::cast(*subject)->first(); 451 : *subject;
455 }
456 is_ascii = sequential_string->IsAsciiRepresentation(); 452 is_ascii = sequential_string->IsAsciiRepresentation();
457 } 453 }
458 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { 454 if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
459 return -1; 455 return -1;
460 } 456 }
461 #ifdef V8_INTERPRETED_REGEXP 457 #ifdef V8_INTERPRETED_REGEXP
462 // Byte-code regexp needs space allocated for all its registers. 458 // Byte-code regexp needs space allocated for all its registers.
463 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); 459 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
464 #else // V8_INTERPRETED_REGEXP 460 #else // V8_INTERPRETED_REGEXP
465 // Native regexp only needs room to output captures. Registers are handled 461 // Native regexp only needs room to output captures. Registers are handled
466 // internally. 462 // internally.
467 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; 463 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
468 #endif // V8_INTERPRETED_REGEXP 464 #endif // V8_INTERPRETED_REGEXP
469 } 465 }
470 466
471 467
472 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( 468 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
473 Handle<JSRegExp> regexp, 469 Handle<JSRegExp> regexp,
474 Handle<String> subject, 470 Handle<String> subject,
475 int index, 471 int index,
476 Vector<int> output) { 472 Vector<int> output) {
477 Isolate* isolate = regexp->GetIsolate(); 473 Isolate* isolate = regexp->GetIsolate();
478 474
479 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); 475 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
480 476
481 ASSERT(index >= 0); 477 ASSERT(index >= 0);
482 ASSERT(index <= subject->length()); 478 ASSERT(index <= subject->length());
483 ASSERT(subject->IsFlat()); 479 ASSERT(subject->IsFlat());
484 480
485 // A flat ASCII string might have a two-byte first part.
486 if (subject->IsConsString()) {
487 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate);
Vitaly Repeshko 2011/08/17 19:20:23 This unwrapping didn't help the code called below
488 }
489
490 #ifndef V8_INTERPRETED_REGEXP 481 #ifndef V8_INTERPRETED_REGEXP
491 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); 482 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
492 do { 483 do {
493 bool is_ascii = subject->IsAsciiRepresentation(); 484 // A flat ASCII indirect string might actually be two-byte.
485 bool is_ascii;
486 if (StringShape(*subject).IsIndirect()) {
487 is_ascii = subject->GetIndirect()->IsAsciiRepresentation();
488 } else {
489 is_ascii = subject->IsAsciiRepresentation();
490 }
494 EnsureCompiledIrregexp(regexp, is_ascii); 491 EnsureCompiledIrregexp(regexp, is_ascii);
495 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); 492 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
496 NativeRegExpMacroAssembler::Result res = 493 NativeRegExpMacroAssembler::Result res =
497 NativeRegExpMacroAssembler::Match(code, 494 NativeRegExpMacroAssembler::Match(code,
498 subject, 495 subject,
499 output.start(), 496 output.start(),
500 output.length(), 497 output.length(),
501 index, 498 index,
502 isolate); 499 isolate);
503 if (res != NativeRegExpMacroAssembler::RETRY) { 500 if (res != NativeRegExpMacroAssembler::RETRY) {
(...skipping 4853 matching lines...) Expand 10 before | Expand all | Expand 10 after
5357 } 5354 }
5358 5355
5359 return compiler.Assemble(&macro_assembler, 5356 return compiler.Assemble(&macro_assembler,
5360 node, 5357 node,
5361 data->capture_count, 5358 data->capture_count,
5362 pattern); 5359 pattern);
5363 } 5360 }
5364 5361
5365 5362
5366 }} // namespace v8::internal 5363 }} // namespace v8::internal
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698