Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(161)

Side by Side Diff: regexp2000/src/jsregexp.cc

Issue 8765: * Use new RegExp parser. (Closed)
Patch Set: Use new RegExp parser. Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « regexp2000/src/jsregexp.h ('k') | regexp2000/src/objects.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 20 matching lines...) Expand all
31 31
32 #include "ast.h" 32 #include "ast.h"
33 #include "execution.h" 33 #include "execution.h"
34 #include "factory.h" 34 #include "factory.h"
35 #include "jsregexp-inl.h" 35 #include "jsregexp-inl.h"
36 #include "platform.h" 36 #include "platform.h"
37 #include "runtime.h" 37 #include "runtime.h"
38 #include "top.h" 38 #include "top.h"
39 #include "compilation-cache.h" 39 #include "compilation-cache.h"
40 #include "string-stream.h" 40 #include "string-stream.h"
41 #include "parser.h"
41 42
42 // Including pcre.h undefines DEBUG to avoid getting debug output from 43 // Including pcre.h undefines DEBUG to avoid getting debug output from
43 // the JSCRE implementation. Make sure to redefine it in debug mode 44 // the JSCRE implementation. Make sure to redefine it in debug mode
44 // after having included the header file. 45 // after having included the header file.
45 #ifdef DEBUG 46 #ifdef DEBUG
46 #include "third_party/jscre/pcre.h" 47 #include "third_party/jscre/pcre.h"
47 #define DEBUG 48 #define DEBUG
48 #else 49 #else
49 #include "third_party/jscre/pcre.h" 50 #include "third_party/jscre/pcre.h"
50 #endif 51 #endif
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after
169 break; 170 break;
170 case 'm': 171 case 'm':
171 flags |= JSRegExp::MULTILINE; 172 flags |= JSRegExp::MULTILINE;
172 break; 173 break;
173 } 174 }
174 } 175 }
175 return JSRegExp::Flags(flags); 176 return JSRegExp::Flags(flags);
176 } 177 }
177 178
178 179
179 unibrow::Predicate<unibrow::RegExpSpecialChar, 128> is_reg_exp_special_char; 180 static inline Handle<Object> CreateRegExpException(Handle<JSRegExp> re,
181 Handle<String> pattern,
182 Handle<String> error_text,
183 const char* message) {
184 Handle<JSArray> array = Factory::NewJSArray(2);
185 SetElement(array, 0, pattern);
186 SetElement(array, 1, error_text);
187 Handle<Object> regexp_err = Factory::NewSyntaxError(message, array);
188 return Handle<Object>(Top::Throw(*regexp_err));
189 }
180 190
181 191
182 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, 192 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
183 Handle<String> pattern, 193 Handle<String> pattern,
184 Handle<String> flag_str) { 194 Handle<String> flag_str) {
185 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); 195 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
186 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); 196 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);
187 bool in_cache = !cached.is_null(); 197 bool in_cache = !cached.is_null();
188 Handle<Object> result; 198 Handle<Object> result;
189 if (in_cache) { 199 if (in_cache) {
190 re->set_data(*cached); 200 re->set_data(*cached);
191 result = re; 201 result = re;
192 } else { 202 } else {
193 bool is_atom = !flags.is_ignore_case(); 203 SafeStringInputBuffer buffer(pattern.location());
194 for (int i = 0; is_atom && i < pattern->length(); i++) { 204 Handle<String> error_text;
195 if (is_reg_exp_special_char.get(pattern->Get(i))) 205 RegExpTree* ast = ParseRegExp(&buffer, &error_text);
196 is_atom = false; 206 if (!error_text.is_null()) {
207 // Throw an exception if we fail to parse the pattern.
208 return CreateRegExpException(re, pattern, error_text, "malformed_regexp");
197 } 209 }
198 if (is_atom) { 210 RegExpAtom* atom = ast->AsAtom();
199 result = AtomCompile(re, pattern, flags); 211 if (atom != NULL && !flags.is_ignore_case()) {
212 Vector<const uc16> atom_pattern = atom->data();
213 // Test if pattern equals atom_pattern and reuse pattern if it does.
214 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
215 result = AtomCompile(re, atom_string, flags);
200 } else { 216 } else {
201 result = JsreCompile(re, pattern, flags); 217 result = JsrePrepare(re, pattern, flags);
202 } 218 }
203 Object* data = re->data(); 219 Object* data = re->data();
204 if (data->IsFixedArray()) { 220 if (data->IsFixedArray()) {
205 // If compilation succeeded then the data is set on the regexp 221 // If compilation succeeded then the data is set on the regexp
206 // and we can store it in the cache. 222 // and we can store it in the cache.
207 Handle<FixedArray> data(FixedArray::cast(re->data())); 223 Handle<FixedArray> data(FixedArray::cast(re->data()));
208 CompilationCache::PutRegExp(pattern, flags, data); 224 CompilationCache::PutRegExp(pattern, flags, data);
209 } 225 }
210 } 226 }
211 227
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
304 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); 320 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
305 SetElement(result, match_count, pair); 321 SetElement(result, match_count, pair);
306 match_count++; 322 match_count++;
307 index = end; 323 index = end;
308 if (needle_length == 0) index++; 324 if (needle_length == 0) index++;
309 } 325 }
310 return result; 326 return result;
311 } 327 }
312 328
313 329
314 Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re, 330 Handle<Object>RegExpImpl::JsrePrepare(Handle<JSRegExp> re,
315 Handle<String> pattern, 331 Handle<String> pattern,
316 JSRegExp::Flags flags) { 332 JSRegExp::Flags flags) {
333 Handle<Object> value(Heap::undefined_value());
334 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
335 return re;
336 }
337
338
339 Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re) {
340 ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE);
341 ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined());
342
343 Handle<String> pattern(re->Pattern());
344 JSRegExp::Flags flags = re->GetFlags();
345
317 JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case() 346 JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()
318 ? JSRegExpIgnoreCase 347 ? JSRegExpIgnoreCase
319 : JSRegExpDoNotIgnoreCase; 348 : JSRegExpDoNotIgnoreCase;
320 JSRegExpMultilineOption multiline_option = flags.is_multiline() 349 JSRegExpMultilineOption multiline_option = flags.is_multiline()
321 ? JSRegExpMultiline 350 ? JSRegExpMultiline
322 : JSRegExpSingleLine; 351 : JSRegExpSingleLine;
323 352
324 Handle<String> two_byte_pattern = StringToTwoByte(pattern); 353 Handle<String> two_byte_pattern = StringToTwoByte(pattern);
325 354
326 unsigned number_of_captures; 355 unsigned number_of_captures;
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after
470 }; 499 };
471 500
472 501
473 int OffsetsVector::static_offsets_vector_[ 502 int OffsetsVector::static_offsets_vector_[
474 OffsetsVector::kStaticOffsetsVectorSize]; 503 OffsetsVector::kStaticOffsetsVectorSize];
475 504
476 505
477 Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp, 506 Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp,
478 Handle<String> subject, 507 Handle<String> subject,
479 Handle<Object> index) { 508 Handle<Object> index) {
509 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
510 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
511 Handle<Object> compile_result = JsreCompile(regexp);
512 if (compile_result->IsException()) return compile_result;
513 }
514 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
515
480 // Prepare space for the return values. 516 // Prepare space for the return values.
481 int num_captures = JsreCapture(regexp); 517 int num_captures = JsreCapture(regexp);
482 518
483 OffsetsVector offsets(num_captures); 519 OffsetsVector offsets(num_captures);
484 520
485 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); 521 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
486 522
487 Handle<String> subject16 = CachedStringToTwoByte(subject); 523 Handle<String> subject16 = CachedStringToTwoByte(subject);
488 524
489 Handle<Object> result(JsreExecOnce(regexp, num_captures, subject, 525 Handle<Object> result(JsreExecOnce(regexp, num_captures, subject,
490 previous_index, 526 previous_index,
491 subject16->GetTwoByteData(), 527 subject16->GetTwoByteData(),
492 offsets.vector(), offsets.length())); 528 offsets.vector(), offsets.length()));
493 529
494 return result; 530 return result;
495 } 531 }
496 532
497 533
498 Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp, 534 Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp,
499 Handle<String> subject) { 535 Handle<String> subject) {
536 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
537 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
538 Handle<Object> compile_result = JsreCompile(regexp);
539 if (compile_result->IsException()) return compile_result;
540 }
541 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
542
500 // Prepare space for the return values. 543 // Prepare space for the return values.
501 int num_captures = JsreCapture(regexp); 544 int num_captures = JsreCapture(regexp);
502 545
503 OffsetsVector offsets(num_captures); 546 OffsetsVector offsets(num_captures);
504 547
505 int previous_index = 0; 548 int previous_index = 0;
506 549
507 Handle<JSArray> result = Factory::NewJSArray(0); 550 Handle<JSArray> result = Factory::NewJSArray(0);
508 int i = 0; 551 int i = 0;
509 Handle<Object> matches; 552 Handle<Object> matches;
(...skipping 381 matching lines...) Expand 10 before | Expand all | Expand 10 after
891 CharacterClass whitespace_; 934 CharacterClass whitespace_;
892 CharacterClass word_; 935 CharacterClass word_;
893 }; 936 };
894 937
895 938
896 StaticCharacterClasses* StaticCharacterClasses::instance_ = NULL; 939 StaticCharacterClasses* StaticCharacterClasses::instance_ = NULL;
897 940
898 941
899 StaticCharacterClasses::StaticCharacterClasses() { 942 StaticCharacterClasses::StaticCharacterClasses() {
900 #define MAKE_CLASS(Name)\ 943 #define MAKE_CLASS(Name)\
901 CharacterClass::Ranges(Vector<CharacterClass::Range>(k##Name##Ranges,\ 944 CharacterClass::Ranges(Vector<CharacterClass::Range>(k##Name##Ranges, \
902 k##Name##RangeCount), \ 945 k##Name##RangeCount), \
903 &static_allocator_) 946 &static_allocator_)
904 947
905 const int kDigitRangeCount = 1; 948 const int kDigitRangeCount = 1;
906 CharacterClass::Range kDigitRanges[kDigitRangeCount] = { 949 CharacterClass::Range kDigitRanges[kDigitRangeCount] = {
907 CharacterClass::Range('0', '9') 950 CharacterClass::Range('0', '9')
908 }; 951 };
909 digit_ = MAKE_CLASS(Digit); 952 digit_ = MAKE_CLASS(Digit);
910 953
911 const int kWhiteSpaceRangeCount = 10; 954 const int kWhiteSpaceRangeCount = 10;
(...skipping 335 matching lines...) Expand 10 before | Expand all | Expand 10 after
1247 template 1290 template
1248 bool RegExpEngine::Execute<const char>(RegExpNode<const char>* start, 1291 bool RegExpEngine::Execute<const char>(RegExpNode<const char>* start,
1249 Vector<const char> input); 1292 Vector<const char> input);
1250 1293
1251 template 1294 template
1252 bool RegExpEngine::Execute<const uc16>(RegExpNode<const uc16>* start, 1295 bool RegExpEngine::Execute<const uc16>(RegExpNode<const uc16>* start,
1253 Vector<const uc16> input); 1296 Vector<const uc16> input);
1254 1297
1255 1298
1256 }} // namespace v8::internal 1299 }} // namespace v8::internal
OLD | NEW
« no previous file with comments | « regexp2000/src/jsregexp.h ('k') | regexp2000/src/objects.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698