Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(799)

Side by Side Diff: src/jsregexp.cc

Issue 8104: Regexp caching (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 16 matching lines...) Expand all
27 27
28 #include "v8.h" 28 #include "v8.h"
29 29
30 #include "execution.h" 30 #include "execution.h"
31 #include "factory.h" 31 #include "factory.h"
32 #include "jsregexp.h" 32 #include "jsregexp.h"
33 #include "third_party/jscre/pcre.h" 33 #include "third_party/jscre/pcre.h"
34 #include "platform.h" 34 #include "platform.h"
35 #include "runtime.h" 35 #include "runtime.h"
36 #include "top.h" 36 #include "top.h"
37 #include "compilation-cache.h"
37 38
38 namespace v8 { namespace internal { 39 namespace v8 { namespace internal {
39 40
40 41
41 #define CAPTURE_INDEX 0 42 #define CAPTURE_INDEX 0
42 #define INTERNAL_INDEX 1 43 #define INTERNAL_INDEX 1
43 44
44 static Failure* malloc_failure; 45 static Failure* malloc_failure;
45 46
46 static void* JSREMalloc(size_t size) { 47 static void* JSREMalloc(size_t size) {
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
136 Factory::NewRawTwoByteString(flat_string->length(), TENURED); 137 Factory::NewRawTwoByteString(flat_string->length(), TENURED);
137 static StringInputBuffer convert_to_two_byte_buffer; 138 static StringInputBuffer convert_to_two_byte_buffer;
138 convert_to_two_byte_buffer.Reset(*flat_string); 139 convert_to_two_byte_buffer.Reset(*flat_string);
139 for (int i = 0; convert_to_two_byte_buffer.has_more(); i++) { 140 for (int i = 0; convert_to_two_byte_buffer.has_more(); i++) {
140 two_byte_string->Set(i, convert_to_two_byte_buffer.GetNext()); 141 two_byte_string->Set(i, convert_to_two_byte_buffer.GetNext());
141 } 142 }
142 return two_byte_string; 143 return two_byte_string;
143 } 144 }
144 145
145 146
147 static int RegExpFlagsFromString(Handle<String> str) {
148 int flags = JSRegExp::NONE;
Kasper Lund 2008/10/24 06:42:50 Would it make sense to have some sort of opaque da…
149 for (int i = 0; i < str->length(); i++) {
150 switch (str->Get(i)) {
151 case 'i':
152 flags |= JSRegExp::IGNORE_CASE;
153 break;
154 case 'g':
155 flags |= JSRegExp::GLOBAL;
156 break;
157 case 'm':
158 flags |= JSRegExp::MULTILINE;
159 break;
160 }
161 }
162 return flags;
163 }
164
165
146 unibrow::Predicate<unibrow::RegExpSpecialChar, 128> is_reg_exp_special_char; 166 unibrow::Predicate<unibrow::RegExpSpecialChar, 128> is_reg_exp_special_char;
147 167
148 168
149 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, 169 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
150 Handle<String> pattern, 170 Handle<String> pattern,
151 Handle<String> flags) { 171 Handle<String> flag_str) {
152 bool is_atom = true; 172 int flags = RegExpFlagsFromString(flag_str);
153 for (int i = 0; is_atom && i < flags->length(); i++) { 173 Handle<Object> cached = CompilationCache::LookupRegExp(pattern, flags);
154 if (flags->Get(i) == 'i') 174 bool in_cache = cached->IsFixedArray();
Kasper Lund 2008/10/24 06:42:50 I would move the IsFixedArray logic into the cache…
155 is_atom = false;
156 }
157 for (int i = 0; is_atom && i < pattern->length(); i++) {
158 if (is_reg_exp_special_char.get(pattern->Get(i)))
159 is_atom = false;
160 }
161 Handle<Object> result; 175 Handle<Object> result;
162 if (is_atom) { 176 if (in_cache) {
163 result = AtomCompile(re, pattern); 177 re->set_data(*cached);
178 result = re;
164 } else { 179 } else {
165 result = JsreCompile(re, pattern, flags); 180 bool is_atom = ((flags & JSRegExp::IGNORE_CASE) == 0);
181 for (int i = 0; is_atom && i < pattern->length(); i++) {
182 if (is_reg_exp_special_char.get(pattern->Get(i)))
183 is_atom = false;
184 }
185 if (is_atom) {
186 result = AtomCompile(re, pattern, flags);
187 } else {
188 result = JsreCompile(re, pattern, flags);
189 }
190 Object* data = re->data();
191 if (data->IsFixedArray()) {
192 // If compilation succeeded then the data is set on the regexp
193 // and we can store it in the cache.
194 Handle<FixedArray> data(FixedArray::cast(re->data()));
195 CompilationCache::PutRegExp(pattern, flags, data);
196 }
166 } 197 }
167 198
168 LOG(RegExpCompileEvent(re)); 199 LOG(RegExpCompileEvent(re, in_cache));
169 return result; 200 return result;
170 } 201 }
171 202
172 203
173 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, 204 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
174 Handle<String> subject, 205 Handle<String> subject,
175 Handle<Object> index) { 206 Handle<Object> index) {
176 switch (regexp->type_tag()) { 207 switch (regexp->TypeTag()) {
177 case JSRegExp::JSCRE: 208 case JSRegExp::JSCRE:
178 return JsreExec(regexp, subject, index); 209 return JsreExec(regexp, subject, index);
179 case JSRegExp::ATOM: 210 case JSRegExp::ATOM:
180 return AtomExec(regexp, subject, index); 211 return AtomExec(regexp, subject, index);
181 default: 212 default:
182 UNREACHABLE(); 213 UNREACHABLE();
183 return Handle<Object>(); 214 return Handle<Object>();
184 } 215 }
185 } 216 }
186 217
187 218
188 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, 219 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
189 Handle<String> subject) { 220 Handle<String> subject) {
190 switch (regexp->type_tag()) { 221 switch (regexp->TypeTag()) {
191 case JSRegExp::JSCRE: 222 case JSRegExp::JSCRE:
192 return JsreExecGlobal(regexp, subject); 223 return JsreExecGlobal(regexp, subject);
193 case JSRegExp::ATOM: 224 case JSRegExp::ATOM:
194 return AtomExecGlobal(regexp, subject); 225 return AtomExecGlobal(regexp, subject);
195 default: 226 default:
196 UNREACHABLE(); 227 UNREACHABLE();
197 return Handle<Object>(); 228 return Handle<Object>();
198 } 229 }
199 } 230 }
200 231
201 232
202 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, 233 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
203 Handle<String> pattern) { 234 Handle<String> pattern,
204 re->set_type_tag(JSRegExp::ATOM); 235 int flags) {
205 re->set_data(*pattern); 236 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, pattern);
206 return re; 237 return re;
207 } 238 }
208 239
209 240
210 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, 241 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
211 Handle<String> subject, 242 Handle<String> subject,
212 Handle<Object> index) { 243 Handle<Object> index) {
213 Handle<String> needle(String::cast(re->data())); 244 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
214 245
215 uint32_t start_index; 246 uint32_t start_index;
216 if (!Array::IndexFromObject(*index, &start_index)) { 247 if (!Array::IndexFromObject(*index, &start_index)) {
217 return Handle<Smi>(Smi::FromInt(-1)); 248 return Handle<Smi>(Smi::FromInt(-1));
218 } 249 }
219 250
220 LOG(RegExpExecEvent(re, start_index, subject)); 251 LOG(RegExpExecEvent(re, start_index, subject));
221 int value = Runtime::StringMatch(subject, needle, start_index); 252 int value = Runtime::StringMatch(subject, needle, start_index);
222 if (value == -1) return Factory::null_value(); 253 if (value == -1) return Factory::null_value();
223 254
224 Handle<FixedArray> array = Factory::NewFixedArray(2); 255 Handle<FixedArray> array = Factory::NewFixedArray(2);
225 array->set(0, 256 array->set(0,
226 Smi::FromInt(value), 257 Smi::FromInt(value),
227 SKIP_WRITE_BARRIER); 258 SKIP_WRITE_BARRIER);
228 array->set(1, 259 array->set(1,
229 Smi::FromInt(value + needle->length()), 260 Smi::FromInt(value + needle->length()),
230 SKIP_WRITE_BARRIER); 261 SKIP_WRITE_BARRIER);
231 return Factory::NewJSArrayWithElements(array); 262 return Factory::NewJSArrayWithElements(array);
232 } 263 }
233 264
234 265
235 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, 266 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
236 Handle<String> subject) { 267 Handle<String> subject) {
237 Handle<String> needle(String::cast(re->data())); 268 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
238 Handle<JSArray> result = Factory::NewJSArray(1); 269 Handle<JSArray> result = Factory::NewJSArray(1);
239 int index = 0; 270 int index = 0;
240 int match_count = 0; 271 int match_count = 0;
241 int subject_length = subject->length(); 272 int subject_length = subject->length();
242 int needle_length = needle->length(); 273 int needle_length = needle->length();
243 while (true) { 274 while (true) {
244 LOG(RegExpExecEvent(re, index, subject)); 275 LOG(RegExpExecEvent(re, index, subject));
245 int value = -1; 276 int value = -1;
246 if (index + needle_length <= subject_length) { 277 if (index + needle_length <= subject_length) {
247 value = Runtime::StringMatch(subject, needle, index); 278 value = Runtime::StringMatch(subject, needle, index);
(...skipping 14 matching lines...) Expand all
262 match_count++; 293 match_count++;
263 index = end; 294 index = end;
264 if (needle_length == 0) index++; 295 if (needle_length == 0) index++;
265 } 296 }
266 return result; 297 return result;
267 } 298 }
268 299
269 300
270 Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re, 301 Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re,
271 Handle<String> pattern, 302 Handle<String> pattern,
272 Handle<String> flags) { 303 int flags) {
273 JSRegExpIgnoreCaseOption case_option = JSRegExpDoNotIgnoreCase; 304 JSRegExpIgnoreCaseOption case_option = (flags & JSRegExp::IGNORE_CASE)
274 JSRegExpMultilineOption multiline_option = JSRegExpSingleLine; 305 ? JSRegExpIgnoreCase
275 FlattenString(flags); 306 : JSRegExpDoNotIgnoreCase;
276 for (int i = 0; i < flags->length(); i++) { 307 JSRegExpMultilineOption multiline_option = (flags & JSRegExp::MULTILINE)
277 if (flags->Get(i) == 'i') case_option = JSRegExpIgnoreCase; 308 ? JSRegExpMultiline
278 if (flags->Get(i) == 'm') multiline_option = JSRegExpMultiline; 309 : JSRegExpSingleLine;
279 }
280 310
281 Handle<String> two_byte_pattern = StringToTwoByte(pattern); 311 Handle<String> two_byte_pattern = StringToTwoByte(pattern);
282 312
283 unsigned number_of_captures; 313 unsigned number_of_captures;
284 const char* error_message = NULL; 314 const char* error_message = NULL;
285 315
286 JscreRegExp* code = NULL; 316 JscreRegExp* code = NULL;
287 FlattenString(pattern); 317 FlattenString(pattern);
288 318
289 bool first_time = true; 319 bool first_time = true;
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
321 } 351 }
322 352
323 ASSERT(code != NULL); 353 ASSERT(code != NULL);
324 // Convert the return address to a ByteArray pointer. 354 // Convert the return address to a ByteArray pointer.
325 Handle<ByteArray> internal( 355 Handle<ByteArray> internal(
326 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code))); 356 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));
327 357
328 Handle<FixedArray> value = Factory::NewFixedArray(2); 358 Handle<FixedArray> value = Factory::NewFixedArray(2);
329 value->set(CAPTURE_INDEX, Smi::FromInt(number_of_captures)); 359 value->set(CAPTURE_INDEX, Smi::FromInt(number_of_captures));
330 value->set(INTERNAL_INDEX, *internal); 360 value->set(INTERNAL_INDEX, *internal);
331 re->set_type_tag(JSRegExp::JSCRE); 361 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
332 re->set_data(*value);
333 362
334 return re; 363 return re;
335 } 364 }
336 } 365 }
337 366
338 367
339 Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp, 368 Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp,
340 int num_captures, 369 int num_captures,
341 Handle<String> subject, 370 Handle<String> subject,
342 int previous_index, 371 int previous_index,
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after
492 // If we exited the loop with an exception, throw it. 521 // If we exited the loop with an exception, throw it.
493 if (matches->IsNull()) { // Exited loop normally. 522 if (matches->IsNull()) { // Exited loop normally.
494 return result; 523 return result;
495 } else { // Exited loop with the exception in matches. 524 } else { // Exited loop with the exception in matches.
496 return matches; 525 return matches;
497 } 526 }
498 } 527 }
499 528
500 529
501 int RegExpImpl::JsreCapture(Handle<JSRegExp> re) { 530 int RegExpImpl::JsreCapture(Handle<JSRegExp> re) {
502 Object* value = re->data(); 531 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
503 ASSERT(value->IsFixedArray()); 532 return Smi::cast(value->get(CAPTURE_INDEX))->value();
504 return Smi::cast(FixedArray::cast(value)->get(CAPTURE_INDEX))->value();
505 } 533 }
506 534
507 535
508 ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) { 536 ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) {
509 Object* value = re->data(); 537 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
510 ASSERT(value->IsFixedArray()); 538 return ByteArray::cast(value->get(INTERNAL_INDEX));
511 return ByteArray::cast(FixedArray::cast(value)->get(INTERNAL_INDEX));
512 } 539 }
513 540
514 }} // namespace v8::internal 541 }} // namespace v8::internal
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698