Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(70)

Side by Side Diff: src/jsregexp.cc

Issue 6685088: Merge isolates to bleeding_edge. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/jump-target-heavy.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 17 matching lines...) Expand all
28 #include "v8.h" 28 #include "v8.h"
29 29
30 #include "ast.h" 30 #include "ast.h"
31 #include "compiler.h" 31 #include "compiler.h"
32 #include "execution.h" 32 #include "execution.h"
33 #include "factory.h" 33 #include "factory.h"
34 #include "jsregexp.h" 34 #include "jsregexp.h"
35 #include "platform.h" 35 #include "platform.h"
36 #include "string-search.h" 36 #include "string-search.h"
37 #include "runtime.h" 37 #include "runtime.h"
38 #include "top.h"
39 #include "compilation-cache.h" 38 #include "compilation-cache.h"
40 #include "string-stream.h" 39 #include "string-stream.h"
41 #include "parser.h" 40 #include "parser.h"
42 #include "regexp-macro-assembler.h" 41 #include "regexp-macro-assembler.h"
43 #include "regexp-macro-assembler-tracer.h" 42 #include "regexp-macro-assembler-tracer.h"
44 #include "regexp-macro-assembler-irregexp.h" 43 #include "regexp-macro-assembler-irregexp.h"
45 #include "regexp-stack.h" 44 #include "regexp-stack.h"
46 45
47 #ifndef V8_INTERPRETED_REGEXP 46 #ifndef V8_INTERPRETED_REGEXP
48 #if V8_TARGET_ARCH_IA32 47 #if V8_TARGET_ARCH_IA32
49 #include "ia32/regexp-macro-assembler-ia32.h" 48 #include "ia32/regexp-macro-assembler-ia32.h"
50 #elif V8_TARGET_ARCH_X64 49 #elif V8_TARGET_ARCH_X64
51 #include "x64/regexp-macro-assembler-x64.h" 50 #include "x64/regexp-macro-assembler-x64.h"
52 #elif V8_TARGET_ARCH_ARM 51 #elif V8_TARGET_ARCH_ARM
53 #include "arm/regexp-macro-assembler-arm.h" 52 #include "arm/regexp-macro-assembler-arm.h"
54 #else 53 #else
55 #error Unsupported target architecture. 54 #error Unsupported target architecture.
56 #endif 55 #endif
57 #endif 56 #endif
58 57
59 #include "interpreter-irregexp.h" 58 #include "interpreter-irregexp.h"
60 59
61 60
62 namespace v8 { 61 namespace v8 {
63 namespace internal { 62 namespace internal {
64 63
65
66 Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor, 64 Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor,
67 Handle<String> pattern, 65 Handle<String> pattern,
68 Handle<String> flags, 66 Handle<String> flags,
69 bool* has_pending_exception) { 67 bool* has_pending_exception) {
70 // Call the construct code with 2 arguments. 68 // Call the construct code with 2 arguments.
71 Object** argv[2] = { Handle<Object>::cast(pattern).location(), 69 Object** argv[2] = { Handle<Object>::cast(pattern).location(),
72 Handle<Object>::cast(flags).location() }; 70 Handle<Object>::cast(flags).location() };
73 return Execution::New(constructor, 2, argv, has_pending_exception); 71 return Execution::New(constructor, 2, argv, has_pending_exception);
74 } 72 }
75 73
(...skipping 14 matching lines...) Expand all
90 } 88 }
91 } 89 }
92 return JSRegExp::Flags(flags); 90 return JSRegExp::Flags(flags);
93 } 91 }
94 92
95 93
96 static inline void ThrowRegExpException(Handle<JSRegExp> re, 94 static inline void ThrowRegExpException(Handle<JSRegExp> re,
97 Handle<String> pattern, 95 Handle<String> pattern,
98 Handle<String> error_text, 96 Handle<String> error_text,
99 const char* message) { 97 const char* message) {
100 Handle<FixedArray> elements = Factory::NewFixedArray(2); 98 Isolate* isolate = re->GetIsolate();
99 Factory* factory = isolate->factory();
100 Handle<FixedArray> elements = factory->NewFixedArray(2);
101 elements->set(0, *pattern); 101 elements->set(0, *pattern);
102 elements->set(1, *error_text); 102 elements->set(1, *error_text);
103 Handle<JSArray> array = Factory::NewJSArrayWithElements(elements); 103 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
104 Handle<Object> regexp_err = Factory::NewSyntaxError(message, array); 104 Handle<Object> regexp_err = factory->NewSyntaxError(message, array);
105 Top::Throw(*regexp_err); 105 isolate->Throw(*regexp_err);
106 } 106 }
107 107
108 108
109 // Generic RegExp methods. Dispatches to implementation specific methods. 109 // Generic RegExp methods. Dispatches to implementation specific methods.
110 110
111 111
112 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, 112 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
113 Handle<String> pattern, 113 Handle<String> pattern,
114 Handle<String> flag_str) { 114 Handle<String> flag_str) {
115 Isolate* isolate = re->GetIsolate();
115 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); 116 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
116 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); 117 CompilationCache* compilation_cache = isolate->compilation_cache();
118 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags);
117 bool in_cache = !cached.is_null(); 119 bool in_cache = !cached.is_null();
118 LOG(RegExpCompileEvent(re, in_cache)); 120 LOG(isolate, RegExpCompileEvent(re, in_cache));
119 121
120 Handle<Object> result; 122 Handle<Object> result;
121 if (in_cache) { 123 if (in_cache) {
122 re->set_data(*cached); 124 re->set_data(*cached);
123 return re; 125 return re;
124 } 126 }
125 pattern = FlattenGetString(pattern); 127 pattern = FlattenGetString(pattern);
126 CompilationZoneScope zone_scope(DELETE_ON_EXIT); 128 CompilationZoneScope zone_scope(DELETE_ON_EXIT);
127 PostponeInterruptsScope postpone; 129 PostponeInterruptsScope postpone(isolate);
128 RegExpCompileData parse_result; 130 RegExpCompileData parse_result;
129 FlatStringReader reader(pattern); 131 FlatStringReader reader(isolate, pattern);
130 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), 132 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
131 &parse_result)) { 133 &parse_result)) {
132 // Throw an exception if we fail to parse the pattern. 134 // Throw an exception if we fail to parse the pattern.
133 ThrowRegExpException(re, 135 ThrowRegExpException(re,
134 pattern, 136 pattern,
135 parse_result.error, 137 parse_result.error,
136 "malformed_regexp"); 138 "malformed_regexp");
137 return Handle<Object>::null(); 139 return Handle<Object>::null();
138 } 140 }
139 141
140 if (parse_result.simple && !flags.is_ignore_case()) { 142 if (parse_result.simple && !flags.is_ignore_case()) {
141 // Parse-tree is a single atom that is equal to the pattern. 143 // Parse-tree is a single atom that is equal to the pattern.
142 AtomCompile(re, pattern, flags, pattern); 144 AtomCompile(re, pattern, flags, pattern);
143 } else if (parse_result.tree->IsAtom() && 145 } else if (parse_result.tree->IsAtom() &&
144 !flags.is_ignore_case() && 146 !flags.is_ignore_case() &&
145 parse_result.capture_count == 0) { 147 parse_result.capture_count == 0) {
146 RegExpAtom* atom = parse_result.tree->AsAtom(); 148 RegExpAtom* atom = parse_result.tree->AsAtom();
147 Vector<const uc16> atom_pattern = atom->data(); 149 Vector<const uc16> atom_pattern = atom->data();
148 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); 150 Handle<String> atom_string =
151 isolate->factory()->NewStringFromTwoByte(atom_pattern);
149 AtomCompile(re, pattern, flags, atom_string); 152 AtomCompile(re, pattern, flags, atom_string);
150 } else { 153 } else {
151 IrregexpInitialize(re, pattern, flags, parse_result.capture_count); 154 IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
152 } 155 }
153 ASSERT(re->data()->IsFixedArray()); 156 ASSERT(re->data()->IsFixedArray());
154 // Compilation succeeded so the data is set on the regexp 157 // Compilation succeeded so the data is set on the regexp
155 // and we can store it in the cache. 158 // and we can store it in the cache.
156 Handle<FixedArray> data(FixedArray::cast(re->data())); 159 Handle<FixedArray> data(FixedArray::cast(re->data()));
157 CompilationCache::PutRegExp(pattern, flags, data); 160 compilation_cache->PutRegExp(pattern, flags, data);
158 161
159 return re; 162 return re;
160 } 163 }
161 164
162 165
163 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, 166 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
164 Handle<String> subject, 167 Handle<String> subject,
165 int index, 168 int index,
166 Handle<JSArray> last_match_info) { 169 Handle<JSArray> last_match_info) {
167 switch (regexp->TypeTag()) { 170 switch (regexp->TypeTag()) {
168 case JSRegExp::ATOM: 171 case JSRegExp::ATOM:
169 return AtomExec(regexp, subject, index, last_match_info); 172 return AtomExec(regexp, subject, index, last_match_info);
170 case JSRegExp::IRREGEXP: { 173 case JSRegExp::IRREGEXP: {
171 Handle<Object> result = 174 Handle<Object> result =
172 IrregexpExec(regexp, subject, index, last_match_info); 175 IrregexpExec(regexp, subject, index, last_match_info);
173 ASSERT(!result.is_null() || Top::has_pending_exception()); 176 ASSERT(!result.is_null() || Isolate::Current()->has_pending_exception());
174 return result; 177 return result;
175 } 178 }
176 default: 179 default:
177 UNREACHABLE(); 180 UNREACHABLE();
178 return Handle<Object>::null(); 181 return Handle<Object>::null();
179 } 182 }
180 } 183 }
181 184
182 185
183 // RegExp Atom implementation: Simple string search using indexOf. 186 // RegExp Atom implementation: Simple string search using indexOf.
184 187
185 188
186 void RegExpImpl::AtomCompile(Handle<JSRegExp> re, 189 void RegExpImpl::AtomCompile(Handle<JSRegExp> re,
187 Handle<String> pattern, 190 Handle<String> pattern,
188 JSRegExp::Flags flags, 191 JSRegExp::Flags flags,
189 Handle<String> match_pattern) { 192 Handle<String> match_pattern) {
190 Factory::SetRegExpAtomData(re, 193 re->GetIsolate()->factory()->SetRegExpAtomData(re,
191 JSRegExp::ATOM, 194 JSRegExp::ATOM,
192 pattern, 195 pattern,
193 flags, 196 flags,
194 match_pattern); 197 match_pattern);
195 } 198 }
196 199
197 200
198 static void SetAtomLastCapture(FixedArray* array, 201 static void SetAtomLastCapture(FixedArray* array,
199 String* subject, 202 String* subject,
200 int from, 203 int from,
201 int to) { 204 int to) {
202 NoHandleAllocation no_handles; 205 NoHandleAllocation no_handles;
203 RegExpImpl::SetLastCaptureCount(array, 2); 206 RegExpImpl::SetLastCaptureCount(array, 2);
204 RegExpImpl::SetLastSubject(array, subject); 207 RegExpImpl::SetLastSubject(array, subject);
(...skipping 12 matching lines...) Expand all
217 220
218 int subject_length = sub_vector.length(); 221 int subject_length = sub_vector.length();
219 if (start_index + pattern_length > subject_length) return -1; 222 if (start_index + pattern_length > subject_length) return -1;
220 return SearchString(sub_vector, pat_vector, start_index); 223 return SearchString(sub_vector, pat_vector, start_index);
221 } 224 }
222 */ 225 */
223 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, 226 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
224 Handle<String> subject, 227 Handle<String> subject,
225 int index, 228 int index,
226 Handle<JSArray> last_match_info) { 229 Handle<JSArray> last_match_info) {
230 Isolate* isolate = re->GetIsolate();
231
227 ASSERT(0 <= index); 232 ASSERT(0 <= index);
228 ASSERT(index <= subject->length()); 233 ASSERT(index <= subject->length());
229 234
230 if (!subject->IsFlat()) FlattenString(subject); 235 if (!subject->IsFlat()) FlattenString(subject);
231 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid 236 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
232 // Extract flattened substrings of cons strings before determining asciiness. 237 // Extract flattened substrings of cons strings before determining asciiness.
233 String* seq_sub = *subject; 238 String* seq_sub = *subject;
234 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); 239 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
235 240
236 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); 241 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
237 int needle_len = needle->length(); 242 int needle_len = needle->length();
238 243
239 if (needle_len != 0) { 244 if (needle_len != 0) {
240 if (index + needle_len > subject->length()) return Factory::null_value(); 245 if (index + needle_len > subject->length())
246 return isolate->factory()->null_value();
247
241 // dispatch on type of strings 248 // dispatch on type of strings
242 index = (needle->IsAsciiRepresentation() 249 index = (needle->IsAsciiRepresentation()
243 ? (seq_sub->IsAsciiRepresentation() 250 ? (seq_sub->IsAsciiRepresentation()
244 ? SearchString(seq_sub->ToAsciiVector(), 251 ? SearchString(isolate,
252 seq_sub->ToAsciiVector(),
245 needle->ToAsciiVector(), 253 needle->ToAsciiVector(),
246 index) 254 index)
247 : SearchString(seq_sub->ToUC16Vector(), 255 : SearchString(isolate,
256 seq_sub->ToUC16Vector(),
248 needle->ToAsciiVector(), 257 needle->ToAsciiVector(),
249 index)) 258 index))
250 : (seq_sub->IsAsciiRepresentation() 259 : (seq_sub->IsAsciiRepresentation()
251 ? SearchString(seq_sub->ToAsciiVector(), 260 ? SearchString(isolate,
261 seq_sub->ToAsciiVector(),
252 needle->ToUC16Vector(), 262 needle->ToUC16Vector(),
253 index) 263 index)
254 : SearchString(seq_sub->ToUC16Vector(), 264 : SearchString(isolate,
265 seq_sub->ToUC16Vector(),
255 needle->ToUC16Vector(), 266 needle->ToUC16Vector(),
256 index))); 267 index)));
257 if (index == -1) return Factory::null_value(); 268 if (index == -1) return FACTORY->null_value();
258 } 269 }
259 ASSERT(last_match_info->HasFastElements()); 270 ASSERT(last_match_info->HasFastElements());
260 271
261 { 272 {
262 NoHandleAllocation no_handles; 273 NoHandleAllocation no_handles;
263 FixedArray* array = FixedArray::cast(last_match_info->elements()); 274 FixedArray* array = FixedArray::cast(last_match_info->elements());
264 SetAtomLastCapture(array, *subject, index, index + needle_len); 275 SetAtomLastCapture(array, *subject, index, index + needle_len);
265 } 276 }
266 return last_match_info; 277 return last_match_info;
267 } 278 }
(...skipping 13 matching lines...) Expand all
281 if (compiled_code->IsByteArray()) return true; 292 if (compiled_code->IsByteArray()) return true;
282 #else // V8_INTERPRETED_REGEXP (RegExp native code) 293 #else // V8_INTERPRETED_REGEXP (RegExp native code)
283 if (compiled_code->IsCode()) return true; 294 if (compiled_code->IsCode()) return true;
284 #endif 295 #endif
285 return CompileIrregexp(re, is_ascii); 296 return CompileIrregexp(re, is_ascii);
286 } 297 }
287 298
288 299
289 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) { 300 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {
290 // Compile the RegExp. 301 // Compile the RegExp.
302 Isolate* isolate = re->GetIsolate();
291 CompilationZoneScope zone_scope(DELETE_ON_EXIT); 303 CompilationZoneScope zone_scope(DELETE_ON_EXIT);
292 PostponeInterruptsScope postpone; 304 PostponeInterruptsScope postpone(isolate);
293 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii)); 305 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));
294 if (entry->IsJSObject()) { 306 if (entry->IsJSObject()) {
295 // If it's a JSObject, a previous compilation failed and threw this object. 307 // If it's a JSObject, a previous compilation failed and threw this object.
296 // Re-throw the object without trying again. 308 // Re-throw the object without trying again.
297 Top::Throw(entry); 309 isolate->Throw(entry);
298 return false; 310 return false;
299 } 311 }
300 ASSERT(entry->IsTheHole()); 312 ASSERT(entry->IsTheHole());
301 313
302 JSRegExp::Flags flags = re->GetFlags(); 314 JSRegExp::Flags flags = re->GetFlags();
303 315
304 Handle<String> pattern(re->Pattern()); 316 Handle<String> pattern(re->Pattern());
305 if (!pattern->IsFlat()) { 317 if (!pattern->IsFlat()) {
306 FlattenString(pattern); 318 FlattenString(pattern);
307 } 319 }
308 320
309 RegExpCompileData compile_data; 321 RegExpCompileData compile_data;
310 FlatStringReader reader(pattern); 322 FlatStringReader reader(isolate, pattern);
311 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), 323 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
312 &compile_data)) { 324 &compile_data)) {
313 // Throw an exception if we fail to parse the pattern. 325 // Throw an exception if we fail to parse the pattern.
314 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. 326 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
315 ThrowRegExpException(re, 327 ThrowRegExpException(re,
316 pattern, 328 pattern,
317 compile_data.error, 329 compile_data.error,
318 "malformed_regexp"); 330 "malformed_regexp");
319 return false; 331 return false;
320 } 332 }
321 RegExpEngine::CompilationResult result = 333 RegExpEngine::CompilationResult result =
322 RegExpEngine::Compile(&compile_data, 334 RegExpEngine::Compile(&compile_data,
323 flags.is_ignore_case(), 335 flags.is_ignore_case(),
324 flags.is_multiline(), 336 flags.is_multiline(),
325 pattern, 337 pattern,
326 is_ascii); 338 is_ascii);
327 if (result.error_message != NULL) { 339 if (result.error_message != NULL) {
328 // Unable to compile regexp. 340 // Unable to compile regexp.
329 Handle<FixedArray> elements = Factory::NewFixedArray(2); 341 Factory* factory = isolate->factory();
342 Handle<FixedArray> elements = factory->NewFixedArray(2);
330 elements->set(0, *pattern); 343 elements->set(0, *pattern);
331 Handle<String> error_message = 344 Handle<String> error_message =
332 Factory::NewStringFromUtf8(CStrVector(result.error_message)); 345 factory->NewStringFromUtf8(CStrVector(result.error_message));
333 elements->set(1, *error_message); 346 elements->set(1, *error_message);
334 Handle<JSArray> array = Factory::NewJSArrayWithElements(elements); 347 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
335 Handle<Object> regexp_err = 348 Handle<Object> regexp_err =
336 Factory::NewSyntaxError("malformed_regexp", array); 349 factory->NewSyntaxError("malformed_regexp", array);
337 Top::Throw(*regexp_err); 350 isolate->Throw(*regexp_err);
338 re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err); 351 re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err);
339 return false; 352 return false;
340 } 353 }
341 354
342 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); 355 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
343 data->set(JSRegExp::code_index(is_ascii), result.code); 356 data->set(JSRegExp::code_index(is_ascii), result.code);
344 int register_max = IrregexpMaxRegisterCount(*data); 357 int register_max = IrregexpMaxRegisterCount(*data);
345 if (result.num_registers > register_max) { 358 if (result.num_registers > register_max) {
346 SetIrregexpMaxRegisterCount(*data, result.num_registers); 359 SetIrregexpMaxRegisterCount(*data, result.num_registers);
347 } 360 }
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
379 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { 392 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
380 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); 393 return Code::cast(re->get(JSRegExp::code_index(is_ascii)));
381 } 394 }
382 395
383 396
384 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re, 397 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
385 Handle<String> pattern, 398 Handle<String> pattern,
386 JSRegExp::Flags flags, 399 JSRegExp::Flags flags,
387 int capture_count) { 400 int capture_count) {
388 // Initialize compiled code entries to null. 401 // Initialize compiled code entries to null.
389 Factory::SetRegExpIrregexpData(re, 402 re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
390 JSRegExp::IRREGEXP, 403 JSRegExp::IRREGEXP,
391 pattern, 404 pattern,
392 flags, 405 flags,
393 capture_count); 406 capture_count);
394 } 407 }
395 408
396 409
397 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, 410 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
398 Handle<String> subject) { 411 Handle<String> subject) {
399 if (!subject->IsFlat()) { 412 if (!subject->IsFlat()) {
400 FlattenString(subject); 413 FlattenString(subject);
401 } 414 }
402 // Check the asciiness of the underlying storage. 415 // Check the asciiness of the underlying storage.
403 bool is_ascii; 416 bool is_ascii;
(...skipping 17 matching lines...) Expand all
421 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; 434 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
422 #endif // V8_INTERPRETED_REGEXP 435 #endif // V8_INTERPRETED_REGEXP
423 } 436 }
424 437
425 438
426 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( 439 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
427 Handle<JSRegExp> regexp, 440 Handle<JSRegExp> regexp,
428 Handle<String> subject, 441 Handle<String> subject,
429 int index, 442 int index,
430 Vector<int> output) { 443 Vector<int> output) {
431 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data())); 444 Isolate* isolate = regexp->GetIsolate();
445
446 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
432 447
433 ASSERT(index >= 0); 448 ASSERT(index >= 0);
434 ASSERT(index <= subject->length()); 449 ASSERT(index <= subject->length());
435 ASSERT(subject->IsFlat()); 450 ASSERT(subject->IsFlat());
436 451
437 // A flat ASCII string might have a two-byte first part. 452 // A flat ASCII string might have a two-byte first part.
438 if (subject->IsConsString()) { 453 if (subject->IsConsString()) {
439 subject = Handle<String>(ConsString::cast(*subject)->first()); 454 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate);
440 } 455 }
441 456
442 #ifndef V8_INTERPRETED_REGEXP 457 #ifndef V8_INTERPRETED_REGEXP
443 ASSERT(output.length() >= 458 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
444 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
445 do { 459 do {
446 bool is_ascii = subject->IsAsciiRepresentation(); 460 bool is_ascii = subject->IsAsciiRepresentation();
447 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii)); 461 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
448 NativeRegExpMacroAssembler::Result res = 462 NativeRegExpMacroAssembler::Result res =
449 NativeRegExpMacroAssembler::Match(code, 463 NativeRegExpMacroAssembler::Match(code,
450 subject, 464 subject,
451 output.start(), 465 output.start(),
452 output.length(), 466 output.length(),
453 index); 467 index,
468 isolate);
454 if (res != NativeRegExpMacroAssembler::RETRY) { 469 if (res != NativeRegExpMacroAssembler::RETRY) {
455 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || 470 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||
456 Top::has_pending_exception()); 471 isolate->has_pending_exception());
457 STATIC_ASSERT( 472 STATIC_ASSERT(
458 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); 473 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
459 STATIC_ASSERT( 474 STATIC_ASSERT(
460 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); 475 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
461 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) 476 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
462 == RE_EXCEPTION); 477 == RE_EXCEPTION);
463 return static_cast<IrregexpResult>(res); 478 return static_cast<IrregexpResult>(res);
464 } 479 }
465 // If result is RETRY, the string has changed representation, and we 480 // If result is RETRY, the string has changed representation, and we
466 // must restart from scratch. 481 // must restart from scratch.
(...skipping 10 matching lines...) Expand all
477 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); 492 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
478 bool is_ascii = subject->IsAsciiRepresentation(); 493 bool is_ascii = subject->IsAsciiRepresentation();
479 // We must have done EnsureCompiledIrregexp, so we can get the number of 494 // We must have done EnsureCompiledIrregexp, so we can get the number of
480 // registers. 495 // registers.
481 int* register_vector = output.start(); 496 int* register_vector = output.start();
482 int number_of_capture_registers = 497 int number_of_capture_registers =
483 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; 498 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
484 for (int i = number_of_capture_registers - 1; i >= 0; i--) { 499 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
485 register_vector[i] = -1; 500 register_vector[i] = -1;
486 } 501 }
487 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii)); 502 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
488 503
489 if (IrregexpInterpreter::Match(byte_codes, 504 if (IrregexpInterpreter::Match(byte_codes,
490 subject, 505 subject,
491 register_vector, 506 register_vector,
492 index)) { 507 index)) {
493 return RE_SUCCESS; 508 return RE_SUCCESS;
494 } 509 }
495 return RE_FAILURE; 510 return RE_FAILURE;
496 #endif // V8_INTERPRETED_REGEXP 511 #endif // V8_INTERPRETED_REGEXP
497 } 512 }
(...skipping 11 matching lines...) Expand all
509 if (FLAG_trace_regexp_bytecodes) { 524 if (FLAG_trace_regexp_bytecodes) {
510 String* pattern = jsregexp->Pattern(); 525 String* pattern = jsregexp->Pattern();
511 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 526 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
512 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 527 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
513 } 528 }
514 #endif 529 #endif
515 #endif 530 #endif
516 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); 531 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);
517 if (required_registers < 0) { 532 if (required_registers < 0) {
518 // Compiling failed with an exception. 533 // Compiling failed with an exception.
519 ASSERT(Top::has_pending_exception()); 534 ASSERT(Isolate::Current()->has_pending_exception());
520 return Handle<Object>::null(); 535 return Handle<Object>::null();
521 } 536 }
522 537
523 OffsetsVector registers(required_registers); 538 OffsetsVector registers(required_registers);
524 539
525 IrregexpResult res = RegExpImpl::IrregexpExecOnce( 540 IrregexpResult res = RegExpImpl::IrregexpExecOnce(
526 jsregexp, subject, previous_index, Vector<int>(registers.vector(), 541 jsregexp, subject, previous_index, Vector<int>(registers.vector(),
527 registers.length())); 542 registers.length()));
528 if (res == RE_SUCCESS) { 543 if (res == RE_SUCCESS) {
529 int capture_register_count = 544 int capture_register_count =
530 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; 545 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
531 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); 546 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
532 AssertNoAllocation no_gc; 547 AssertNoAllocation no_gc;
533 int* register_vector = registers.vector(); 548 int* register_vector = registers.vector();
534 FixedArray* array = FixedArray::cast(last_match_info->elements()); 549 FixedArray* array = FixedArray::cast(last_match_info->elements());
535 for (int i = 0; i < capture_register_count; i += 2) { 550 for (int i = 0; i < capture_register_count; i += 2) {
536 SetCapture(array, i, register_vector[i]); 551 SetCapture(array, i, register_vector[i]);
537 SetCapture(array, i + 1, register_vector[i + 1]); 552 SetCapture(array, i + 1, register_vector[i + 1]);
538 } 553 }
539 SetLastCaptureCount(array, capture_register_count); 554 SetLastCaptureCount(array, capture_register_count);
540 SetLastSubject(array, *subject); 555 SetLastSubject(array, *subject);
541 SetLastInput(array, *subject); 556 SetLastInput(array, *subject);
542 return last_match_info; 557 return last_match_info;
543 } 558 }
544 if (res == RE_EXCEPTION) { 559 if (res == RE_EXCEPTION) {
545 ASSERT(Top::has_pending_exception()); 560 ASSERT(Isolate::Current()->has_pending_exception());
546 return Handle<Object>::null(); 561 return Handle<Object>::null();
547 } 562 }
548 ASSERT(res == RE_FAILURE); 563 ASSERT(res == RE_FAILURE);
549 return Factory::null_value(); 564 return Isolate::Current()->factory()->null_value();
550 } 565 }
551 566
552 567
553 // ------------------------------------------------------------------- 568 // -------------------------------------------------------------------
554 // Implementation of the Irregexp regular expression engine. 569 // Implementation of the Irregexp regular expression engine.
555 // 570 //
556 // The Irregexp regular expression engine is intended to be a complete 571 // The Irregexp regular expression engine is intended to be a complete
557 // implementation of ECMAScript regular expressions. It generates either 572 // implementation of ECMAScript regular expressions. It generates either
558 // bytecodes or native code. 573 // bytecodes or native code.
559 574
(...skipping 739 matching lines...) Expand 10 before | Expand all | Expand 10 after
1299 case Guard::GEQ: 1314 case Guard::GEQ:
1300 ASSERT(!trace->mentions_reg(guard->reg())); 1315 ASSERT(!trace->mentions_reg(guard->reg()));
1301 macro_assembler->IfRegisterLT(guard->reg(), 1316 macro_assembler->IfRegisterLT(guard->reg(),
1302 guard->value(), 1317 guard->value(),
1303 trace->backtrack()); 1318 trace->backtrack());
1304 break; 1319 break;
1305 } 1320 }
1306 } 1321 }
1307 1322
1308 1323
1309 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
1310 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
1311
1312
1313 // Returns the number of characters in the equivalence class, omitting those 1324 // Returns the number of characters in the equivalence class, omitting those
1314 // that cannot occur in the source string because it is ASCII. 1325 // that cannot occur in the source string because it is ASCII.
1315 static int GetCaseIndependentLetters(uc16 character, 1326 static int GetCaseIndependentLetters(Isolate* isolate,
1327 uc16 character,
1316 bool ascii_subject, 1328 bool ascii_subject,
1317 unibrow::uchar* letters) { 1329 unibrow::uchar* letters) {
1318 int length = uncanonicalize.get(character, '\0', letters); 1330 int length =
1331 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
1319 // Unibrow returns 0 or 1 for characters where case independence is 1332 // Unibrow returns 0 or 1 for characters where case independence is
1320 // trivial. 1333 // trivial.
1321 if (length == 0) { 1334 if (length == 0) {
1322 letters[0] = character; 1335 letters[0] = character;
1323 length = 1; 1336 length = 1;
1324 } 1337 }
1325 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { 1338 if (!ascii_subject || character <= String::kMaxAsciiCharCode) {
1326 return length; 1339 return length;
1327 } 1340 }
1328 // The standard requires that non-ASCII characters cannot have ASCII 1341 // The standard requires that non-ASCII characters cannot have ASCII
1329 // character codes in their equivalence class. 1342 // character codes in their equivalence class.
1330 return 0; 1343 return 0;
1331 } 1344 }
1332 1345
1333 1346
1334 static inline bool EmitSimpleCharacter(RegExpCompiler* compiler, 1347 static inline bool EmitSimpleCharacter(Isolate* isolate,
1348 RegExpCompiler* compiler,
1335 uc16 c, 1349 uc16 c,
1336 Label* on_failure, 1350 Label* on_failure,
1337 int cp_offset, 1351 int cp_offset,
1338 bool check, 1352 bool check,
1339 bool preloaded) { 1353 bool preloaded) {
1340 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1354 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1341 bool bound_checked = false; 1355 bool bound_checked = false;
1342 if (!preloaded) { 1356 if (!preloaded) {
1343 assembler->LoadCurrentCharacter( 1357 assembler->LoadCurrentCharacter(
1344 cp_offset, 1358 cp_offset,
1345 on_failure, 1359 on_failure,
1346 check); 1360 check);
1347 bound_checked = true; 1361 bound_checked = true;
1348 } 1362 }
1349 assembler->CheckNotCharacter(c, on_failure); 1363 assembler->CheckNotCharacter(c, on_failure);
1350 return bound_checked; 1364 return bound_checked;
1351 } 1365 }
1352 1366
1353 1367
1354 // Only emits non-letters (things that don't have case). Only used for case 1368 // Only emits non-letters (things that don't have case). Only used for case
1355 // independent matches. 1369 // independent matches.
1356 static inline bool EmitAtomNonLetter(RegExpCompiler* compiler, 1370 static inline bool EmitAtomNonLetter(Isolate* isolate,
1371 RegExpCompiler* compiler,
1357 uc16 c, 1372 uc16 c,
1358 Label* on_failure, 1373 Label* on_failure,
1359 int cp_offset, 1374 int cp_offset,
1360 bool check, 1375 bool check,
1361 bool preloaded) { 1376 bool preloaded) {
1362 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1377 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1363 bool ascii = compiler->ascii(); 1378 bool ascii = compiler->ascii();
1364 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1379 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1365 int length = GetCaseIndependentLetters(c, ascii, chars); 1380 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
1366 if (length < 1) { 1381 if (length < 1) {
1367 // This can't match. Must be an ASCII subject and a non-ASCII character. 1382 // This can't match. Must be an ASCII subject and a non-ASCII character.
1368 // We do not need to do anything since the ASCII pass already handled this. 1383 // We do not need to do anything since the ASCII pass already handled this.
1369 return false; // Bounds not checked. 1384 return false; // Bounds not checked.
1370 } 1385 }
1371 bool checked = false; 1386 bool checked = false;
1372 // We handle the length > 1 case in a later pass. 1387 // We handle the length > 1 case in a later pass.
1373 if (length == 1) { 1388 if (length == 1) {
1374 if (ascii && c > String::kMaxAsciiCharCodeU) { 1389 if (ascii && c > String::kMaxAsciiCharCodeU) {
1375 // Can't match - see above. 1390 // Can't match - see above.
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
1417 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, 1432 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
1418 diff, 1433 diff,
1419 mask, 1434 mask,
1420 on_failure); 1435 on_failure);
1421 return true; 1436 return true;
1422 } 1437 }
1423 return false; 1438 return false;
1424 } 1439 }
1425 1440
1426 1441
1427 typedef bool EmitCharacterFunction(RegExpCompiler* compiler, 1442 typedef bool EmitCharacterFunction(Isolate* isolate,
1443 RegExpCompiler* compiler,
1428 uc16 c, 1444 uc16 c,
1429 Label* on_failure, 1445 Label* on_failure,
1430 int cp_offset, 1446 int cp_offset,
1431 bool check, 1447 bool check,
1432 bool preloaded); 1448 bool preloaded);
1433 1449
1434 // Only emits letters (things that have case). Only used for case independent 1450 // Only emits letters (things that have case). Only used for case independent
1435 // matches. 1451 // matches.
1436 static inline bool EmitAtomLetter(RegExpCompiler* compiler, 1452 static inline bool EmitAtomLetter(Isolate* isolate,
1453 RegExpCompiler* compiler,
1437 uc16 c, 1454 uc16 c,
1438 Label* on_failure, 1455 Label* on_failure,
1439 int cp_offset, 1456 int cp_offset,
1440 bool check, 1457 bool check,
1441 bool preloaded) { 1458 bool preloaded) {
1442 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1459 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1443 bool ascii = compiler->ascii(); 1460 bool ascii = compiler->ascii();
1444 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1461 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1445 int length = GetCaseIndependentLetters(c, ascii, chars); 1462 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
1446 if (length <= 1) return false; 1463 if (length <= 1) return false;
1447 // We may not need to check against the end of the input string 1464 // We may not need to check against the end of the input string
1448 // if this character lies before a character that matched. 1465 // if this character lies before a character that matched.
1449 if (!preloaded) { 1466 if (!preloaded) {
1450 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1467 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1451 } 1468 }
1452 Label ok; 1469 Label ok;
1453 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); 1470 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
1454 switch (length) { 1471 switch (length) {
1455 case 2: { 1472 case 2: {
(...skipping 417 matching lines...) Expand 10 before | Expand all | Expand 10 after
1873 // 1890 //
1874 // We iterate along the text object, building up for each character a 1891 // We iterate along the text object, building up for each character a
1875 // mask and value that can be used to test for a quick failure to match. 1892 // mask and value that can be used to test for a quick failure to match.
1876 // The masks and values for the positions will be combined into a single 1893 // The masks and values for the positions will be combined into a single
1877 // machine word for the current character width in order to be used in 1894 // machine word for the current character width in order to be used in
1878 // generating a quick check. 1895 // generating a quick check.
1879 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, 1896 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
1880 RegExpCompiler* compiler, 1897 RegExpCompiler* compiler,
1881 int characters_filled_in, 1898 int characters_filled_in,
1882 bool not_at_start) { 1899 bool not_at_start) {
1900 Isolate* isolate = Isolate::Current();
1883 ASSERT(characters_filled_in < details->characters()); 1901 ASSERT(characters_filled_in < details->characters());
1884 int characters = details->characters(); 1902 int characters = details->characters();
1885 int char_mask; 1903 int char_mask;
1886 int char_shift; 1904 int char_shift;
1887 if (compiler->ascii()) { 1905 if (compiler->ascii()) {
1888 char_mask = String::kMaxAsciiCharCode; 1906 char_mask = String::kMaxAsciiCharCode;
1889 char_shift = 8; 1907 char_shift = 8;
1890 } else { 1908 } else {
1891 char_mask = String::kMaxUC16CharCode; 1909 char_mask = String::kMaxUC16CharCode;
1892 char_shift = 16; 1910 char_shift = 16;
(...skipping 10 matching lines...) Expand all
1903 // If we expect a non-ASCII character from an ASCII string, 1921 // If we expect a non-ASCII character from an ASCII string,
1904 // there is no way we can match. Not even case independent 1922 // there is no way we can match. Not even case independent
1905 // matching can turn an ASCII character into non-ASCII or 1923 // matching can turn an ASCII character into non-ASCII or
1906 // vice versa. 1924 // vice versa.
1907 details->set_cannot_match(); 1925 details->set_cannot_match();
1908 pos->determines_perfectly = false; 1926 pos->determines_perfectly = false;
1909 return; 1927 return;
1910 } 1928 }
1911 if (compiler->ignore_case()) { 1929 if (compiler->ignore_case()) {
1912 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1930 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1913 int length = GetCaseIndependentLetters(c, compiler->ascii(), chars); 1931 int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),
1932 chars);
1914 ASSERT(length != 0); // Can only happen if c > char_mask (see above). 1933 ASSERT(length != 0); // Can only happen if c > char_mask (see above).
1915 if (length == 1) { 1934 if (length == 1) {
1916 // This letter has no case equivalents, so it's nice and simple 1935 // This letter has no case equivalents, so it's nice and simple
1917 // and the mask-compare will determine definitely whether we have 1936 // and the mask-compare will determine definitely whether we have
1918 // a match at this character position. 1937 // a match at this character position.
1919 pos->mask = char_mask; 1938 pos->mask = char_mask;
1920 pos->value = c; 1939 pos->value = c;
1921 pos->determines_perfectly = true; 1940 pos->determines_perfectly = true;
1922 } else { 1941 } else {
1923 uint32_t common_bits = char_mask; 1942 uint32_t common_bits = char_mask;
(...skipping 479 matching lines...) Expand 10 before | Expand all | Expand 10 after
2403 // loading characters, which means we do not need to recheck the bounds 2422 // loading characters, which means we do not need to recheck the bounds
2404 // up to the limit the quick check already checked. In addition the quick 2423 // up to the limit the quick check already checked. In addition the quick
2405 // check can have involved a mask and compare operation which may simplify 2424 // check can have involved a mask and compare operation which may simplify
2406 // or obviate the need for further checks at some character positions. 2425 // or obviate the need for further checks at some character positions.
2407 void TextNode::TextEmitPass(RegExpCompiler* compiler, 2426 void TextNode::TextEmitPass(RegExpCompiler* compiler,
2408 TextEmitPassType pass, 2427 TextEmitPassType pass,
2409 bool preloaded, 2428 bool preloaded,
2410 Trace* trace, 2429 Trace* trace,
2411 bool first_element_checked, 2430 bool first_element_checked,
2412 int* checked_up_to) { 2431 int* checked_up_to) {
2432 Isolate* isolate = Isolate::Current();
2413 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2433 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2414 bool ascii = compiler->ascii(); 2434 bool ascii = compiler->ascii();
2415 Label* backtrack = trace->backtrack(); 2435 Label* backtrack = trace->backtrack();
2416 QuickCheckDetails* quick_check = trace->quick_check_performed(); 2436 QuickCheckDetails* quick_check = trace->quick_check_performed();
2417 int element_count = elms_->length(); 2437 int element_count = elms_->length();
2418 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { 2438 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
2419 TextElement elm = elms_->at(i); 2439 TextElement elm = elms_->at(i);
2420 int cp_offset = trace->cp_offset() + elm.cp_offset; 2440 int cp_offset = trace->cp_offset() + elm.cp_offset;
2421 if (elm.type == TextElement::ATOM) { 2441 if (elm.type == TextElement::ATOM) {
2422 Vector<const uc16> quarks = elm.data.u_atom->data(); 2442 Vector<const uc16> quarks = elm.data.u_atom->data();
(...skipping 15 matching lines...) Expand all
2438 case SIMPLE_CHARACTER_MATCH: 2458 case SIMPLE_CHARACTER_MATCH:
2439 emit_function = &EmitSimpleCharacter; 2459 emit_function = &EmitSimpleCharacter;
2440 break; 2460 break;
2441 case CASE_CHARACTER_MATCH: 2461 case CASE_CHARACTER_MATCH:
2442 emit_function = &EmitAtomLetter; 2462 emit_function = &EmitAtomLetter;
2443 break; 2463 break;
2444 default: 2464 default:
2445 break; 2465 break;
2446 } 2466 }
2447 if (emit_function != NULL) { 2467 if (emit_function != NULL) {
2448 bool bound_checked = emit_function(compiler, 2468 bool bound_checked = emit_function(isolate,
2469 compiler,
2449 quarks[j], 2470 quarks[j],
2450 backtrack, 2471 backtrack,
2451 cp_offset + j, 2472 cp_offset + j,
2452 *checked_up_to < cp_offset + j, 2473 *checked_up_to < cp_offset + j,
2453 preloaded); 2474 preloaded);
2454 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); 2475 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
2455 } 2476 }
2456 } 2477 }
2457 } else { 2478 } else {
2458 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); 2479 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
(...skipping 1619 matching lines...) Expand 10 before | Expand all | Expand 10 after
4078 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); 4099 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);
4079 for (int i = 0; i < overlay.length(); i += 2) { 4100 for (int i = 0; i < overlay.length(); i += 2) {
4080 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), 4101 table.AddRange(CharacterRange(overlay[i], overlay[i+1]),
4081 CharacterRangeSplitter::kInOverlay); 4102 CharacterRangeSplitter::kInOverlay);
4082 } 4103 }
4083 CharacterRangeSplitter callback(included, excluded); 4104 CharacterRangeSplitter callback(included, excluded);
4084 table.ForEach(&callback); 4105 table.ForEach(&callback);
4085 } 4106 }
4086 4107
4087 4108
4088 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, 4109 static void AddUncanonicals(Isolate* isolate,
4110 ZoneList<CharacterRange>* ranges,
4089 int bottom, 4111 int bottom,
4090 int top); 4112 int top);
4091 4113
4092 4114
4093 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, 4115 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
4094 bool is_ascii) { 4116 bool is_ascii) {
4117 Isolate* isolate = Isolate::Current();
4095 uc16 bottom = from(); 4118 uc16 bottom = from();
4096 uc16 top = to(); 4119 uc16 top = to();
4097 if (is_ascii) { 4120 if (is_ascii) {
4098 if (bottom > String::kMaxAsciiCharCode) return; 4121 if (bottom > String::kMaxAsciiCharCode) return;
4099 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; 4122 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;
4100 } 4123 }
4101 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 4124 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4102 if (top == bottom) { 4125 if (top == bottom) {
4103 // If this is a singleton we just expand the one character. 4126 // If this is a singleton we just expand the one character.
4104 int length = uncanonicalize.get(bottom, '\0', chars); 4127 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
4105 for (int i = 0; i < length; i++) { 4128 for (int i = 0; i < length; i++) {
4106 uc32 chr = chars[i]; 4129 uc32 chr = chars[i];
4107 if (chr != bottom) { 4130 if (chr != bottom) {
4108 ranges->Add(CharacterRange::Singleton(chars[i])); 4131 ranges->Add(CharacterRange::Singleton(chars[i]));
4109 } 4132 }
4110 } 4133 }
4111 } else { 4134 } else {
4112 // If this is a range we expand the characters block by block, 4135 // If this is a range we expand the characters block by block,
4113 // expanding contiguous subranges (blocks) one at a time. 4136 // expanding contiguous subranges (blocks) one at a time.
4114 // The approach is as follows. For a given start character we 4137 // The approach is as follows. For a given start character we
4115 // look up the remainder of the block that contains it (represented 4138 // look up the remainder of the block that contains it (represented
4116 // by the end point), for instance we find 'z' if the character 4139 // by the end point), for instance we find 'z' if the character
4117 // is 'c'. A block is characterized by the property 4140 // is 'c'. A block is characterized by the property
4118 // that all characters uncanonicalize in the same way, except that 4141 // that all characters uncanonicalize in the same way, except that
4119 // each entry in the result is incremented by the distance from the first 4142 // each entry in the result is incremented by the distance from the first
4120 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and 4143 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and
4121 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. 4144 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
4122 // Once we've found the end point we look up its uncanonicalization 4145 // Once we've found the end point we look up its uncanonicalization
4123 // and produce a range for each element. For instance for [c-f] 4146 // and produce a range for each element. For instance for [c-f]
4124 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only 4147 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
4125 // add a range if it is not already contained in the input, so [c-f] 4148 // add a range if it is not already contained in the input, so [c-f]
4126 // will be skipped but [C-F] will be added. If this range is not 4149 // will be skipped but [C-F] will be added. If this range is not
4127 // completely contained in a block we do this for all the blocks 4150 // completely contained in a block we do this for all the blocks
4128 // covered by the range (handling characters that is not in a block 4151 // covered by the range (handling characters that is not in a block
4129 // as a "singleton block"). 4152 // as a "singleton block").
4130 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 4153 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4131 int pos = bottom; 4154 int pos = bottom;
4132 while (pos < top) { 4155 while (pos < top) {
4133 int length = canonrange.get(pos, '\0', range); 4156 int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);
4134 uc16 block_end; 4157 uc16 block_end;
4135 if (length == 0) { 4158 if (length == 0) {
4136 block_end = pos; 4159 block_end = pos;
4137 } else { 4160 } else {
4138 ASSERT_EQ(1, length); 4161 ASSERT_EQ(1, length);
4139 block_end = range[0]; 4162 block_end = range[0];
4140 } 4163 }
4141 int end = (block_end > top) ? top : block_end; 4164 int end = (block_end > top) ? top : block_end;
4142 length = uncanonicalize.get(block_end, '\0', range); 4165 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);
4143 for (int i = 0; i < length; i++) { 4166 for (int i = 0; i < length; i++) {
4144 uc32 c = range[i]; 4167 uc32 c = range[i];
4145 uc16 range_from = c - (block_end - pos); 4168 uc16 range_from = c - (block_end - pos);
4146 uc16 range_to = c - (block_end - end); 4169 uc16 range_to = c - (block_end - end);
4147 if (!(bottom <= range_from && range_to <= top)) { 4170 if (!(bottom <= range_from && range_to <= top)) {
4148 ranges->Add(CharacterRange(range_from, range_to)); 4171 ranges->Add(CharacterRange(range_from, range_to));
4149 } 4172 }
4150 } 4173 }
4151 pos = end + 1; 4174 pos = end + 1;
4152 } 4175 }
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
4242 result.SetElementsInSecondSet(); 4265 result.SetElementsInSecondSet();
4243 } else if (j < range->length()) { 4266 } else if (j < range->length()) {
4244 // Argument range contains something not in word range. 4267 // Argument range contains something not in word range.
4245 result.SetElementsInFirstSet(); 4268 result.SetElementsInFirstSet();
4246 } 4269 }
4247 4270
4248 return result; 4271 return result;
4249 } 4272 }
4250 4273
4251 4274
4252 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, 4275 static void AddUncanonicals(Isolate* isolate,
4276 ZoneList<CharacterRange>* ranges,
4253 int bottom, 4277 int bottom,
4254 int top) { 4278 int top) {
4255 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 4279 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4256 // Zones with no case mappings. There is a DEBUG-mode loop to assert that 4280 // Zones with no case mappings. There is a DEBUG-mode loop to assert that
4257 // this table is correct. 4281 // this table is correct.
4258 // 0x0600 - 0x0fff 4282 // 0x0600 - 0x0fff
4259 // 0x1100 - 0x1cff 4283 // 0x1100 - 0x1cff
4260 // 0x2000 - 0x20ff 4284 // 0x2000 - 0x20ff
4261 // 0x2200 - 0x23ff 4285 // 0x2200 - 0x23ff
4262 // 0x2500 - 0x2bff 4286 // 0x2500 - 0x2bff
(...skipping 11 matching lines...) Expand all
4274 if (top <= boundaries[0]) { 4298 if (top <= boundaries[0]) {
4275 CharacterRange range(bottom, top); 4299 CharacterRange range(bottom, top);
4276 range.AddCaseEquivalents(ranges, false); 4300 range.AddCaseEquivalents(ranges, false);
4277 return; 4301 return;
4278 } 4302 }
4279 4303
4280 // Split up very large ranges. This helps remove ranges where there are no 4304 // Split up very large ranges. This helps remove ranges where there are no
4281 // case mappings. 4305 // case mappings.
4282 for (int i = 0; i < boundary_count; i++) { 4306 for (int i = 0; i < boundary_count; i++) {
4283 if (bottom < boundaries[i] && top >= boundaries[i]) { 4307 if (bottom < boundaries[i] && top >= boundaries[i]) {
4284 AddUncanonicals(ranges, bottom, boundaries[i] - 1); 4308 AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1);
4285 AddUncanonicals(ranges, boundaries[i], top); 4309 AddUncanonicals(isolate, ranges, boundaries[i], top);
4286 return; 4310 return;
4287 } 4311 }
4288 } 4312 }
4289 4313
4290 // If we are completely in a zone with no case mappings then we are done. 4314 // If we are completely in a zone with no case mappings then we are done.
4291 for (int i = 0; i < boundary_count; i += 2) { 4315 for (int i = 0; i < boundary_count; i += 2) {
4292 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { 4316 if (bottom >= boundaries[i] && top < boundaries[i + 1]) {
4293 #ifdef DEBUG 4317 #ifdef DEBUG
4294 for (int j = bottom; j <= top; j++) { 4318 for (int j = bottom; j <= top; j++) {
4295 unsigned current_char = j; 4319 unsigned current_char = j;
4296 int length = uncanonicalize.get(current_char, '\0', chars); 4320 int length = isolate->jsregexp_uncanonicalize()->get(current_char,
4321 '\0', chars);
4297 for (int k = 0; k < length; k++) { 4322 for (int k = 0; k < length; k++) {
4298 ASSERT(chars[k] == current_char); 4323 ASSERT(chars[k] == current_char);
4299 } 4324 }
4300 } 4325 }
4301 #endif 4326 #endif
4302 return; 4327 return;
4303 } 4328 }
4304 } 4329 }
4305 4330
4306 // Step through the range finding equivalent characters. 4331 // Step through the range finding equivalent characters.
4307 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); 4332 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
4308 for (int i = bottom; i <= top; i++) { 4333 for (int i = bottom; i <= top; i++) {
4309 int length = uncanonicalize.get(i, '\0', chars); 4334 int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars);
4310 for (int j = 0; j < length; j++) { 4335 for (int j = 0; j < length; j++) {
4311 uc32 chr = chars[j]; 4336 uc32 chr = chars[j];
4312 if (chr != i && (chr < bottom || chr > top)) { 4337 if (chr != i && (chr < bottom || chr > top)) {
4313 characters->Add(chr); 4338 characters->Add(chr);
4314 } 4339 }
4315 } 4340 }
4316 } 4341 }
4317 4342
4318 // Step through the equivalent characters finding simple ranges and 4343 // Step through the equivalent characters finding simple ranges and
4319 // adding ranges to the character class. 4344 // adding ranges to the character class.
(...skipping 501 matching lines...) Expand 10 before | Expand all | Expand 10 after
4821 else 4846 else
4822 return empty(); 4847 return empty();
4823 } 4848 }
4824 4849
4825 4850
4826 // ------------------------------------------------------------------- 4851 // -------------------------------------------------------------------
4827 // Analysis 4852 // Analysis
4828 4853
4829 4854
4830 void Analysis::EnsureAnalyzed(RegExpNode* that) { 4855 void Analysis::EnsureAnalyzed(RegExpNode* that) {
4831 StackLimitCheck check; 4856 StackLimitCheck check(Isolate::Current());
4832 if (check.HasOverflowed()) { 4857 if (check.HasOverflowed()) {
4833 fail("Stack overflow"); 4858 fail("Stack overflow");
4834 return; 4859 return;
4835 } 4860 }
4836 if (that->info()->been_analyzed || that->info()->being_analyzed) 4861 if (that->info()->been_analyzed || that->info()->being_analyzed)
4837 return; 4862 return;
4838 that->info()->being_analyzed = true; 4863 that->info()->being_analyzed = true;
4839 that->Accept(this); 4864 that->Accept(this);
4840 that->info()->being_analyzed = false; 4865 that->info()->being_analyzed = false;
4841 that->info()->been_analyzed = true; 4866 that->info()->been_analyzed = true;
(...skipping 489 matching lines...) Expand 10 before | Expand all | Expand 10 after
5331 macro_assembler.SetCurrentPositionFromEnd(max_length); 5356 macro_assembler.SetCurrentPositionFromEnd(max_length);
5332 } 5357 }
5333 5358
5334 return compiler.Assemble(&macro_assembler, 5359 return compiler.Assemble(&macro_assembler,
5335 node, 5360 node,
5336 data->capture_count, 5361 data->capture_count,
5337 pattern); 5362 pattern);
5338 } 5363 }
5339 5364
5340 5365
5341 int OffsetsVector::static_offsets_vector_[
5342 OffsetsVector::kStaticOffsetsVectorSize];
5343
5344 }} // namespace v8::internal 5366 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/jump-target-heavy.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698