src/jsregexp.cc - Issue 6685088: Merge isolates to bleeding_edge.

Side by Side Diff: src/jsregexp.cc

Issue 6685088: Merge isolates to bleeding_edge. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 9 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved.	1 // Copyright 2006-2009 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 17 matching lines...) Expand all Loading...
28 #include "v8.h"	28 #include "v8.h"

29	29

30 #include "ast.h"	30 #include "ast.h"

31 #include "compiler.h"	31 #include "compiler.h"

32 #include "execution.h"	32 #include "execution.h"

33 #include "factory.h"	33 #include "factory.h"

34 #include "jsregexp.h"	34 #include "jsregexp.h"

35 #include "platform.h"	35 #include "platform.h"

36 #include "string-search.h"	36 #include "string-search.h"

37 #include "runtime.h"	37 #include "runtime.h"

38 #include "top.h"

39 #include "compilation-cache.h"	38 #include "compilation-cache.h"

40 #include "string-stream.h"	39 #include "string-stream.h"

41 #include "parser.h"	40 #include "parser.h"

42 #include "regexp-macro-assembler.h"	41 #include "regexp-macro-assembler.h"

43 #include "regexp-macro-assembler-tracer.h"	42 #include "regexp-macro-assembler-tracer.h"

44 #include "regexp-macro-assembler-irregexp.h"	43 #include "regexp-macro-assembler-irregexp.h"

45 #include "regexp-stack.h"	44 #include "regexp-stack.h"

46	45

47 #ifndef V8_INTERPRETED_REGEXP	46 #ifndef V8_INTERPRETED_REGEXP

48 #if V8_TARGET_ARCH_IA32	47 #if V8_TARGET_ARCH_IA32

49 #include "ia32/regexp-macro-assembler-ia32.h"	48 #include "ia32/regexp-macro-assembler-ia32.h"

50 #elif V8_TARGET_ARCH_X64	49 #elif V8_TARGET_ARCH_X64

51 #include "x64/regexp-macro-assembler-x64.h"	50 #include "x64/regexp-macro-assembler-x64.h"

52 #elif V8_TARGET_ARCH_ARM	51 #elif V8_TARGET_ARCH_ARM

53 #include "arm/regexp-macro-assembler-arm.h"	52 #include "arm/regexp-macro-assembler-arm.h"

54 #else	53 #else

55 #error Unsupported target architecture.	54 #error Unsupported target architecture.

56 #endif	55 #endif

57 #endif	56 #endif

58	57

59 #include "interpreter-irregexp.h"	58 #include "interpreter-irregexp.h"

60	59

61	60

62 namespace v8 {	61 namespace v8 {

63 namespace internal {	62 namespace internal {

64	63

65

66 Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor,	64 Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor,

67 Handle<String> pattern,	65 Handle<String> pattern,

68 Handle<String> flags,	66 Handle<String> flags,

69 bool* has_pending_exception) {	67 bool* has_pending_exception) {

70 // Call the construct code with 2 arguments.	68 // Call the construct code with 2 arguments.

71 Object** argv[2] = { Handle<Object>::cast(pattern).location(),	69 Object** argv[2] = { Handle<Object>::cast(pattern).location(),

72 Handle<Object>::cast(flags).location() };	70 Handle<Object>::cast(flags).location() };

73 return Execution::New(constructor, 2, argv, has_pending_exception);	71 return Execution::New(constructor, 2, argv, has_pending_exception);

74 }	72 }

75	73

(...skipping 14 matching lines...) Expand all Loading...
90 }	88 }

91 }	89 }

92 return JSRegExp::Flags(flags);	90 return JSRegExp::Flags(flags);

93 }	91 }

94	92

95	93

96 static inline void ThrowRegExpException(Handle<JSRegExp> re,	94 static inline void ThrowRegExpException(Handle<JSRegExp> re,

97 Handle<String> pattern,	95 Handle<String> pattern,

98 Handle<String> error_text,	96 Handle<String> error_text,

99 const char* message) {	97 const char* message) {

100 Handle<FixedArray> elements = Factory::NewFixedArray(2);	98 Isolate* isolate = re->GetIsolate();

	99 Factory* factory = isolate->factory();

	100 Handle<FixedArray> elements = factory->NewFixedArray(2);

101 elements->set(0, *pattern);	101 elements->set(0, *pattern);

102 elements->set(1, *error_text);	102 elements->set(1, *error_text);

103 Handle<JSArray> array = Factory::NewJSArrayWithElements(elements);	103 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);

104 Handle<Object> regexp_err = Factory::NewSyntaxError(message, array);	104 Handle<Object> regexp_err = factory->NewSyntaxError(message, array);

105 Top::Throw(*regexp_err);	105 isolate->Throw(*regexp_err);

106 }	106 }

107	107

108	108

109 // Generic RegExp methods. Dispatches to implementation specific methods.	109 // Generic RegExp methods. Dispatches to implementation specific methods.

110	110

111	111

112 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,	112 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,

113 Handle<String> pattern,	113 Handle<String> pattern,

114 Handle<String> flag_str) {	114 Handle<String> flag_str) {

	115 Isolate* isolate = re->GetIsolate();

115 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);	116 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);

116 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);	117 CompilationCache* compilation_cache = isolate->compilation_cache();

	118 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags);

117 bool in_cache = !cached.is_null();	119 bool in_cache = !cached.is_null();

118 LOG(RegExpCompileEvent(re, in_cache));	120 LOG(isolate, RegExpCompileEvent(re, in_cache));

119	121

120 Handle<Object> result;	122 Handle<Object> result;

121 if (in_cache) {	123 if (in_cache) {

122 re->set_data(*cached);	124 re->set_data(*cached);

123 return re;	125 return re;

124 }	126 }

125 pattern = FlattenGetString(pattern);	127 pattern = FlattenGetString(pattern);

126 CompilationZoneScope zone_scope(DELETE_ON_EXIT);	128 CompilationZoneScope zone_scope(DELETE_ON_EXIT);

127 PostponeInterruptsScope postpone;	129 PostponeInterruptsScope postpone(isolate);

128 RegExpCompileData parse_result;	130 RegExpCompileData parse_result;

129 FlatStringReader reader(pattern);	131 FlatStringReader reader(isolate, pattern);

130 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),	132 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),

131 &parse_result)) {	133 &parse_result)) {

132 // Throw an exception if we fail to parse the pattern.	134 // Throw an exception if we fail to parse the pattern.

133 ThrowRegExpException(re,	135 ThrowRegExpException(re,

134 pattern,	136 pattern,

135 parse_result.error,	137 parse_result.error,

136 "malformed_regexp");	138 "malformed_regexp");

137 return Handle<Object>::null();	139 return Handle<Object>::null();

138 }	140 }

139	141

140 if (parse_result.simple && !flags.is_ignore_case()) {	142 if (parse_result.simple && !flags.is_ignore_case()) {

141 // Parse-tree is a single atom that is equal to the pattern.	143 // Parse-tree is a single atom that is equal to the pattern.

142 AtomCompile(re, pattern, flags, pattern);	144 AtomCompile(re, pattern, flags, pattern);

143 } else if (parse_result.tree->IsAtom() &&	145 } else if (parse_result.tree->IsAtom() &&

144 !flags.is_ignore_case() &&	146 !flags.is_ignore_case() &&

145 parse_result.capture_count == 0) {	147 parse_result.capture_count == 0) {

146 RegExpAtom* atom = parse_result.tree->AsAtom();	148 RegExpAtom* atom = parse_result.tree->AsAtom();

147 Vector<const uc16> atom_pattern = atom->data();	149 Vector<const uc16> atom_pattern = atom->data();

148 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);	150 Handle<String> atom_string =

	151 isolate->factory()->NewStringFromTwoByte(atom_pattern);

149 AtomCompile(re, pattern, flags, atom_string);	152 AtomCompile(re, pattern, flags, atom_string);

150 } else {	153 } else {

151 IrregexpInitialize(re, pattern, flags, parse_result.capture_count);	154 IrregexpInitialize(re, pattern, flags, parse_result.capture_count);

152 }	155 }

153 ASSERT(re->data()->IsFixedArray());	156 ASSERT(re->data()->IsFixedArray());

154 // Compilation succeeded so the data is set on the regexp	157 // Compilation succeeded so the data is set on the regexp

155 // and we can store it in the cache.	158 // and we can store it in the cache.

156 Handle<FixedArray> data(FixedArray::cast(re->data()));	159 Handle<FixedArray> data(FixedArray::cast(re->data()));

157 CompilationCache::PutRegExp(pattern, flags, data);	160 compilation_cache->PutRegExp(pattern, flags, data);

158	161

159 return re;	162 return re;

160 }	163 }

161	164

162	165

163 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,	166 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,

164 Handle<String> subject,	167 Handle<String> subject,

165 int index,	168 int index,

166 Handle<JSArray> last_match_info) {	169 Handle<JSArray> last_match_info) {

167 switch (regexp->TypeTag()) {	170 switch (regexp->TypeTag()) {

168 case JSRegExp::ATOM:	171 case JSRegExp::ATOM:

169 return AtomExec(regexp, subject, index, last_match_info);	172 return AtomExec(regexp, subject, index, last_match_info);

170 case JSRegExp::IRREGEXP: {	173 case JSRegExp::IRREGEXP: {

171 Handle<Object> result =	174 Handle<Object> result =

172 IrregexpExec(regexp, subject, index, last_match_info);	175 IrregexpExec(regexp, subject, index, last_match_info);

173 ASSERT(!result.is_null() || Top::has_pending_exception());	176 ASSERT(!result.is_null() || Isolate::Current()->has_pending_exception());

174 return result;	177 return result;

175 }	178 }

176 default:	179 default:

177 UNREACHABLE();	180 UNREACHABLE();

178 return Handle<Object>::null();	181 return Handle<Object>::null();

179 }	182 }

180 }	183 }

181	184

182	185

183 // RegExp Atom implementation: Simple string search using indexOf.	186 // RegExp Atom implementation: Simple string search using indexOf.

184	187

185	188

186 void RegExpImpl::AtomCompile(Handle<JSRegExp> re,	189 void RegExpImpl::AtomCompile(Handle<JSRegExp> re,

187 Handle<String> pattern,	190 Handle<String> pattern,

188 JSRegExp::Flags flags,	191 JSRegExp::Flags flags,

189 Handle<String> match_pattern) {	192 Handle<String> match_pattern) {

190 Factory::SetRegExpAtomData(re,	193 re->GetIsolate()->factory()->SetRegExpAtomData(re,

191 JSRegExp::ATOM,	194 JSRegExp::ATOM,

192 pattern,	195 pattern,

193 flags,	196 flags,

194 match_pattern);	197 match_pattern);

195 }	198 }

196	199

197	200

198 static void SetAtomLastCapture(FixedArray* array,	201 static void SetAtomLastCapture(FixedArray* array,

199 String* subject,	202 String* subject,

200 int from,	203 int from,

201 int to) {	204 int to) {

202 NoHandleAllocation no_handles;	205 NoHandleAllocation no_handles;

203 RegExpImpl::SetLastCaptureCount(array, 2);	206 RegExpImpl::SetLastCaptureCount(array, 2);

204 RegExpImpl::SetLastSubject(array, subject);	207 RegExpImpl::SetLastSubject(array, subject);

(...skipping 12 matching lines...) Expand all Loading...
217	220

218 int subject_length = sub_vector.length();	221 int subject_length = sub_vector.length();

219 if (start_index + pattern_length > subject_length) return -1;	222 if (start_index + pattern_length > subject_length) return -1;

220 return SearchString(sub_vector, pat_vector, start_index);	223 return SearchString(sub_vector, pat_vector, start_index);

221 }	224 }

222 */	225 */

223 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,	226 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,

224 Handle<String> subject,	227 Handle<String> subject,

225 int index,	228 int index,

226 Handle<JSArray> last_match_info) {	229 Handle<JSArray> last_match_info) {

	230 Isolate* isolate = re->GetIsolate();

	231

227 ASSERT(0 <= index);	232 ASSERT(0 <= index);

228 ASSERT(index <= subject->length());	233 ASSERT(index <= subject->length());

229	234

230 if (!subject->IsFlat()) FlattenString(subject);	235 if (!subject->IsFlat()) FlattenString(subject);

231 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid	236 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid

232 // Extract flattened substrings of cons strings before determining asciiness.	237 // Extract flattened substrings of cons strings before determining asciiness.

233 String* seq_sub = *subject;	238 String* seq_sub = *subject;

234 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();	239 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();

235	240

236 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));	241 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));

237 int needle_len = needle->length();	242 int needle_len = needle->length();

238	243

239 if (needle_len != 0) {	244 if (needle_len != 0) {

240 if (index + needle_len > subject->length()) return Factory::null_value();	245 if (index + needle_len > subject->length())

	246 return isolate->factory()->null_value();

	247

241 // dispatch on type of strings	248 // dispatch on type of strings

242 index = (needle->IsAsciiRepresentation()	249 index = (needle->IsAsciiRepresentation()

243 ? (seq_sub->IsAsciiRepresentation()	250 ? (seq_sub->IsAsciiRepresentation()

244 ? SearchString(seq_sub->ToAsciiVector(),	251 ? SearchString(isolate,

	252 seq_sub->ToAsciiVector(),

245 needle->ToAsciiVector(),	253 needle->ToAsciiVector(),

246 index)	254 index)

247 : SearchString(seq_sub->ToUC16Vector(),	255 : SearchString(isolate,

	256 seq_sub->ToUC16Vector(),

248 needle->ToAsciiVector(),	257 needle->ToAsciiVector(),

249 index))	258 index))

250 : (seq_sub->IsAsciiRepresentation()	259 : (seq_sub->IsAsciiRepresentation()

251 ? SearchString(seq_sub->ToAsciiVector(),	260 ? SearchString(isolate,

	261 seq_sub->ToAsciiVector(),

252 needle->ToUC16Vector(),	262 needle->ToUC16Vector(),

253 index)	263 index)

254 : SearchString(seq_sub->ToUC16Vector(),	264 : SearchString(isolate,

	265 seq_sub->ToUC16Vector(),

255 needle->ToUC16Vector(),	266 needle->ToUC16Vector(),

256 index)));	267 index)));

257 if (index == -1) return Factory::null_value();	268 if (index == -1) return FACTORY->null_value();

258 }	269 }

259 ASSERT(last_match_info->HasFastElements());	270 ASSERT(last_match_info->HasFastElements());

260	271

261 {	272 {

262 NoHandleAllocation no_handles;	273 NoHandleAllocation no_handles;

263 FixedArray* array = FixedArray::cast(last_match_info->elements());	274 FixedArray* array = FixedArray::cast(last_match_info->elements());

264 SetAtomLastCapture(array, *subject, index, index + needle_len);	275 SetAtomLastCapture(array, *subject, index, index + needle_len);

265 }	276 }

266 return last_match_info;	277 return last_match_info;

267 }	278 }

(...skipping 13 matching lines...) Expand all Loading...
281 if (compiled_code->IsByteArray()) return true;	292 if (compiled_code->IsByteArray()) return true;

282 #else // V8_INTERPRETED_REGEXP (RegExp native code)	293 #else // V8_INTERPRETED_REGEXP (RegExp native code)

283 if (compiled_code->IsCode()) return true;	294 if (compiled_code->IsCode()) return true;

284 #endif	295 #endif

285 return CompileIrregexp(re, is_ascii);	296 return CompileIrregexp(re, is_ascii);

286 }	297 }

287	298

288	299

289 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {	300 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {

290 // Compile the RegExp.	301 // Compile the RegExp.

	302 Isolate* isolate = re->GetIsolate();

291 CompilationZoneScope zone_scope(DELETE_ON_EXIT);	303 CompilationZoneScope zone_scope(DELETE_ON_EXIT);

292 PostponeInterruptsScope postpone;	304 PostponeInterruptsScope postpone(isolate);

293 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));	305 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));

294 if (entry->IsJSObject()) {	306 if (entry->IsJSObject()) {

295 // If it's a JSObject, a previous compilation failed and threw this object.	307 // If it's a JSObject, a previous compilation failed and threw this object.

296 // Re-throw the object without trying again.	308 // Re-throw the object without trying again.

297 Top::Throw(entry);	309 isolate->Throw(entry);

298 return false;	310 return false;

299 }	311 }

300 ASSERT(entry->IsTheHole());	312 ASSERT(entry->IsTheHole());

301	313

302 JSRegExp::Flags flags = re->GetFlags();	314 JSRegExp::Flags flags = re->GetFlags();

303	315

304 Handle<String> pattern(re->Pattern());	316 Handle<String> pattern(re->Pattern());

305 if (!pattern->IsFlat()) {	317 if (!pattern->IsFlat()) {

306 FlattenString(pattern);	318 FlattenString(pattern);

307 }	319 }

308	320

309 RegExpCompileData compile_data;	321 RegExpCompileData compile_data;

310 FlatStringReader reader(pattern);	322 FlatStringReader reader(isolate, pattern);

311 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),	323 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),

312 &compile_data)) {	324 &compile_data)) {

313 // Throw an exception if we fail to parse the pattern.	325 // Throw an exception if we fail to parse the pattern.

314 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.	326 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.

315 ThrowRegExpException(re,	327 ThrowRegExpException(re,

316 pattern,	328 pattern,

317 compile_data.error,	329 compile_data.error,

318 "malformed_regexp");	330 "malformed_regexp");

319 return false;	331 return false;

320 }	332 }

321 RegExpEngine::CompilationResult result =	333 RegExpEngine::CompilationResult result =

322 RegExpEngine::Compile(&compile_data,	334 RegExpEngine::Compile(&compile_data,

323 flags.is_ignore_case(),	335 flags.is_ignore_case(),

324 flags.is_multiline(),	336 flags.is_multiline(),

325 pattern,	337 pattern,

326 is_ascii);	338 is_ascii);

327 if (result.error_message != NULL) {	339 if (result.error_message != NULL) {

328 // Unable to compile regexp.	340 // Unable to compile regexp.

329 Handle<FixedArray> elements = Factory::NewFixedArray(2);	341 Factory* factory = isolate->factory();

	342 Handle<FixedArray> elements = factory->NewFixedArray(2);

330 elements->set(0, *pattern);	343 elements->set(0, *pattern);

331 Handle<String> error_message =	344 Handle<String> error_message =

332 Factory::NewStringFromUtf8(CStrVector(result.error_message));	345 factory->NewStringFromUtf8(CStrVector(result.error_message));

333 elements->set(1, *error_message);	346 elements->set(1, *error_message);

334 Handle<JSArray> array = Factory::NewJSArrayWithElements(elements);	347 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);

335 Handle<Object> regexp_err =	348 Handle<Object> regexp_err =

336 Factory::NewSyntaxError("malformed_regexp", array);	349 factory->NewSyntaxError("malformed_regexp", array);

337 Top::Throw(*regexp_err);	350 isolate->Throw(*regexp_err);

338 re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err);	351 re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err);

339 return false;	352 return false;

340 }	353 }

341	354

342 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));	355 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));

343 data->set(JSRegExp::code_index(is_ascii), result.code);	356 data->set(JSRegExp::code_index(is_ascii), result.code);

344 int register_max = IrregexpMaxRegisterCount(*data);	357 int register_max = IrregexpMaxRegisterCount(*data);

345 if (result.num_registers > register_max) {	358 if (result.num_registers > register_max) {

346 SetIrregexpMaxRegisterCount(*data, result.num_registers);	359 SetIrregexpMaxRegisterCount(*data, result.num_registers);

347 }	360 }

(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
379 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {	392 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {

380 return Code::cast(re->get(JSRegExp::code_index(is_ascii)));	393 return Code::cast(re->get(JSRegExp::code_index(is_ascii)));

381 }	394 }

382	395

383	396

384 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,	397 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,

385 Handle<String> pattern,	398 Handle<String> pattern,

386 JSRegExp::Flags flags,	399 JSRegExp::Flags flags,

387 int capture_count) {	400 int capture_count) {

388 // Initialize compiled code entries to null.	401 // Initialize compiled code entries to null.

389 Factory::SetRegExpIrregexpData(re,	402 re->GetIsolate()->factory()->SetRegExpIrregexpData(re,

390 JSRegExp::IRREGEXP,	403 JSRegExp::IRREGEXP,

391 pattern,	404 pattern,

392 flags,	405 flags,

393 capture_count);	406 capture_count);

394 }	407 }

395	408

396	409

397 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,	410 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,

398 Handle<String> subject) {	411 Handle<String> subject) {

399 if (!subject->IsFlat()) {	412 if (!subject->IsFlat()) {

400 FlattenString(subject);	413 FlattenString(subject);

401 }	414 }

402 // Check the asciiness of the underlying storage.	415 // Check the asciiness of the underlying storage.

403 bool is_ascii;	416 bool is_ascii;

(...skipping 17 matching lines...) Expand all Loading...
421 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;	434 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;

422 #endif // V8_INTERPRETED_REGEXP	435 #endif // V8_INTERPRETED_REGEXP

423 }	436 }

424	437

425	438

426 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(	439 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(

427 Handle<JSRegExp> regexp,	440 Handle<JSRegExp> regexp,

428 Handle<String> subject,	441 Handle<String> subject,

429 int index,	442 int index,

430 Vector<int> output) {	443 Vector<int> output) {

431 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()));	444 Isolate* isolate = regexp->GetIsolate();

	445

	446 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);

432	447

433 ASSERT(index >= 0);	448 ASSERT(index >= 0);

434 ASSERT(index <= subject->length());	449 ASSERT(index <= subject->length());

435 ASSERT(subject->IsFlat());	450 ASSERT(subject->IsFlat());

436	451

437 // A flat ASCII string might have a two-byte first part.	452 // A flat ASCII string might have a two-byte first part.

438 if (subject->IsConsString()) {	453 if (subject->IsConsString()) {

439 subject = Handle<String>(ConsString::cast(*subject)->first());	454 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate);

440 }	455 }

441	456

442 #ifndef V8_INTERPRETED_REGEXP	457 #ifndef V8_INTERPRETED_REGEXP

443 ASSERT(output.length() >=	458 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);

444 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);

445 do {	459 do {

446 bool is_ascii = subject->IsAsciiRepresentation();	460 bool is_ascii = subject->IsAsciiRepresentation();

447 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii));	461 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);

448 NativeRegExpMacroAssembler::Result res =	462 NativeRegExpMacroAssembler::Result res =

449 NativeRegExpMacroAssembler::Match(code,	463 NativeRegExpMacroAssembler::Match(code,

450 subject,	464 subject,

451 output.start(),	465 output.start(),

452 output.length(),	466 output.length(),

453 index);	467 index,

	468 isolate);

454 if (res != NativeRegExpMacroAssembler::RETRY) {	469 if (res != NativeRegExpMacroAssembler::RETRY) {

455 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||	470 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||

456 Top::has_pending_exception());	471 isolate->has_pending_exception());

457 STATIC_ASSERT(	472 STATIC_ASSERT(

458 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);	473 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);

459 STATIC_ASSERT(	474 STATIC_ASSERT(

460 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);	475 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);

461 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)	476 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)

462 == RE_EXCEPTION);	477 == RE_EXCEPTION);

463 return static_cast<IrregexpResult>(res);	478 return static_cast<IrregexpResult>(res);

464 }	479 }

465 // If result is RETRY, the string has changed representation, and we	480 // If result is RETRY, the string has changed representation, and we

466 // must restart from scratch.	481 // must restart from scratch.

(...skipping 10 matching lines...) Expand all Loading...
477 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));	492 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));

478 bool is_ascii = subject->IsAsciiRepresentation();	493 bool is_ascii = subject->IsAsciiRepresentation();

479 // We must have done EnsureCompiledIrregexp, so we can get the number of	494 // We must have done EnsureCompiledIrregexp, so we can get the number of

480 // registers.	495 // registers.

481 int* register_vector = output.start();	496 int* register_vector = output.start();

482 int number_of_capture_registers =	497 int number_of_capture_registers =

483 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;	498 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;

484 for (int i = number_of_capture_registers - 1; i >= 0; i--) {	499 for (int i = number_of_capture_registers - 1; i >= 0; i--) {

485 register_vector[i] = -1;	500 register_vector[i] = -1;

486 }	501 }

487 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii));	502 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);

488	503

489 if (IrregexpInterpreter::Match(byte_codes,	504 if (IrregexpInterpreter::Match(byte_codes,

490 subject,	505 subject,

491 register_vector,	506 register_vector,

492 index)) {	507 index)) {

493 return RE_SUCCESS;	508 return RE_SUCCESS;

494 }	509 }

495 return RE_FAILURE;	510 return RE_FAILURE;

496 #endif // V8_INTERPRETED_REGEXP	511 #endif // V8_INTERPRETED_REGEXP

497 }	512 }

(...skipping 11 matching lines...) Expand all Loading...
509 if (FLAG_trace_regexp_bytecodes) {	524 if (FLAG_trace_regexp_bytecodes) {

510 String* pattern = jsregexp->Pattern();	525 String* pattern = jsregexp->Pattern();

511 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));	526 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));

512 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));	527 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));

513 }	528 }

514 #endif	529 #endif

515 #endif	530 #endif

516 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);	531 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);

517 if (required_registers < 0) {	532 if (required_registers < 0) {

518 // Compiling failed with an exception.	533 // Compiling failed with an exception.

519 ASSERT(Top::has_pending_exception());	534 ASSERT(Isolate::Current()->has_pending_exception());

520 return Handle<Object>::null();	535 return Handle<Object>::null();

521 }	536 }

522	537

523 OffsetsVector registers(required_registers);	538 OffsetsVector registers(required_registers);

524	539

525 IrregexpResult res = RegExpImpl::IrregexpExecOnce(	540 IrregexpResult res = RegExpImpl::IrregexpExecOnce(

526 jsregexp, subject, previous_index, Vector<int>(registers.vector(),	541 jsregexp, subject, previous_index, Vector<int>(registers.vector(),

527 registers.length()));	542 registers.length()));

528 if (res == RE_SUCCESS) {	543 if (res == RE_SUCCESS) {

529 int capture_register_count =	544 int capture_register_count =

530 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;	545 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;

531 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);	546 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);

532 AssertNoAllocation no_gc;	547 AssertNoAllocation no_gc;

533 int* register_vector = registers.vector();	548 int* register_vector = registers.vector();

534 FixedArray* array = FixedArray::cast(last_match_info->elements());	549 FixedArray* array = FixedArray::cast(last_match_info->elements());

535 for (int i = 0; i < capture_register_count; i += 2) {	550 for (int i = 0; i < capture_register_count; i += 2) {

536 SetCapture(array, i, register_vector[i]);	551 SetCapture(array, i, register_vector[i]);

537 SetCapture(array, i + 1, register_vector[i + 1]);	552 SetCapture(array, i + 1, register_vector[i + 1]);

538 }	553 }

539 SetLastCaptureCount(array, capture_register_count);	554 SetLastCaptureCount(array, capture_register_count);

540 SetLastSubject(array, *subject);	555 SetLastSubject(array, *subject);

541 SetLastInput(array, *subject);	556 SetLastInput(array, *subject);

542 return last_match_info;	557 return last_match_info;

543 }	558 }

544 if (res == RE_EXCEPTION) {	559 if (res == RE_EXCEPTION) {

545 ASSERT(Top::has_pending_exception());	560 ASSERT(Isolate::Current()->has_pending_exception());

546 return Handle<Object>::null();	561 return Handle<Object>::null();

547 }	562 }

548 ASSERT(res == RE_FAILURE);	563 ASSERT(res == RE_FAILURE);

549 return Factory::null_value();	564 return Isolate::Current()->factory()->null_value();

550 }	565 }

551	566

552	567

553 // -------------------------------------------------------------------	568 // -------------------------------------------------------------------

554 // Implementation of the Irregexp regular expression engine.	569 // Implementation of the Irregexp regular expression engine.

555 //	570 //

556 // The Irregexp regular expression engine is intended to be a complete	571 // The Irregexp regular expression engine is intended to be a complete

557 // implementation of ECMAScript regular expressions. It generates either	572 // implementation of ECMAScript regular expressions. It generates either

558 // bytecodes or native code.	573 // bytecodes or native code.

559	574

(...skipping 739 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1299 case Guard::GEQ:	1314 case Guard::GEQ:

1300 ASSERT(!trace->mentions_reg(guard->reg()));	1315 ASSERT(!trace->mentions_reg(guard->reg()));

1301 macro_assembler->IfRegisterLT(guard->reg(),	1316 macro_assembler->IfRegisterLT(guard->reg(),

1302 guard->value(),	1317 guard->value(),

1303 trace->backtrack());	1318 trace->backtrack());

1304 break;	1319 break;

1305 }	1320 }

1306 }	1321 }

1307	1322

1308	1323

1309 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;

1310 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;

1311

1312

1313 // Returns the number of characters in the equivalence class, omitting those	1324 // Returns the number of characters in the equivalence class, omitting those

1314 // that cannot occur in the source string because it is ASCII.	1325 // that cannot occur in the source string because it is ASCII.

1315 static int GetCaseIndependentLetters(uc16 character,	1326 static int GetCaseIndependentLetters(Isolate* isolate,

	1327 uc16 character,

1316 bool ascii_subject,	1328 bool ascii_subject,

1317 unibrow::uchar* letters) {	1329 unibrow::uchar* letters) {

1318 int length = uncanonicalize.get(character, '\0', letters);	1330 int length =

	1331 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);

1319 // Unibrow returns 0 or 1 for characters where case independence is	1332 // Unibrow returns 0 or 1 for characters where case independence is

1320 // trivial.	1333 // trivial.

1321 if (length == 0) {	1334 if (length == 0) {

1322 letters[0] = character;	1335 letters[0] = character;

1323 length = 1;	1336 length = 1;

1324 }	1337 }

1325 if (!ascii_subject || character <= String::kMaxAsciiCharCode) {	1338 if (!ascii_subject || character <= String::kMaxAsciiCharCode) {

1326 return length;	1339 return length;

1327 }	1340 }

1328 // The standard requires that non-ASCII characters cannot have ASCII	1341 // The standard requires that non-ASCII characters cannot have ASCII

1329 // character codes in their equivalence class.	1342 // character codes in their equivalence class.

1330 return 0;	1343 return 0;

1331 }	1344 }

1332	1345

1333	1346

1334 static inline bool EmitSimpleCharacter(RegExpCompiler* compiler,	1347 static inline bool EmitSimpleCharacter(Isolate* isolate,

	1348 RegExpCompiler* compiler,

1335 uc16 c,	1349 uc16 c,

1336 Label* on_failure,	1350 Label* on_failure,

1337 int cp_offset,	1351 int cp_offset,

1338 bool check,	1352 bool check,

1339 bool preloaded) {	1353 bool preloaded) {

1340 RegExpMacroAssembler* assembler = compiler->macro_assembler();	1354 RegExpMacroAssembler* assembler = compiler->macro_assembler();

1341 bool bound_checked = false;	1355 bool bound_checked = false;

1342 if (!preloaded) {	1356 if (!preloaded) {

1343 assembler->LoadCurrentCharacter(	1357 assembler->LoadCurrentCharacter(

1344 cp_offset,	1358 cp_offset,

1345 on_failure,	1359 on_failure,

1346 check);	1360 check);

1347 bound_checked = true;	1361 bound_checked = true;

1348 }	1362 }

1349 assembler->CheckNotCharacter(c, on_failure);	1363 assembler->CheckNotCharacter(c, on_failure);

1350 return bound_checked;	1364 return bound_checked;

1351 }	1365 }

1352	1366

1353	1367

1354 // Only emits non-letters (things that don't have case). Only used for case	1368 // Only emits non-letters (things that don't have case). Only used for case

1355 // independent matches.	1369 // independent matches.

1356 static inline bool EmitAtomNonLetter(RegExpCompiler* compiler,	1370 static inline bool EmitAtomNonLetter(Isolate* isolate,

	1371 RegExpCompiler* compiler,

1357 uc16 c,	1372 uc16 c,

1358 Label* on_failure,	1373 Label* on_failure,

1359 int cp_offset,	1374 int cp_offset,

1360 bool check,	1375 bool check,

1361 bool preloaded) {	1376 bool preloaded) {

1362 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();	1377 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

1363 bool ascii = compiler->ascii();	1378 bool ascii = compiler->ascii();

1364 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	1379 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

1365 int length = GetCaseIndependentLetters(c, ascii, chars);	1380 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);

1366 if (length < 1) {	1381 if (length < 1) {

1367 // This can't match. Must be an ASCII subject and a non-ASCII character.	1382 // This can't match. Must be an ASCII subject and a non-ASCII character.

1368 // We do not need to do anything since the ASCII pass already handled this.	1383 // We do not need to do anything since the ASCII pass already handled this.

1369 return false; // Bounds not checked.	1384 return false; // Bounds not checked.

1370 }	1385 }

1371 bool checked = false;	1386 bool checked = false;

1372 // We handle the length > 1 case in a later pass.	1387 // We handle the length > 1 case in a later pass.

1373 if (length == 1) {	1388 if (length == 1) {

1374 if (ascii && c > String::kMaxAsciiCharCodeU) {	1389 if (ascii && c > String::kMaxAsciiCharCodeU) {

1375 // Can't match - see above.	1390 // Can't match - see above.

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1417 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,	1432 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,

1418 diff,	1433 diff,

1419 mask,	1434 mask,

1420 on_failure);	1435 on_failure);

1421 return true;	1436 return true;

1422 }	1437 }

1423 return false;	1438 return false;

1424 }	1439 }

1425	1440

1426	1441

1427 typedef bool EmitCharacterFunction(RegExpCompiler* compiler,	1442 typedef bool EmitCharacterFunction(Isolate* isolate,

	1443 RegExpCompiler* compiler,

1428 uc16 c,	1444 uc16 c,

1429 Label* on_failure,	1445 Label* on_failure,

1430 int cp_offset,	1446 int cp_offset,

1431 bool check,	1447 bool check,

1432 bool preloaded);	1448 bool preloaded);

1433	1449

1434 // Only emits letters (things that have case). Only used for case independent	1450 // Only emits letters (things that have case). Only used for case independent

1435 // matches.	1451 // matches.

1436 static inline bool EmitAtomLetter(RegExpCompiler* compiler,	1452 static inline bool EmitAtomLetter(Isolate* isolate,

	1453 RegExpCompiler* compiler,

1437 uc16 c,	1454 uc16 c,

1438 Label* on_failure,	1455 Label* on_failure,

1439 int cp_offset,	1456 int cp_offset,

1440 bool check,	1457 bool check,

1441 bool preloaded) {	1458 bool preloaded) {

1442 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();	1459 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

1443 bool ascii = compiler->ascii();	1460 bool ascii = compiler->ascii();

1444 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	1461 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

1445 int length = GetCaseIndependentLetters(c, ascii, chars);	1462 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);

1446 if (length <= 1) return false;	1463 if (length <= 1) return false;

1447 // We may not need to check against the end of the input string	1464 // We may not need to check against the end of the input string

1448 // if this character lies before a character that matched.	1465 // if this character lies before a character that matched.

1449 if (!preloaded) {	1466 if (!preloaded) {

1450 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);	1467 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

1451 }	1468 }

1452 Label ok;	1469 Label ok;

1453 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);	1470 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);

1454 switch (length) {	1471 switch (length) {

1455 case 2: {	1472 case 2: {

(...skipping 417 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1873 //	1890 //

1874 // We iterate along the text object, building up for each character a	1891 // We iterate along the text object, building up for each character a

1875 // mask and value that can be used to test for a quick failure to match.	1892 // mask and value that can be used to test for a quick failure to match.

1876 // The masks and values for the positions will be combined into a single	1893 // The masks and values for the positions will be combined into a single

1877 // machine word for the current character width in order to be used in	1894 // machine word for the current character width in order to be used in

1878 // generating a quick check.	1895 // generating a quick check.

1879 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,	1896 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,

1880 RegExpCompiler* compiler,	1897 RegExpCompiler* compiler,

1881 int characters_filled_in,	1898 int characters_filled_in,

1882 bool not_at_start) {	1899 bool not_at_start) {

	1900 Isolate* isolate = Isolate::Current();

1883 ASSERT(characters_filled_in < details->characters());	1901 ASSERT(characters_filled_in < details->characters());

1884 int characters = details->characters();	1902 int characters = details->characters();

1885 int char_mask;	1903 int char_mask;

1886 int char_shift;	1904 int char_shift;

1887 if (compiler->ascii()) {	1905 if (compiler->ascii()) {

1888 char_mask = String::kMaxAsciiCharCode;	1906 char_mask = String::kMaxAsciiCharCode;

1889 char_shift = 8;	1907 char_shift = 8;

1890 } else {	1908 } else {

1891 char_mask = String::kMaxUC16CharCode;	1909 char_mask = String::kMaxUC16CharCode;

1892 char_shift = 16;	1910 char_shift = 16;

(...skipping 10 matching lines...) Expand all Loading...
1903 // If we expect a non-ASCII character from an ASCII string,	1921 // If we expect a non-ASCII character from an ASCII string,

1904 // there is no way we can match. Not even case independent	1922 // there is no way we can match. Not even case independent

1905 // matching can turn an ASCII character into non-ASCII or	1923 // matching can turn an ASCII character into non-ASCII or

1906 // vice versa.	1924 // vice versa.

1907 details->set_cannot_match();	1925 details->set_cannot_match();

1908 pos->determines_perfectly = false;	1926 pos->determines_perfectly = false;

1909 return;	1927 return;

1910 }	1928 }

1911 if (compiler->ignore_case()) {	1929 if (compiler->ignore_case()) {

1912 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	1930 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

1913 int length = GetCaseIndependentLetters(c, compiler->ascii(), chars);	1931 int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),

	1932 chars);

1914 ASSERT(length != 0); // Can only happen if c > char_mask (see above).	1933 ASSERT(length != 0); // Can only happen if c > char_mask (see above).

1915 if (length == 1) {	1934 if (length == 1) {

1916 // This letter has no case equivalents, so it's nice and simple	1935 // This letter has no case equivalents, so it's nice and simple

1917 // and the mask-compare will determine definitely whether we have	1936 // and the mask-compare will determine definitely whether we have

1918 // a match at this character position.	1937 // a match at this character position.

1919 pos->mask = char_mask;	1938 pos->mask = char_mask;

1920 pos->value = c;	1939 pos->value = c;

1921 pos->determines_perfectly = true;	1940 pos->determines_perfectly = true;

1922 } else {	1941 } else {

1923 uint32_t common_bits = char_mask;	1942 uint32_t common_bits = char_mask;

(...skipping 479 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2403 // loading characters, which means we do not need to recheck the bounds	2422 // loading characters, which means we do not need to recheck the bounds

2404 // up to the limit the quick check already checked. In addition the quick	2423 // up to the limit the quick check already checked. In addition the quick

2405 // check can have involved a mask and compare operation which may simplify	2424 // check can have involved a mask and compare operation which may simplify

2406 // or obviate the need for further checks at some character positions.	2425 // or obviate the need for further checks at some character positions.

2407 void TextNode::TextEmitPass(RegExpCompiler* compiler,	2426 void TextNode::TextEmitPass(RegExpCompiler* compiler,

2408 TextEmitPassType pass,	2427 TextEmitPassType pass,

2409 bool preloaded,	2428 bool preloaded,

2410 Trace* trace,	2429 Trace* trace,

2411 bool first_element_checked,	2430 bool first_element_checked,

2412 int* checked_up_to) {	2431 int* checked_up_to) {

	2432 Isolate* isolate = Isolate::Current();

2413 RegExpMacroAssembler* assembler = compiler->macro_assembler();	2433 RegExpMacroAssembler* assembler = compiler->macro_assembler();

2414 bool ascii = compiler->ascii();	2434 bool ascii = compiler->ascii();

2415 Label* backtrack = trace->backtrack();	2435 Label* backtrack = trace->backtrack();

2416 QuickCheckDetails* quick_check = trace->quick_check_performed();	2436 QuickCheckDetails* quick_check = trace->quick_check_performed();

2417 int element_count = elms_->length();	2437 int element_count = elms_->length();

2418 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {	2438 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {

2419 TextElement elm = elms_->at(i);	2439 TextElement elm = elms_->at(i);

2420 int cp_offset = trace->cp_offset() + elm.cp_offset;	2440 int cp_offset = trace->cp_offset() + elm.cp_offset;

2421 if (elm.type == TextElement::ATOM) {	2441 if (elm.type == TextElement::ATOM) {

2422 Vector<const uc16> quarks = elm.data.u_atom->data();	2442 Vector<const uc16> quarks = elm.data.u_atom->data();

(...skipping 15 matching lines...) Expand all Loading...
2438 case SIMPLE_CHARACTER_MATCH:	2458 case SIMPLE_CHARACTER_MATCH:

2439 emit_function = &EmitSimpleCharacter;	2459 emit_function = &EmitSimpleCharacter;

2440 break;	2460 break;

2441 case CASE_CHARACTER_MATCH:	2461 case CASE_CHARACTER_MATCH:

2442 emit_function = &EmitAtomLetter;	2462 emit_function = &EmitAtomLetter;

2443 break;	2463 break;

2444 default:	2464 default:

2445 break;	2465 break;

2446 }	2466 }

2447 if (emit_function != NULL) {	2467 if (emit_function != NULL) {

2448 bool bound_checked = emit_function(compiler,	2468 bool bound_checked = emit_function(isolate,

	2469 compiler,

2449 quarks[j],	2470 quarks[j],

2450 backtrack,	2471 backtrack,

2451 cp_offset + j,	2472 cp_offset + j,

2452 *checked_up_to < cp_offset + j,	2473 *checked_up_to < cp_offset + j,

2453 preloaded);	2474 preloaded);

2454 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);	2475 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);

2455 }	2476 }

2456 }	2477 }

2457 } else {	2478 } else {

2458 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);	2479 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);

(...skipping 1619 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4078 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);	4099 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);

4079 for (int i = 0; i < overlay.length(); i += 2) {	4100 for (int i = 0; i < overlay.length(); i += 2) {

4080 table.AddRange(CharacterRange(overlay[i], overlay[i+1]),	4101 table.AddRange(CharacterRange(overlay[i], overlay[i+1]),

4081 CharacterRangeSplitter::kInOverlay);	4102 CharacterRangeSplitter::kInOverlay);

4082 }	4103 }

4083 CharacterRangeSplitter callback(included, excluded);	4104 CharacterRangeSplitter callback(included, excluded);

4084 table.ForEach(&callback);	4105 table.ForEach(&callback);

4085 }	4106 }

4086	4107

4087	4108

4088 static void AddUncanonicals(ZoneList<CharacterRange>* ranges,	4109 static void AddUncanonicals(Isolate* isolate,

	4110 ZoneList<CharacterRange>* ranges,

4089 int bottom,	4111 int bottom,

4090 int top);	4112 int top);

4091	4113

4092	4114

4093 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,	4115 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,

4094 bool is_ascii) {	4116 bool is_ascii) {

	4117 Isolate* isolate = Isolate::Current();

4095 uc16 bottom = from();	4118 uc16 bottom = from();

4096 uc16 top = to();	4119 uc16 top = to();

4097 if (is_ascii) {	4120 if (is_ascii) {

4098 if (bottom > String::kMaxAsciiCharCode) return;	4121 if (bottom > String::kMaxAsciiCharCode) return;

4099 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;	4122 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;

4100 }	4123 }

4101 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	4124 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

4102 if (top == bottom) {	4125 if (top == bottom) {

4103 // If this is a singleton we just expand the one character.	4126 // If this is a singleton we just expand the one character.

4104 int length = uncanonicalize.get(bottom, '\0', chars);	4127 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);

4105 for (int i = 0; i < length; i++) {	4128 for (int i = 0; i < length; i++) {

4106 uc32 chr = chars[i];	4129 uc32 chr = chars[i];

4107 if (chr != bottom) {	4130 if (chr != bottom) {

4108 ranges->Add(CharacterRange::Singleton(chars[i]));	4131 ranges->Add(CharacterRange::Singleton(chars[i]));

4109 }	4132 }

4110 }	4133 }

4111 } else {	4134 } else {

4112 // If this is a range we expand the characters block by block,	4135 // If this is a range we expand the characters block by block,

4113 // expanding contiguous subranges (blocks) one at a time.	4136 // expanding contiguous subranges (blocks) one at a time.

4114 // The approach is as follows. For a given start character we	4137 // The approach is as follows. For a given start character we

4115 // look up the remainder of the block that contains it (represented	4138 // look up the remainder of the block that contains it (represented

4116 // by the end point), for instance we find 'z' if the character	4139 // by the end point), for instance we find 'z' if the character

4117 // is 'c'. A block is characterized by the property	4140 // is 'c'. A block is characterized by the property

4118 // that all characters uncanonicalize in the same way, except that	4141 // that all characters uncanonicalize in the same way, except that

4119 // each entry in the result is incremented by the distance from the first	4142 // each entry in the result is incremented by the distance from the first

4120 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and	4143 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and

4121 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].	4144 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].

4122 // Once we've found the end point we look up its uncanonicalization	4145 // Once we've found the end point we look up its uncanonicalization

4123 // and produce a range for each element. For instance for [c-f]	4146 // and produce a range for each element. For instance for [c-f]

4124 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only	4147 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only

4125 // add a range if it is not already contained in the input, so [c-f]	4148 // add a range if it is not already contained in the input, so [c-f]

4126 // will be skipped but [C-F] will be added. If this range is not	4149 // will be skipped but [C-F] will be added. If this range is not

4127 // completely contained in a block we do this for all the blocks	4150 // completely contained in a block we do this for all the blocks

4128 // covered by the range (handling characters that are not in a block	4151 // covered by the range (handling characters that are not in a block

4129 // as a "singleton block").	4152 // as a "singleton block").

4130 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];	4153 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];

4131 int pos = bottom;	4154 int pos = bottom;

4132 while (pos < top) {	4155 while (pos < top) {

4133 int length = canonrange.get(pos, '\0', range);	4156 int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);

4134 uc16 block_end;	4157 uc16 block_end;

4135 if (length == 0) {	4158 if (length == 0) {

4136 block_end = pos;	4159 block_end = pos;

4137 } else {	4160 } else {

4138 ASSERT_EQ(1, length);	4161 ASSERT_EQ(1, length);

4139 block_end = range[0];	4162 block_end = range[0];

4140 }	4163 }

4141 int end = (block_end > top) ? top : block_end;	4164 int end = (block_end > top) ? top : block_end;

4142 length = uncanonicalize.get(block_end, '\0', range);	4165 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);

4143 for (int i = 0; i < length; i++) {	4166 for (int i = 0; i < length; i++) {

4144 uc32 c = range[i];	4167 uc32 c = range[i];

4145 uc16 range_from = c - (block_end - pos);	4168 uc16 range_from = c - (block_end - pos);

4146 uc16 range_to = c - (block_end - end);	4169 uc16 range_to = c - (block_end - end);

4147 if (!(bottom <= range_from && range_to <= top)) {	4170 if (!(bottom <= range_from && range_to <= top)) {

4148 ranges->Add(CharacterRange(range_from, range_to));	4171 ranges->Add(CharacterRange(range_from, range_to));

4149 }	4172 }

4150 }	4173 }

4151 pos = end + 1;	4174 pos = end + 1;

4152 }	4175 }

(...skipping 89 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4242 result.SetElementsInSecondSet();	4265 result.SetElementsInSecondSet();

4243 } else if (j < range->length()) {	4266 } else if (j < range->length()) {

4244 // Argument range contains something not in word range.	4267 // Argument range contains something not in word range.

4245 result.SetElementsInFirstSet();	4268 result.SetElementsInFirstSet();

4246 }	4269 }

4247	4270

4248 return result;	4271 return result;

4249 }	4272 }

4250	4273

4251	4274

4252 static void AddUncanonicals(ZoneList<CharacterRange>* ranges,	4275 static void AddUncanonicals(Isolate* isolate,

	4276 ZoneList<CharacterRange>* ranges,

4253 int bottom,	4277 int bottom,

4254 int top) {	4278 int top) {

4255 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	4279 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

4256 // Zones with no case mappings. There is a DEBUG-mode loop to assert that	4280 // Zones with no case mappings. There is a DEBUG-mode loop to assert that

4257 // this table is correct.	4281 // this table is correct.

4258 // 0x0600 - 0x0fff	4282 // 0x0600 - 0x0fff

4259 // 0x1100 - 0x1cff	4283 // 0x1100 - 0x1cff

4260 // 0x2000 - 0x20ff	4284 // 0x2000 - 0x20ff

4261 // 0x2200 - 0x23ff	4285 // 0x2200 - 0x23ff

4262 // 0x2500 - 0x2bff	4286 // 0x2500 - 0x2bff

(...skipping 11 matching lines...) Expand all Loading...
4274 if (top <= boundaries[0]) {	4298 if (top <= boundaries[0]) {

4275 CharacterRange range(bottom, top);	4299 CharacterRange range(bottom, top);

4276 range.AddCaseEquivalents(ranges, false);	4300 range.AddCaseEquivalents(ranges, false);

4277 return;	4301 return;

4278 }	4302 }

4279	4303

4280 // Split up very large ranges. This helps remove ranges where there are no	4304 // Split up very large ranges. This helps remove ranges where there are no

4281 // case mappings.	4305 // case mappings.

4282 for (int i = 0; i < boundary_count; i++) {	4306 for (int i = 0; i < boundary_count; i++) {

4283 if (bottom < boundaries[i] && top >= boundaries[i]) {	4307 if (bottom < boundaries[i] && top >= boundaries[i]) {

4284 AddUncanonicals(ranges, bottom, boundaries[i] - 1);	4308 AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1);

4285 AddUncanonicals(ranges, boundaries[i], top);	4309 AddUncanonicals(isolate, ranges, boundaries[i], top);

4286 return;	4310 return;

4287 }	4311 }

4288 }	4312 }

4289	4313

4290 // If we are completely in a zone with no case mappings then we are done.	4314 // If we are completely in a zone with no case mappings then we are done.

4291 for (int i = 0; i < boundary_count; i += 2) {	4315 for (int i = 0; i < boundary_count; i += 2) {

4292 if (bottom >= boundaries[i] && top < boundaries[i + 1]) {	4316 if (bottom >= boundaries[i] && top < boundaries[i + 1]) {

4293 #ifdef DEBUG	4317 #ifdef DEBUG

4294 for (int j = bottom; j <= top; j++) {	4318 for (int j = bottom; j <= top; j++) {

4295 unsigned current_char = j;	4319 unsigned current_char = j;

4296 int length = uncanonicalize.get(current_char, '\0', chars);	4320 int length = isolate->jsregexp_uncanonicalize()->get(current_char,

	4321 '\0', chars);

4297 for (int k = 0; k < length; k++) {	4322 for (int k = 0; k < length; k++) {

4298 ASSERT(chars[k] == current_char);	4323 ASSERT(chars[k] == current_char);

4299 }	4324 }

4300 }	4325 }

4301 #endif	4326 #endif

4302 return;	4327 return;

4303 }	4328 }

4304 }	4329 }

4305	4330

4306 // Step through the range finding equivalent characters.	4331 // Step through the range finding equivalent characters.

4307 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);	4332 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);

4308 for (int i = bottom; i <= top; i++) {	4333 for (int i = bottom; i <= top; i++) {

4309 int length = uncanonicalize.get(i, '\0', chars);	4334 int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars);

4310 for (int j = 0; j < length; j++) {	4335 for (int j = 0; j < length; j++) {

4311 uc32 chr = chars[j];	4336 uc32 chr = chars[j];

4312 if (chr != i && (chr < bottom || chr > top)) {	4337 if (chr != i && (chr < bottom || chr > top)) {

4313 characters->Add(chr);	4338 characters->Add(chr);

4314 }	4339 }

4315 }	4340 }

4316 }	4341 }

4317	4342

4318 // Step through the equivalent characters finding simple ranges and	4343 // Step through the equivalent characters finding simple ranges and

4319 // adding ranges to the character class.	4344 // adding ranges to the character class.

(...skipping 501 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4821 else	4846 else

4822 return empty();	4847 return empty();

4823 }	4848 }

4824	4849

4825	4850

4826 // -------------------------------------------------------------------	4851 // -------------------------------------------------------------------

4827 // Analysis	4852 // Analysis

4828	4853

4829	4854

4830 void Analysis::EnsureAnalyzed(RegExpNode* that) {	4855 void Analysis::EnsureAnalyzed(RegExpNode* that) {

4831 StackLimitCheck check;	4856 StackLimitCheck check(Isolate::Current());

4832 if (check.HasOverflowed()) {	4857 if (check.HasOverflowed()) {

4833 fail("Stack overflow");	4858 fail("Stack overflow");

4834 return;	4859 return;

4835 }	4860 }

4836 if (that->info()->been_analyzed || that->info()->being_analyzed)	4861 if (that->info()->been_analyzed || that->info()->being_analyzed)

4837 return;	4862 return;

4838 that->info()->being_analyzed = true;	4863 that->info()->being_analyzed = true;

4839 that->Accept(this);	4864 that->Accept(this);

4840 that->info()->being_analyzed = false;	4865 that->info()->being_analyzed = false;

4841 that->info()->been_analyzed = true;	4866 that->info()->been_analyzed = true;

(...skipping 489 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5331 macro_assembler.SetCurrentPositionFromEnd(max_length);	5356 macro_assembler.SetCurrentPositionFromEnd(max_length);

5332 }	5357 }

5333	5358

5334 return compiler.Assemble(&macro_assembler,	5359 return compiler.Assemble(&macro_assembler,

5335 node,	5360 node,

5336 data->capture_count,	5361 data->capture_count,

5337 pattern);	5362 pattern);

5338 }	5363 }

5339	5364

5340	5365

5341 int OffsetsVector::static_offsets_vector_[

5342 OffsetsVector::kStaticOffsetsVectorSize];

5343

5344 }} // namespace v8::internal	5366 }} // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/jsregexp.h ('k') | src/jump-target-heavy.h » ('j') | no next file with comments »