src/jsregexp.cc - Issue 11000: Periodic merge of bleeding_edge to experimental code generator branch....

Side by Side Diff: src/jsregexp.cc

Issue 11000: Periodic merge of bleeding_edge to experimental code generator branch.... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/toiger/

Patch Set: Created 12 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

11 // with the distribution.	11 // with the distribution.

12 // * Neither the name of Google Inc. nor the names of its	12 // * Neither the name of Google Inc. nor the names of its

13 // contributors may be used to endorse or promote products derived	13 // contributors may be used to endorse or promote products derived

14 // from this software without specific prior written permission.	14 // from this software without specific prior written permission.

15 //	15 //

16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS	16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT	17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR	18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT	19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,	20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT	21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

	28 #define _HAS_EXCEPTIONS 0

	29 #include <set>

	30

28 #include "v8.h"	31 #include "v8.h"

29	32

	33 #include "ast.h"

30 #include "execution.h"	34 #include "execution.h"

31 #include "factory.h"	35 #include "factory.h"

32 #include "jsregexp.h"	36 #include "jsregexp-inl.h"

33 #include "platform.h"	37 #include "platform.h"

34 #include "runtime.h"	38 #include "runtime.h"

35 #include "top.h"	39 #include "top.h"

36 #include "compilation-cache.h"	40 #include "compilation-cache.h"

	41 #include "string-stream.h"

	42 #include "parser.h"

	43 #include "regexp-macro-assembler.h"

	44 #include "regexp-macro-assembler-tracer.h"

	45 #include "regexp-macro-assembler-irregexp.h"

	46

	47 #ifdef ARM

	48 #include "regexp-macro-assembler-arm.h"

	49 #else // IA32

	50 #include "macro-assembler-ia32.h"

	51 #include "regexp-macro-assembler-ia32.h"

	52 #endif

	53

	54 #include "interpreter-irregexp.h"

37	55

38 // Including pcre.h undefines DEBUG to avoid getting debug output from	56 // Including pcre.h undefines DEBUG to avoid getting debug output from

39 // the JSCRE implementation. Make sure to redefine it in debug mode	57 // the JSCRE implementation. Make sure to redefine it in debug mode

40 // after having included the header file.	58 // after having included the header file.

41 #ifdef DEBUG	59 #ifdef DEBUG

42 #include "third_party/jscre/pcre.h"	60 #include "third_party/jscre/pcre.h"

43 #define DEBUG	61 #define DEBUG

44 #else	62 #else

45 #include "third_party/jscre/pcre.h"	63 #include "third_party/jscre/pcre.h"

46 #endif	64 #endif

47	65

	66

48 namespace v8 { namespace internal {	67 namespace v8 { namespace internal {

49	68

50	69

51 #define CAPTURE_INDEX 0

52 #define INTERNAL_INDEX 1

53

54 static Failure* malloc_failure;	70 static Failure* malloc_failure;

55	71

56 static void* JSREMalloc(size_t size) {	72 static void* JSREMalloc(size_t size) {

57 Object* obj = Heap::AllocateByteArray(size);	73 Object* obj = Heap::AllocateByteArray(size);

58	74

59 // If allocation failed, return a NULL pointer to JSRE, and jsRegExpCompile	75 // If allocation failed, return a NULL pointer to JSRE, and jsRegExpCompile

60 // will return NULL to the caller, performs GC there.	76 // will return NULL to the caller, performs GC there.

61 // Also pass failure information to the caller.	77 // Also pass failure information to the caller.

62 if (obj->IsFailure()) {	78 if (obj->IsFailure()) {

63 malloc_failure = Failure::cast(obj);	79 malloc_failure = Failure::cast(obj);

(...skipping 105 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
169 break;	185 break;

170 case 'm':	186 case 'm':

171 flags \|= JSRegExp::MULTILINE;	187 flags \|= JSRegExp::MULTILINE;

172 break;	188 break;

173 }	189 }

174 }	190 }

175 return JSRegExp::Flags(flags);	191 return JSRegExp::Flags(flags);

176 }	192 }

177	193

178	194

179 unibrow::Predicate<unibrow::RegExpSpecialChar, 128> is_reg_exp_special_char;	195 static inline void ThrowRegExpException(Handle<JSRegExp> re,

	196 Handle<String> pattern,

	197 Handle<String> error_text,

	198 const char* message) {

	199 Handle<JSArray> array = Factory::NewJSArray(2);

	200 SetElement(array, 0, pattern);

	201 SetElement(array, 1, error_text);

	202 Handle<Object> regexp_err = Factory::NewSyntaxError(message, array);

	203 Top::Throw(*regexp_err);

	204 }

180	205

181	206

182 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,	207 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,

183 Handle<String> pattern,	208 Handle<String> pattern,

184 Handle<String> flag_str) {	209 Handle<String> flag_str) {

185 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);	210 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);

186 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);	211 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);

187 bool in_cache = !cached.is_null();	212 bool in_cache = !cached.is_null();

188 Handle<Object> result;	213 Handle<Object> result;

189 StringShape shape(*pattern);

190 if (in_cache) {	214 if (in_cache) {

191 re->set_data(*cached);	215 re->set_data(*cached);

192 result = re;	216 result = re;

193 } else {	217 } else {

194 bool is_atom = !flags.is_ignore_case();	218 FlattenString(pattern);

195 for (int i = 0; is_atom && i < pattern->length(shape); i++) {	219 RegExpParseResult parse_result;

196 if (is_reg_exp_special_char.get(pattern->Get(shape, i)))	220 FlatStringReader reader(pattern);

197 is_atom = false;	221 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {

	222 // Throw an exception if we fail to parse the pattern.

	223 ThrowRegExpException(re,

	224 pattern,

	225 parse_result.error,

	226 "malformed_regexp");

	227 return Handle<Object>();

198 }	228 }

199 if (is_atom) {	229 RegExpAtom* atom = parse_result.tree->AsAtom();

200 result = AtomCompile(re, pattern, flags);	230 if (atom != NULL && !flags.is_ignore_case()) {

	231 if (parse_result.has_character_escapes) {

	232 Vector<const uc16> atom_pattern = atom->data();

	233 Handle<String> atom_string =

	234 Factory::NewStringFromTwoByte(atom_pattern);

	235 result = AtomCompile(re, pattern, flags, atom_string);

	236 } else {

	237 result = AtomCompile(re, pattern, flags, pattern);

	238 }

201 } else {	239 } else {

202 result = JsreCompile(re, pattern, flags);	240 RegExpNode* node = NULL;

	241 Handle<FixedArray> irregexp_data =

	242 RegExpEngine::Compile(&parse_result,

	243 &node,

	244 flags.is_ignore_case(),

	245 flags.is_multiline());

	246 if (irregexp_data.is_null()) {

	247 if (FLAG_disable_jscre) {

	248 UNIMPLEMENTED();

	249 }

	250 result = JscrePrepare(re, pattern, flags);

	251 } else {

	252 result = IrregexpPrepare(re, pattern, flags, irregexp_data);

	253 }

203 }	254 }

204 Object* data = re->data();	255 Object* data = re->data();

205 if (data->IsFixedArray()) {	256 if (data->IsFixedArray()) {

206 // If compilation succeeded then the data is set on the regexp	257 // If compilation succeeded then the data is set on the regexp

207 // and we can store it in the cache.	258 // and we can store it in the cache.

208 Handle<FixedArray> data(FixedArray::cast(re->data()));	259 Handle<FixedArray> data(FixedArray::cast(re->data()));

209 CompilationCache::PutRegExp(pattern, flags, data);	260 CompilationCache::PutRegExp(pattern, flags, data);

210 }	261 }

211 }	262 }

212	263

213 LOG(RegExpCompileEvent(re, in_cache));	264 LOG(RegExpCompileEvent(re, in_cache));

214 return result;	265 return result;

215 }	266 }

216	267

217	268

218 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,	269 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,

219 Handle<String> subject,	270 Handle<String> subject,

220 Handle<Object> index) {	271 Handle<Object> index) {

221 switch (regexp->TypeTag()) {	272 switch (regexp->TypeTag()) {

222 case JSRegExp::JSCRE:	273 case JSRegExp::JSCRE:

223 return JsreExec(regexp, subject, index);	274 if (FLAG_disable_jscre) {

	275 UNIMPLEMENTED();

	276 }

	277 return JscreExec(regexp, subject, index);

224 case JSRegExp::ATOM:	278 case JSRegExp::ATOM:

225 return AtomExec(regexp, subject, index);	279 return AtomExec(regexp, subject, index);

	280 case JSRegExp::IRREGEXP:

	281 return IrregexpExec(regexp, subject, index);

226 default:	282 default:

227 UNREACHABLE();	283 UNREACHABLE();

228 return Handle<Object>();	284 return Handle<Object>();

229 }	285 }

230 }	286 }

231	287

232	288

233 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,	289 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,

234 Handle<String> subject) {	290 Handle<String> subject) {

235 switch (regexp->TypeTag()) {	291 switch (regexp->TypeTag()) {

236 case JSRegExp::JSCRE:	292 case JSRegExp::JSCRE:

237 return JsreExecGlobal(regexp, subject);	293 if (FLAG_disable_jscre) {

	294 UNIMPLEMENTED();

	295 }

	296 return JscreExecGlobal(regexp, subject);

238 case JSRegExp::ATOM:	297 case JSRegExp::ATOM:

239 return AtomExecGlobal(regexp, subject);	298 return AtomExecGlobal(regexp, subject);

	299 case JSRegExp::IRREGEXP:

	300 return IrregexpExecGlobal(regexp, subject);

240 default:	301 default:

241 UNREACHABLE();	302 UNREACHABLE();

242 return Handle<Object>();	303 return Handle<Object>();

243 }	304 }

244 }	305 }

245	306

246	307

247 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,	308 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,

248 Handle<String> pattern,	309 Handle<String> pattern,

249 JSRegExp::Flags flags) {	310 JSRegExp::Flags flags,

250 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, pattern);	311 Handle<String> match_pattern) {

	312 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern);

251 return re;	313 return re;

252 }	314 }

253	315

254	316

255 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,	317 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,

256 Handle<String> subject,	318 Handle<String> subject,

257 Handle<Object> index) {	319 Handle<Object> index) {

258 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));	320 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));

259	321

260 uint32_t start_index;	322 uint32_t start_index;

261 if (!Array::IndexFromObject(*index, &start_index)) {	323 if (!Array::IndexFromObject(*index, &start_index)) {

262 return Handle<Smi>(Smi::FromInt(-1));	324 return Handle<Smi>(Smi::FromInt(-1));

263 }	325 }

264	326

265 LOG(RegExpExecEvent(re, start_index, subject));	327 LOG(RegExpExecEvent(re, start_index, subject));

266 int value = Runtime::StringMatch(subject, needle, start_index);	328 int value = Runtime::StringMatch(subject, needle, start_index);

267 if (value == -1) return Factory::null_value();	329 if (value == -1) return Factory::null_value();

268	330

269 Handle<FixedArray> array = Factory::NewFixedArray(2);	331 Handle<FixedArray> array = Factory::NewFixedArray(2);

270 array->set(0,	332 array->set(0, Smi::FromInt(value));

271 Smi::FromInt(value),	333 array->set(1, Smi::FromInt(value + needle->length()));

272 SKIP_WRITE_BARRIER);

273 array->set(1,

274 Smi::FromInt(value + needle->length()),

275 SKIP_WRITE_BARRIER);

276 return Factory::NewJSArrayWithElements(array);	334 return Factory::NewJSArrayWithElements(array);

277 }	335 }

278	336

279	337

280 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,	338 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,

281 Handle<String> subject) {	339 Handle<String> subject) {

282 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));	340 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));

283 Handle<JSArray> result = Factory::NewJSArray(1);	341 Handle<JSArray> result = Factory::NewJSArray(1);

284 int index = 0;	342 int index = 0;

285 int match_count = 0;	343 int match_count = 0;

286 int subject_length = subject->length();	344 int subject_length = subject->length();

287 int needle_length = needle->length();	345 int needle_length = needle->length();

288 while (true) {	346 while (true) {

289 LOG(RegExpExecEvent(re, index, subject));	347 LOG(RegExpExecEvent(re, index, subject));

290 int value = -1;	348 int value = -1;

291 if (index + needle_length <= subject_length) {	349 if (index + needle_length <= subject_length) {

292 value = Runtime::StringMatch(subject, needle, index);	350 value = Runtime::StringMatch(subject, needle, index);

293 }	351 }

294 if (value == -1) break;	352 if (value == -1) break;

295 HandleScope scope;	353 HandleScope scope;

296 int end = value + needle_length;	354 int end = value + needle_length;

297	355

298 Handle<FixedArray> array = Factory::NewFixedArray(2);	356 Handle<FixedArray> array = Factory::NewFixedArray(2);

299 array->set(0,	357 array->set(0, Smi::FromInt(value));

300 Smi::FromInt(value),	358 array->set(1, Smi::FromInt(end));

301 SKIP_WRITE_BARRIER);

302 array->set(1,

303 Smi::FromInt(end),

304 SKIP_WRITE_BARRIER);

305 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);	359 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);

306 SetElement(result, match_count, pair);	360 SetElement(result, match_count, pair);

307 match_count++;	361 match_count++;

308 index = end;	362 index = end;

309 if (needle_length == 0) index++;	363 if (needle_length == 0) index++;

310 }	364 }

311 return result;	365 return result;

312 }	366 }

313	367

314	368

	369 Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,

	370 Handle<String> pattern,

	371 JSRegExp::Flags flags) {

	372 Handle<Object> value(Heap::undefined_value());

	373 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);

	374 return re;

	375 }

	376

	377

	378 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,

	379 Handle<String> pattern,

	380 JSRegExp::Flags flags,

	381 Handle<FixedArray> irregexp_data) {

	382 Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, irregexp_data);

	383 return re;

	384 }

	385

	386

315 static inline Object* DoCompile(String* pattern,	387 static inline Object* DoCompile(String* pattern,

316 JSRegExp::Flags flags,	388 JSRegExp::Flags flags,

317 unsigned* number_of_captures,	389 unsigned* number_of_captures,

318 const char** error_message,	390 const char** error_message,

319 JscreRegExp** code) {	391 v8::jscre::JscreRegExp** code) {

320 JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()	392 v8::jscre::JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()

321 ? JSRegExpIgnoreCase	393 ? v8::jscre::JSRegExpIgnoreCase

322 : JSRegExpDoNotIgnoreCase;	394 : v8::jscre::JSRegExpDoNotIgnoreCase;

323 JSRegExpMultilineOption multiline_option = flags.is_multiline()	395 v8::jscre::JSRegExpMultilineOption multiline_option = flags.is_multiline()

324 ? JSRegExpMultiline	396 ? v8::jscre::JSRegExpMultiline

325 : JSRegExpSingleLine;	397 : v8::jscre::JSRegExpSingleLine;

326 *error_message = NULL;	398 *error_message = NULL;

327 malloc_failure = Failure::Exception();	399 malloc_failure = Failure::Exception();

328 *code = jsRegExpCompile(pattern->GetTwoByteData(),	400 *code = v8::jscre::jsRegExpCompile(pattern->GetTwoByteData(),

329 pattern->length(),	401 pattern->length(),

330 case_option,	402 case_option,

331 multiline_option,	403 multiline_option,

332 number_of_captures,	404 number_of_captures,

333 error_message,	405 error_message,

334 &JSREMalloc,	406 &JSREMalloc,

335 &JSREFree);	407 &JSREFree);

336 if (*code == NULL && (malloc_failure->IsRetryAfterGC() \|\|	408 if (*code == NULL && (malloc_failure->IsRetryAfterGC() \|\|

337 malloc_failure->IsOutOfMemoryFailure())) {	409 malloc_failure->IsOutOfMemoryFailure())) {

338 return malloc_failure;	410 return malloc_failure;

339 } else {	411 } else {

340 // It doesn't matter which object we return here, we just need to return	412 // It doesn't matter which object we return here, we just need to return

341 // a non-failure to indicate to the GC-retry code that there was no	413 // a non-failure to indicate to the GC-retry code that there was no

342 // allocation failure.	414 // allocation failure.

343 return pattern;	415 return pattern;

344 }	416 }

345 }	417 }

346	418

347	419

348 void CompileWithRetryAfterGC(Handle<String> pattern,	420 void CompileWithRetryAfterGC(Handle<String> pattern,

349 JSRegExp::Flags flags,	421 JSRegExp::Flags flags,

350 unsigned* number_of_captures,	422 unsigned* number_of_captures,

351 const char** error_message,	423 const char** error_message,

352 JscreRegExp** code) {	424 v8::jscre::JscreRegExp** code) {

353 CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern,	425 CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern,

354 flags,	426 flags,

355 number_of_captures,	427 number_of_captures,

356 error_message,	428 error_message,

357 code));	429 code));

358 }	430 }

359	431

360	432

361 Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re,	433 Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) {

362 Handle<String> pattern,	434 ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE);

363 JSRegExp::Flags flags) {	435 ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined());

	436

	437 Handle<String> pattern(re->Pattern());

	438 JSRegExp::Flags flags = re->GetFlags();

	439

364 Handle<String> two_byte_pattern = StringToTwoByte(pattern);	440 Handle<String> two_byte_pattern = StringToTwoByte(pattern);

365	441

366 unsigned number_of_captures;	442 unsigned number_of_captures;

367 const char* error_message = NULL;	443 const char* error_message = NULL;

368	444

369 JscreRegExp* code = NULL;	445 v8::jscre::JscreRegExp* code = NULL;

370 FlattenString(pattern);	446 FlattenString(pattern);

371	447

372 CompileWithRetryAfterGC(two_byte_pattern,	448 CompileWithRetryAfterGC(two_byte_pattern,

373 flags,	449 flags,

374 &number_of_captures,	450 &number_of_captures,

375 &error_message,	451 &error_message,

376 &code);	452 &code);

377	453

378 if (code == NULL) {	454 if (code == NULL) {

379 // Throw an exception.	455 // Throw an exception.

380 Handle<JSArray> array = Factory::NewJSArray(2);	456 Handle<JSArray> array = Factory::NewJSArray(2);

381 SetElement(array, 0, pattern);	457 SetElement(array, 0, pattern);

382 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(	458 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(

383 (error_message == NULL) ? "Unknown regexp error" : error_message)));	459 (error_message == NULL) ? "Unknown regexp error" : error_message)));

384 Handle<Object> regexp_err =	460 Handle<Object> regexp_err =

385 Factory::NewSyntaxError("malformed_regexp", array);	461 Factory::NewSyntaxError("malformed_regexp", array);

386 Top::Throw(*regexp_err);	462 Top::Throw(*regexp_err);

387 return Handle<Object>();	463 return Handle<Object>();

388 }	464 }

389	465

390 // Convert the return address to a ByteArray pointer.	466 // Convert the return address to a ByteArray pointer.

391 Handle<ByteArray> internal(	467 Handle<ByteArray> internal(

392 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));	468 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));

393	469

394 Handle<FixedArray> value = Factory::NewFixedArray(2);	470 Handle<FixedArray> value = Factory::NewFixedArray(kJscreDataLength);

395 value->set(CAPTURE_INDEX, Smi::FromInt(number_of_captures));	471 value->set(kJscreNumberOfCapturesIndex, Smi::FromInt(number_of_captures));

396 value->set(INTERNAL_INDEX, *internal);	472 value->set(kJscreInternalIndex, *internal);

397 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);	473 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);

398	474

399 return re;	475 return re;

400 }	476 }

401	477

402	478

403 Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp,	479 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,

404 int num_captures,	480 int num_captures,

405 Handle<String> subject,	481 Handle<String> two_byte_subject,

406 int previous_index,	482 int previous_index,

407 const uc16* two_byte_subject,	483 int* offsets_vector,

408 int* offsets_vector,	484 int offsets_vector_length) {

409 int offsets_vector_length) {	485 #ifdef DEBUG

	486 if (FLAG_trace_regexp_bytecodes) {

	487 String* pattern = regexp->Pattern();

	488 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));

	489 PrintF("\n\nSubject string: '%s'\n\n", *(two_byte_subject->ToCString()));

	490 }

	491 #endif

	492 ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation());

	493 ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject)));

	494 bool rc;

	495

	496 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {

	497 offsets_vector[i] = -1;

	498 }

	499

	500 LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject));

	501

	502 FixedArray* irregexp =

	503 FixedArray::cast(regexp->DataAt(JSRegExp::kIrregexpDataIndex));

	504 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();

	505

	506 switch (tag) {

	507 case RegExpMacroAssembler::kIA32Implementation: {

	508 Code* code = Code::cast(irregexp->get(kIrregexpCodeIndex));

	509 Address start_addr =

	510 Handle<SeqTwoByteString>::cast(two_byte_subject)->GetCharsAddress();

	511 int string_offset =

	512 start_addr - reinterpret_cast<Address>(*two_byte_subject);

	513 int start_offset = string_offset + previous_index * sizeof(uc16);

	514 int end_offset =

	515 string_offset + two_byte_subject->length() * sizeof(uc16);

	516 typedef bool testfunc(String*, int, int, int);

	517 testfunc* test = FUNCTION_CAST<testfunc*>(code->entry());

	518 rc = test(two_byte_subject.location(),

	519 start_offset,

	520 end_offset,

	521 offsets_vector);

	522 if (rc) {

	523 // Capture values are relative to start_offset only.

	524 for (int i = 0; i < offsets_vector_length; i++) {

	525 if (offsets_vector[i] >= 0) {

	526 offsets_vector[i] += previous_index;

	527 }

	528 }

	529 }

	530 break;

	531 }

	532 case RegExpMacroAssembler::kBytecodeImplementation: {

	533 Handle<ByteArray> byte_codes = IrregexpCode(regexp);

	534

	535 rc = IrregexpInterpreter::Match(byte_codes,

	536 two_byte_subject,

	537 offsets_vector,

	538 previous_index);

	539 break;

	540 }

	541 case RegExpMacroAssembler::kARMImplementation:

	542 default:

	543 UNREACHABLE();

	544 rc = false;

	545 break;

	546 }

	547

	548 if (!rc) {

	549 return Factory::null_value();

	550 }

	551

	552 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));

	553 // The captures come in (start, end+1) pairs.

	554 for (int i = 0; i < 2 * (num_captures+1); i += 2) {

	555 array->set(i, Smi::FromInt(offsets_vector[i]));

	556 array->set(i+1, Smi::FromInt(offsets_vector[i+1]));

	557 }

	558 return Factory::NewJSArrayWithElements(array);

	559 }

	560

	561

	562 Handle<Object> RegExpImpl::JscreExecOnce(Handle<JSRegExp> regexp,

	563 int num_captures,

	564 Handle<String> subject,

	565 int previous_index,

	566 const uc16* two_byte_subject,

	567 int* offsets_vector,

	568 int offsets_vector_length) {

410 int rc;	569 int rc;

411 {	570 {

412 AssertNoAllocation a;	571 AssertNoAllocation a;

413 ByteArray* internal = JsreInternal(regexp);	572 ByteArray* internal = JscreInternal(regexp);

414 const JscreRegExp* js_regexp =	573 const v8::jscre::JscreRegExp* js_regexp =

415 reinterpret_cast<JscreRegExp*>(internal->GetDataStartAddress());	574 reinterpret_cast<v8::jscre::JscreRegExp*>(

	575 internal->GetDataStartAddress());

416	576

417 LOG(RegExpExecEvent(regexp, previous_index, subject));	577 LOG(RegExpExecEvent(regexp, previous_index, subject));

418	578

419 rc = jsRegExpExecute(js_regexp,	579 rc = v8::jscre::jsRegExpExecute(js_regexp,

420 two_byte_subject,	580 two_byte_subject,

421 subject->length(),	581 subject->length(),

422 previous_index,	582 previous_index,

423 offsets_vector,	583 offsets_vector,

424 offsets_vector_length);	584 offsets_vector_length);

425 }	585 }

426	586

427 // The KJS JavaScript engine returns null (ie, a failed match) when	587 // The KJS JavaScript engine returns null (ie, a failed match) when

428 // JSRE's internal match limit is exceeded. We duplicate that behavior here.	588 // JSRE's internal match limit is exceeded. We duplicate that behavior here.

429 if (rc == JSRegExpErrorNoMatch	589 if (rc == v8::jscre::JSRegExpErrorNoMatch

430 \|\| rc == JSRegExpErrorHitLimit) {	590 \|\| rc == v8::jscre::JSRegExpErrorHitLimit) {

431 return Factory::null_value();	591 return Factory::null_value();

432 }	592 }

433	593

434 // Other JSRE errors:	594 // Other JSRE errors:

435 if (rc < 0) {	595 if (rc < 0) {

436 // Throw an exception.	596 // Throw an exception.

437 Handle<Object> code(Smi::FromInt(rc));	597 Handle<Object> code(Smi::FromInt(rc));

438 Handle<Object> args[2] = { Factory::LookupAsciiSymbol("jsre_exec"), code };	598 Handle<Object> args[2] = { Factory::LookupAsciiSymbol("jsre_exec"), code };

439 Handle<Object> regexp_err(	599 Handle<Object> regexp_err(

440 Factory::NewTypeError("jsre_error", HandleVector(args, 2)));	600 Factory::NewTypeError("jsre_error", HandleVector(args, 2)));

441 return Handle<Object>(Top::Throw(*regexp_err));	601 return Handle<Object>(Top::Throw(*regexp_err));

442 }	602 }

443	603

444 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));	604 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));

445 // The captures come in (start, end+1) pairs.	605 // The captures come in (start, end+1) pairs.

446 for (int i = 0; i < 2 * (num_captures+1); i += 2) {	606 for (int i = 0; i < 2 * (num_captures+1); i += 2) {

447 array->set(i,	607 array->set(i, Smi::FromInt(offsets_vector[i]));

448 Smi::FromInt(offsets_vector[i]),	608 array->set(i+1, Smi::FromInt(offsets_vector[i+1]));

449 SKIP_WRITE_BARRIER);

450 array->set(i+1,

451 Smi::FromInt(offsets_vector[i+1]),

452 SKIP_WRITE_BARRIER);

453 }	609 }

454 return Factory::NewJSArrayWithElements(array);	610 return Factory::NewJSArrayWithElements(array);

455 }	611 }

456	612

457	613

458 class OffsetsVector {	614 class OffsetsVector {

459 public:	615 public:

460 inline OffsetsVector(int num_captures) {	616 inline OffsetsVector(int num_registers)

461 offsets_vector_length_ = (num_captures + 1) * 3;	617 : offsets_vector_length_(num_registers) {

462 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {	618 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {

463 vector_ = NewArray<int>(offsets_vector_length_);	619 vector_ = NewArray<int>(offsets_vector_length_);

464 } else {	620 } else {

465 vector_ = static_offsets_vector_;	621 vector_ = static_offsets_vector_;

466 }	622 }

467 }	623 }

468	624

469	625

470 inline ~OffsetsVector() {	626 inline ~OffsetsVector() {

471 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {	627 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {

472 DeleteArray(vector_);	628 DeleteArray(vector_);

473 vector_ = NULL;	629 vector_ = NULL;

474 }	630 }

475 }	631 }

476	632

477	633

478 inline int* vector() {	634 inline int* vector() {

479 return vector_;	635 return vector_;

480 }	636 }

481	637

482	638

483 inline int length() {	639 inline int length() {

484 return offsets_vector_length_;	640 return offsets_vector_length_;

485 }	641 }

486	642

487 private:	643 private:

488 int* vector_;	644 int* vector_;

489 int offsets_vector_length_;	645 int offsets_vector_length_;

490 static const int kStaticOffsetsVectorSize = 30;	646 static const int kStaticOffsetsVectorSize = 50;

491 static int static_offsets_vector_[kStaticOffsetsVectorSize];	647 static int static_offsets_vector_[kStaticOffsetsVectorSize];

492 };	648 };

493	649

494	650

495 int OffsetsVector::static_offsets_vector_[	651 int OffsetsVector::static_offsets_vector_[

496 OffsetsVector::kStaticOffsetsVectorSize];	652 OffsetsVector::kStaticOffsetsVectorSize];

497	653

498	654

499 Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp,	655 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,

500 Handle<String> subject,	656 Handle<String> subject,

501 Handle<Object> index) {	657 Handle<Object> index) {

	658 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);

	659 ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());

	660

502 // Prepare space for the return values.	661 // Prepare space for the return values.

503 int num_captures = JsreCapture(regexp);	662 int number_of_registers = IrregexpNumberOfRegisters(regexp);

	663 OffsetsVector offsets(number_of_registers);

504	664

505 OffsetsVector offsets(num_captures);	665 int num_captures = IrregexpNumberOfCaptures(regexp);

506	666

507 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));	667 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));

508	668

509 Handle<String> subject16 = CachedStringToTwoByte(subject);	669 Handle<String> subject16 = CachedStringToTwoByte(subject);

510	670

511 Handle<Object> result(JsreExecOnce(regexp, num_captures, subject,	671 Handle<Object> result(IrregexpExecOnce(regexp,

512 previous_index,	672 num_captures,

513 subject16->GetTwoByteData(),	673 subject16,

514 offsets.vector(), offsets.length()));	674 previous_index,

	675 offsets.vector(),

	676 offsets.length()));

	677 return result;

	678 }

	679

	680

	681 Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,

	682 Handle<String> subject,

	683 Handle<Object> index) {

	684 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);

	685 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {

	686 Handle<Object> compile_result = JscreCompile(regexp);

	687 if (compile_result.is_null()) return compile_result;

	688 }

	689 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());

	690

	691 int num_captures = JscreNumberOfCaptures(regexp);

	692

	693 OffsetsVector offsets((num_captures + 1) * 3);

	694

	695 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));

	696

	697 Handle<String> subject16 = CachedStringToTwoByte(subject);

	698

	699 Handle<Object> result(JscreExecOnce(regexp,

	700 num_captures,

	701 subject,

	702 previous_index,

	703 subject16->GetTwoByteData(),

	704 offsets.vector(),

	705 offsets.length()));

515	706

516 return result;	707 return result;

517 }	708 }

518	709

519	710

520 Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp,	711 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,

521 Handle<String> subject) {	712 Handle<String> subject) {

	713 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);

	714 ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());

	715

522 // Prepare space for the return values.	716 // Prepare space for the return values.

523 int num_captures = JsreCapture(regexp);	717 int number_of_registers = IrregexpNumberOfRegisters(regexp);

524	718 OffsetsVector offsets(number_of_registers);

525 OffsetsVector offsets(num_captures);

526	719

527 int previous_index = 0;	720 int previous_index = 0;

528	721

529 Handle<JSArray> result = Factory::NewJSArray(0);	722 Handle<JSArray> result = Factory::NewJSArray(0);

	723 int i = 0;

	724 Handle<Object> matches;

	725

	726 Handle<String> subject16 = CachedStringToTwoByte(subject);

	727

	728 do {

	729 if (previous_index > subject->length() \|\| previous_index < 0) {

	730 // Per ECMA-262 15.10.6.2, if the previous index is greater than the

	731 // string length, there is no match.

	732 matches = Factory::null_value();

	733 } else {

	734 matches = IrregexpExecOnce(regexp,

	735 IrregexpNumberOfCaptures(regexp),

	736 subject16,

	737 previous_index,

	738 offsets.vector(),

	739 offsets.length());

	740

	741 if (matches->IsJSArray()) {

	742 SetElement(result, i, matches);

	743 i++;

	744 previous_index = offsets.vector()[1];

	745 if (offsets.vector()[0] == offsets.vector()[1]) {

	746 previous_index++;

	747 }

	748 }

	749 }

	750 } while (matches->IsJSArray());

	751

	752 // If we exited the loop with an exception, throw it.

	753 if (matches->IsNull()) {

	754 // Exited loop normally.

	755 return result;

	756 } else {

	757 // Exited loop with the exception in matches.

	758 return matches;

	759 }

	760 }

	761

	762

	763 Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,

	764 Handle<String> subject) {

	765 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);

	766 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {

	767 Handle<Object> compile_result = JscreCompile(regexp);

	768 if (compile_result.is_null()) return compile_result;

	769 }

	770 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());

	771

	772 // Prepare space for the return values.

	773 int num_captures = JscreNumberOfCaptures(regexp);

	774

	775 OffsetsVector offsets((num_captures + 1) * 3);

	776

	777 int previous_index = 0;

	778

	779 Handle<JSArray> result = Factory::NewJSArray(0);

530 int i = 0;	780 int i = 0;

531 Handle<Object> matches;	781 Handle<Object> matches;

532	782

533 Handle<String> subject16 = CachedStringToTwoByte(subject);	783 Handle<String> subject16 = CachedStringToTwoByte(subject);

534	784

535 do {	785 do {

536 if (previous_index > subject->length() \|\| previous_index < 0) {	786 if (previous_index > subject->length() \|\| previous_index < 0) {

537 // Per ECMA-262 15.10.6.2, if the previous index is greater than the	787 // Per ECMA-262 15.10.6.2, if the previous index is greater than the

538 // string length, there is no match.	788 // string length, there is no match.

539 matches = Factory::null_value();	789 matches = Factory::null_value();

540 } else {	790 } else {

541 matches = JsreExecOnce(regexp, num_captures, subject, previous_index,	791 matches = JscreExecOnce(regexp,

542 subject16->GetTwoByteData(),	792 num_captures,

543 offsets.vector(), offsets.length());	793 subject,

	794 previous_index,

	795 subject16->GetTwoByteData(),

	796 offsets.vector(),

	797 offsets.length());

544	798

545 if (matches->IsJSArray()) {	799 if (matches->IsJSArray()) {

546 SetElement(result, i, matches);	800 SetElement(result, i, matches);

547 i++;	801 i++;

548 previous_index = offsets.vector()[1];	802 previous_index = offsets.vector()[1];

549 if (offsets.vector()[0] == offsets.vector()[1]) {	803 if (offsets.vector()[0] == offsets.vector()[1]) {

550 previous_index++;	804 previous_index++;

551 }	805 }

552 }	806 }

553 }	807 }

554 } while (matches->IsJSArray());	808 } while (matches->IsJSArray());

555	809

556 // If we exited the loop with an exception, throw it.	810 // If we exited the loop with an exception, throw it.

557 if (matches->IsNull()) { // Exited loop normally.	811 if (matches->IsNull()) {

	812 // Exited loop normally.

558 return result;	813 return result;

559 } else { // Exited loop with the exception in matches.	814 } else {

	815 // Exited loop with the exception in matches.

560 return matches;	816 return matches;

561 }	817 }

562 }	818 }

563	819

564	820

565 int RegExpImpl::JsreCapture(Handle<JSRegExp> re) {	821 int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {

566 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));	822 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));

567 return Smi::cast(value->get(CAPTURE_INDEX))->value();	823 return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value();

568 }	824 }

569	825

570	826

571 ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) {	827 ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {

572 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));	828 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));

573 return ByteArray::cast(value->get(INTERNAL_INDEX));	829 return ByteArray::cast(value->get(kJscreInternalIndex));

574 }	830 }

	831

	832

	833 int RegExpImpl::IrregexpNumberOfCaptures(Handle<JSRegExp> re) {

	834 FixedArray* value =

	835 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));

	836 return Smi::cast(value->get(kIrregexpNumberOfCapturesIndex))->value();

	837 }

	838

	839

	840 int RegExpImpl::IrregexpNumberOfRegisters(Handle<JSRegExp> re) {

	841 FixedArray* value =

	842 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));

	843 return Smi::cast(value->get(kIrregexpNumberOfRegistersIndex))->value();

	844 }

	845

	846

	847 Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) {

	848 FixedArray* value =

	849 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));

	850 return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex)));

	851 }

	852

	853

	854 // -------------------------------------------------------------------

	855 // Implmentation of the Irregexp regular expression engine.

	856

	857

	858 void RegExpTree::AppendToText(RegExpText* text) {

	859 UNREACHABLE();

	860 }

	861

	862

	863 void RegExpAtom::AppendToText(RegExpText* text) {

	864 text->AddElement(TextElement::Atom(this));

	865 }

	866

	867

	868 void RegExpCharacterClass::AppendToText(RegExpText* text) {

	869 text->AddElement(TextElement::CharClass(this));

	870 }

	871

	872

	873 void RegExpText::AppendToText(RegExpText* text) {

	874 for (int i = 0; i < elements()->length(); i++)

	875 text->AddElement(elements()->at(i));

	876 }

	877

	878

	879 TextElement TextElement::Atom(RegExpAtom* atom) {

	880 TextElement result = TextElement(ATOM);

	881 result.data.u_atom = atom;

	882 return result;

	883 }

	884

	885

	886 TextElement TextElement::CharClass(

	887 RegExpCharacterClass* char_class) {

	888 TextElement result = TextElement(CHAR_CLASS);

	889 result.data.u_char_class = char_class;

	890 return result;

	891 }

	892

	893

	894 class RegExpCompiler {

	895 public:

	896 RegExpCompiler(int capture_count, bool ignore_case);

	897

	898 int AllocateRegister() { return next_register_++; }

	899

	900 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,

	901 RegExpNode* start,

	902 int capture_count);

	903

	904 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }

	905

	906 static const int kImplementationOffset = 0;

	907 static const int kNumberOfRegistersOffset = 0;

	908 static const int kCodeOffset = 1;

	909

	910 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }

	911 EndNode* accept() { return accept_; }

	912 EndNode* backtrack() { return backtrack_; }

	913

	914 static const int kMaxRecursion = 100;

	915 inline int recursion_depth() { return recursion_depth_; }

	916 inline void IncrementRecursionDepth() { recursion_depth_++; }

	917 inline void DecrementRecursionDepth() { recursion_depth_--; }

	918

	919 inline bool ignore_case() { return ignore_case_; }

	920

	921 private:

	922 EndNode* accept_;

	923 EndNode* backtrack_;

	924 int next_register_;

	925 List<RegExpNode> work_list_;

	926 int recursion_depth_;

	927 RegExpMacroAssembler* macro_assembler_;

	928 bool ignore_case_;

	929 };

	930

	931

	932 // Attempts to compile the regexp using an Irregexp code generator. Returns

	933 // a fixed array or a null handle depending on whether it succeeded.

	934 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case)

	935 : next_register_(2 * (capture_count + 1)),

	936 work_list_(NULL),

	937 recursion_depth_(0),

	938 ignore_case_(ignore_case) {

	939 accept_ = new EndNode(EndNode::ACCEPT);

	940 backtrack_ = new EndNode(EndNode::BACKTRACK);

	941 }

	942

	943

	944 Handle<FixedArray> RegExpCompiler::Assemble(

	945 RegExpMacroAssembler* macro_assembler,

	946 RegExpNode* start,

	947 int capture_count) {

	948 #ifdef DEBUG

	949 if (FLAG_trace_regexp_assembler)

	950 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);

	951 else

	952 #endif

	953 macro_assembler_ = macro_assembler;

	954 List <RegExpNode*> work_list(0);

	955 work_list_ = &work_list;

	956 Label fail;

	957 macro_assembler_->PushBacktrack(&fail);

	958 if (!start->GoTo(this)) {

	959 fail.Unuse();

	960 return Handle<FixedArray>::null();

	961 }

	962 while (!work_list.is_empty()) {

	963 if (!work_list.RemoveLast()->GoTo(this)) {

	964 fail.Unuse();

	965 return Handle<FixedArray>::null();

	966 }

	967 }

	968 macro_assembler_->Bind(&fail);

	969 macro_assembler_->Fail();

	970 Handle<FixedArray> array =

	971 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);

	972 array->set(RegExpImpl::kIrregexpImplementationIndex,

	973 Smi::FromInt(macro_assembler_->Implementation()));

	974 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex,

	975 Smi::FromInt(next_register_));

	976 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex,

	977 Smi::FromInt(capture_count));

	978 Handle<Object> code = macro_assembler_->GetCode();

	979 array->set(RegExpImpl::kIrregexpCodeIndex, *code);

	980 work_list_ = NULL;

	981 #ifdef DEBUG

	982 if (FLAG_trace_regexp_assembler) {

	983 delete macro_assembler_;

	984 }

	985 #endif

	986 return array;

	987 }

	988

	989

	990 bool RegExpNode::GoTo(RegExpCompiler* compiler) {

	991 // TODO(erikcorry): Implement support.

	992 if (info_.follows_word_interest \|\|

	993 info_.follows_newline_interest \|\|

	994 info_.follows_start_interest) {

	995 return false;

	996 }

	997 if (label_.is_bound()) {

	998 compiler->macro_assembler()->GoTo(&label_);

	999 return true;

	1000 } else {

	1001 if (compiler->recursion_depth() > RegExpCompiler::kMaxRecursion) {

	1002 compiler->macro_assembler()->GoTo(&label_);

	1003 compiler->AddWork(this);

	1004 return true;

	1005 } else {

	1006 compiler->IncrementRecursionDepth();

	1007 bool how_it_went = Emit(compiler);

	1008 compiler->DecrementRecursionDepth();

	1009 return how_it_went;

	1010 }

	1011 }

	1012 }

	1013

	1014

	1015 // EndNodes are special. Because they can be very common and they are very

	1016 // short we normally inline them. That is, if we are asked to emit a GoTo

	1017 // we just emit the entire node. Since they don't have successors this

	1018 // works.

	1019 bool EndNode::GoTo(RegExpCompiler* compiler) {

	1020 if (info()->follows_word_interest \|\|

	1021 info()->follows_newline_interest \|\|

	1022 info()->follows_start_interest) {

	1023 return false;

	1024 }

	1025 return Emit(compiler);

	1026 }

	1027

	1028

	1029 Label* RegExpNode::label() {

	1030 return &label_;

	1031 }

	1032

	1033

	1034 bool EndNode::Emit(RegExpCompiler* compiler) {

	1035 RegExpMacroAssembler* macro = compiler->macro_assembler();

	1036 switch (action_) {

	1037 case ACCEPT:

	1038 if (!label()->is_bound()) Bind(macro);

	1039 if (info()->at_end) {

	1040 Label succeed;

	1041 // LoadCurrentCharacter will go to the label if we are at the end of the

	1042 // input string.

	1043 macro->LoadCurrentCharacter(0, &succeed);

	1044 macro->Backtrack();

	1045 macro->Bind(&succeed);

	1046 }

	1047 macro->Succeed();

	1048 return true;

	1049 case BACKTRACK:

	1050 if (!label()->is_bound()) Bind(macro);

	1051 ASSERT(!info()->at_end);

	1052 macro->Backtrack();

	1053 return true;

	1054 }

	1055 return false;

	1056 }

	1057

	1058

	1059 void GuardedAlternative::AddGuard(Guard* guard) {

	1060 if (guards_ == NULL)

	1061 guards_ = new ZoneList<Guard*>(1);

	1062 guards_->Add(guard);

	1063 }

	1064

	1065

	1066 ActionNode* ActionNode::StoreRegister(int reg,

	1067 int val,

	1068 RegExpNode* on_success) {

	1069 ActionNode* result = new ActionNode(STORE_REGISTER, on_success);

	1070 result->data_.u_store_register.reg = reg;

	1071 result->data_.u_store_register.value = val;

	1072 return result;

	1073 }

	1074

	1075

	1076 ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {

	1077 ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success);

	1078 result->data_.u_increment_register.reg = reg;

	1079 return result;

	1080 }

	1081

	1082

	1083 ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) {

	1084 ActionNode* result = new ActionNode(STORE_POSITION, on_success);

	1085 result->data_.u_position_register.reg = reg;

	1086 return result;

	1087 }

	1088

	1089

	1090 ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) {

	1091 ActionNode* result = new ActionNode(RESTORE_POSITION, on_success);

	1092 result->data_.u_position_register.reg = reg;

	1093 return result;

	1094 }

	1095

	1096

	1097 ActionNode* ActionNode::BeginSubmatch(int stack_reg,

	1098 int position_reg,

	1099 RegExpNode* on_success) {

	1100 ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success);

	1101 result->data_.u_submatch.stack_pointer_register = stack_reg;

	1102 result->data_.u_submatch.current_position_register = position_reg;

	1103 return result;

	1104 }

	1105

	1106

	1107 ActionNode* ActionNode::EscapeSubmatch(int stack_reg,

	1108 bool restore_position,

	1109 int position_reg,

	1110 RegExpNode* on_success) {

	1111 ActionNode* result = new ActionNode(ESCAPE_SUBMATCH, on_success);

	1112 result->data_.u_submatch.stack_pointer_register = stack_reg;

	1113 if (restore_position) {

	1114 result->data_.u_submatch.current_position_register = position_reg;

	1115 } else {

	1116 result->data_.u_submatch.current_position_register = -1;

	1117 }

	1118 return result;

	1119 }

	1120

	1121

	1122 #define DEFINE_ACCEPT(Type) \

	1123 void Type##Node::Accept(NodeVisitor* visitor) { \

	1124 visitor->Visit##Type(this); \

	1125 }

	1126 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)

	1127 #undef DEFINE_ACCEPT

	1128

	1129

	1130 // -------------------------------------------------------------------

	1131 // Emit code.

	1132

	1133

	1134 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,

	1135 Guard* guard,

	1136 Label* on_failure) {

	1137 switch (guard->op()) {

	1138 case Guard::LT:

	1139 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), on_failure);

	1140 break;

	1141 case Guard::GEQ:

	1142 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), on_failure);

	1143 break;

	1144 }

	1145 }

	1146

	1147

	1148 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;

	1149 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;

	1150

	1151

	1152 static inline void EmitAtomNonLetters(

	1153 RegExpMacroAssembler* macro_assembler,

	1154 TextElement elm,

	1155 Vector<const uc16> quarks,

	1156 Label* on_failure,

	1157 int cp_offset) {

	1158 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	1159 for (int i = quarks.length() - 1; i >= 0; i--) {

	1160 uc16 c = quarks[i];

	1161 int length = uncanonicalize.get(c, '\0', chars);

	1162 if (length <= 1) {

	1163 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);

	1164 macro_assembler->CheckNotCharacter(c, on_failure);

	1165 }

	1166 }

	1167 }

	1168

	1169

	1170 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,

	1171 uc16 c1,

	1172 uc16 c2,

	1173 Label* on_failure) {

	1174 uc16 exor = c1 ^ c2;

	1175 // Check whether exor has only one bit set.

	1176 if (((exor - 1) & exor) == 0) {

	1177 // If c1 and c2 differ only by one bit.

	1178 // Ecma262UnCanonicalize always gives the highest number last.

	1179 ASSERT(c2 > c1);

	1180 macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure);

	1181 return true;

	1182 }

	1183 ASSERT(c2 > c1);

	1184 uc16 diff = c2 - c1;

	1185 if (((diff - 1) & diff) == 0 && c1 >= diff) {

	1186 // If the characters differ by 2^n but don't differ by one bit then

	1187 // subtract the difference from the found character, then do the or

	1188 // trick. We avoid the theoretical case where negative numbers are

	1189 // involved in order to simplify code generation.

	1190 macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff,

	1191 diff,

	1192 on_failure);

	1193 return true;

	1194 }

	1195 return false;

	1196 }

	1197

	1198

	1199 static inline void EmitAtomLetters(

	1200 RegExpMacroAssembler* macro_assembler,

	1201 TextElement elm,

	1202 Vector<const uc16> quarks,

	1203 Label* on_failure,

	1204 int cp_offset) {

	1205 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	1206 for (int i = quarks.length() - 1; i >= 0; i--) {

	1207 uc16 c = quarks[i];

	1208 int length = uncanonicalize.get(c, '\0', chars);

	1209 if (length <= 1) continue;

	1210 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);

	1211 Label ok;

	1212 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);

	1213 switch (length) {

	1214 case 2: {

	1215 if (ShortCutEmitCharacterPair(macro_assembler,

	1216 chars[0],

	1217 chars[1],

	1218 on_failure)) {

	1219 ok.Unuse();

	1220 } else {

	1221 macro_assembler->CheckCharacter(chars[0], &ok);

	1222 macro_assembler->CheckNotCharacter(chars[1], on_failure);

	1223 macro_assembler->Bind(&ok);

	1224 }

	1225 break;

	1226 }

	1227 case 4:

	1228 macro_assembler->CheckCharacter(chars[3], &ok);

	1229 // Fall through!

	1230 case 3:

	1231 macro_assembler->CheckCharacter(chars[0], &ok);

	1232 macro_assembler->CheckCharacter(chars[1], &ok);

	1233 macro_assembler->CheckNotCharacter(chars[2], on_failure);

	1234 macro_assembler->Bind(&ok);

	1235 break;

	1236 default:

	1237 UNREACHABLE();

	1238 break;

	1239 }

	1240 }

	1241 }

	1242

	1243

	1244 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,

	1245 RegExpCharacterClass* cc,

	1246 int cp_offset,

	1247 Label* on_failure) {

	1248 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);

	1249 cp_offset++;

	1250

	1251 ZoneList<CharacterRange>* ranges = cc->ranges();

	1252

	1253 Label success;

	1254

	1255 Label* char_is_in_class =

	1256 cc->is_negated() ? on_failure : &success;

	1257

	1258 int range_count = ranges->length();

	1259

	1260 if (range_count == 0) {

	1261 if (!cc->is_negated()) {

	1262 macro_assembler->GoTo(on_failure);

	1263 }

	1264 return;

	1265 }

	1266

	1267 for (int i = 0; i < range_count - 1; i++) {

	1268 CharacterRange& range = ranges->at(i);

	1269 Label next_range;

	1270 uc16 from = range.from();

	1271 uc16 to = range.to();

	1272 if (to == from) {

	1273 macro_assembler->CheckCharacter(to, char_is_in_class);

	1274 } else {

	1275 if (from != 0) {

	1276 macro_assembler->CheckCharacterLT(from, &next_range);

	1277 }

	1278 if (to != 0xffff) {

	1279 macro_assembler->CheckCharacterLT(to + 1, char_is_in_class);

	1280 } else {

	1281 macro_assembler->GoTo(char_is_in_class);

	1282 }

	1283 }

	1284 macro_assembler->Bind(&next_range);

	1285 }

	1286

	1287 CharacterRange& range = ranges->at(range_count - 1);

	1288 uc16 from = range.from();

	1289 uc16 to = range.to();

	1290

	1291 if (to == from) {

	1292 if (cc->is_negated()) {

	1293 macro_assembler->CheckCharacter(to, on_failure);

	1294 } else {

	1295 macro_assembler->CheckNotCharacter(to, on_failure);

	1296 }

	1297 } else {

	1298 if (from != 0) {

	1299 if (cc->is_negated()) {

	1300 macro_assembler->CheckCharacterLT(from, &success);

	1301 } else {

	1302 macro_assembler->CheckCharacterLT(from, on_failure);

	1303 }

	1304 }

	1305 if (to != 0xffff) {

	1306 if (cc->is_negated()) {

	1307 macro_assembler->CheckCharacterLT(to + 1, on_failure);

	1308 } else {

	1309 macro_assembler->CheckCharacterGT(to, on_failure);

	1310 }

	1311 } else {

	1312 if (cc->is_negated()) {

	1313 macro_assembler->GoTo(on_failure);

	1314 }

	1315 }

	1316 }

	1317 macro_assembler->Bind(&success);

	1318 }

	1319

	1320

	1321

	1322 bool TextNode::Emit(RegExpCompiler* compiler) {

	1323 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1324 Bind(macro_assembler);

	1325 int element_count = elms_->length();

	1326 ASSERT(element_count != 0);

	1327 int cp_offset = 0;

	1328 if (info()->at_end) {

	1329 macro_assembler->Backtrack();

	1330 return true;

	1331 }

	1332 // First, handle straight character matches.

	1333 for (int i = 0; i < element_count; i++) {

	1334 TextElement elm = elms_->at(i);

	1335 if (elm.type == TextElement::ATOM) {

	1336 Vector<const uc16> quarks = elm.data.u_atom->data();

	1337 if (compiler->ignore_case()) {

	1338 EmitAtomNonLetters(macro_assembler,

	1339 elm,

	1340 quarks,

	1341 on_failure_->label(),

	1342 cp_offset);

	1343 } else {

	1344 macro_assembler->CheckCharacters(quarks,

	1345 cp_offset,

	1346 on_failure_->label());

	1347 }

	1348 cp_offset += quarks.length();

	1349 } else {

	1350 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);

	1351 cp_offset++;

	1352 }

	1353 }

	1354 // Second, handle case independent letter matches if any.

	1355 if (compiler->ignore_case()) {

	1356 cp_offset = 0;

	1357 for (int i = 0; i < element_count; i++) {

	1358 TextElement elm = elms_->at(i);

	1359 if (elm.type == TextElement::ATOM) {

	1360 Vector<const uc16> quarks = elm.data.u_atom->data();

	1361 EmitAtomLetters(macro_assembler,

	1362 elm,

	1363 quarks,

	1364 on_failure_->label(),

	1365 cp_offset);

	1366 cp_offset += quarks.length();

	1367 } else {

	1368 cp_offset++;

	1369 }

	1370 }

	1371 }

	1372 // If the fast character matches passed then do the character classes.

	1373 cp_offset = 0;

	1374 for (int i = 0; i < element_count; i++) {

	1375 TextElement elm = elms_->at(i);

	1376 if (elm.type == TextElement::CHAR_CLASS) {

	1377 RegExpCharacterClass* cc = elm.data.u_char_class;

	1378 EmitCharClass(macro_assembler, cc, cp_offset, on_failure_->label());

	1379 cp_offset++;

	1380 } else {

	1381 cp_offset += elm.data.u_atom->data().length();

	1382 }

	1383 }

	1384

	1385 compiler->AddWork(on_failure_);

	1386 macro_assembler->AdvanceCurrentPosition(cp_offset);

	1387 return on_success()->GoTo(compiler);

	1388 }

	1389

	1390

	1391 void TextNode::MakeCaseIndependent() {

	1392 int element_count = elms_->length();

	1393 for (int i = 0; i < element_count; i++) {

	1394 TextElement elm = elms_->at(i);

	1395 if (elm.type == TextElement::CHAR_CLASS) {

	1396 RegExpCharacterClass* cc = elm.data.u_char_class;

	1397 ZoneList<CharacterRange>* ranges = cc->ranges();

	1398 int range_count = ranges->length();

	1399 for (int i = 0; i < range_count; i++) {

	1400 ranges->at(i).AddCaseEquivalents(ranges);

	1401 }

	1402 }

	1403 }

	1404 }

	1405

	1406

	1407 bool ChoiceNode::Emit(RegExpCompiler* compiler) {

	1408 int choice_count = alternatives_->length();

	1409 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1410 Bind(macro_assembler);

	1411 // For now we just call all choices one after the other. The idea ultimately

	1412 // is to use the Dispatch table to try only the relevant ones.

	1413 for (int i = 0; i < choice_count - 1; i++) {

	1414 GuardedAlternative alternative = alternatives_->at(i);

	1415 Label after;

	1416 Label after_no_pop_cp;

	1417 ZoneList<Guard> guards = alternative.guards();

	1418 if (guards != NULL) {

	1419 int guard_count = guards->length();

	1420 for (int j = 0; j < guard_count; j++) {

	1421 GenerateGuard(macro_assembler, guards->at(j), &after_no_pop_cp);

	1422 }

	1423 }

	1424 macro_assembler->PushCurrentPosition();

	1425 macro_assembler->PushBacktrack(&after);

	1426 if (!alternative.node()->GoTo(compiler)) {

	1427 after.Unuse();

	1428 after_no_pop_cp.Unuse();

	1429 return false;

	1430 }

	1431 macro_assembler->Bind(&after);

	1432 macro_assembler->PopCurrentPosition();

	1433 macro_assembler->Bind(&after_no_pop_cp);

	1434 }

	1435 GuardedAlternative alternative = alternatives_->at(choice_count - 1);

	1436 ZoneList<Guard> guards = alternative.guards();

	1437 if (guards != NULL) {

	1438 int guard_count = guards->length();

	1439 for (int j = 0; j < guard_count; j++) {

	1440 GenerateGuard(macro_assembler, guards->at(j), on_failure_->label());

	1441 }

	1442 }

	1443 if (!on_failure_->IsBacktrack()) {

	1444 ASSERT_NOT_NULL(on_failure_ -> label());

	1445 macro_assembler->PushBacktrack(on_failure_->label());

	1446 compiler->AddWork(on_failure_);

	1447 }

	1448 if (!alternative.node()->GoTo(compiler)) {

	1449 return false;

	1450 }

	1451 return true;

	1452 }

	1453

	1454

	1455 bool ActionNode::Emit(RegExpCompiler* compiler) {

	1456 RegExpMacroAssembler* macro = compiler->macro_assembler();

	1457 Bind(macro);

	1458 switch (type_) {

	1459 case STORE_REGISTER:

	1460 macro->SetRegister(data_.u_store_register.reg,

	1461 data_.u_store_register.value);

	1462 break;

	1463 case INCREMENT_REGISTER: {

	1464 Label undo;

	1465 macro->PushBacktrack(&undo);

	1466 macro->AdvanceRegister(data_.u_increment_register.reg, 1);

	1467 bool ok = on_success()->GoTo(compiler);

	1468 if (!ok) {

	1469 undo.Unuse();

	1470 return false;

	1471 }

	1472 macro->Bind(&undo);

	1473 macro->AdvanceRegister(data_.u_increment_register.reg, -1);

	1474 macro->Backtrack();

	1475 break;

	1476 }

	1477 case STORE_POSITION: {

	1478 Label undo;

	1479 macro->PushRegister(data_.u_position_register.reg);

	1480 macro->PushBacktrack(&undo);

	1481 macro->WriteCurrentPositionToRegister(data_.u_position_register.reg);

	1482 bool ok = on_success()->GoTo(compiler);

	1483 if (!ok) {

	1484 undo.Unuse();

	1485 return false;

	1486 }

	1487 macro->Bind(&undo);

	1488 macro->PopRegister(data_.u_position_register.reg);

	1489 macro->Backtrack();

	1490 break;

	1491 }

	1492 case RESTORE_POSITION:

	1493 macro->ReadCurrentPositionFromRegister(

	1494 data_.u_position_register.reg);

	1495 break;

	1496 case BEGIN_SUBMATCH:

	1497 macro->WriteCurrentPositionToRegister(

	1498 data_.u_submatch.current_position_register);

	1499 macro->WriteStackPointerToRegister(

	1500 data_.u_submatch.stack_pointer_register);

	1501 break;

	1502 case ESCAPE_SUBMATCH:

	1503 if (info()->at_end) {

	1504 Label at_end;

	1505 // Load current character jumps to the label if we are beyond the string

	1506 // end.

	1507 macro->LoadCurrentCharacter(0, &at_end);

	1508 macro->Backtrack();

	1509 macro->Bind(&at_end);

	1510 }

	1511 if (data_.u_submatch.current_position_register != -1) {

	1512 macro->ReadCurrentPositionFromRegister(

	1513 data_.u_submatch.current_position_register);

	1514 }

	1515 macro->ReadStackPointerFromRegister(

	1516 data_.u_submatch.stack_pointer_register);

	1517 break;

	1518 default:

	1519 UNREACHABLE();

	1520 return false;

	1521 }

	1522 return on_success()->GoTo(compiler);

	1523 }

	1524

	1525

	1526 bool BackReferenceNode::Emit(RegExpCompiler* compiler) {

	1527 RegExpMacroAssembler* macro = compiler->macro_assembler();

	1528 Bind(macro);

	1529 // Check whether the registers are uninitialized and always

	1530 // succeed if they are.

	1531 macro->IfRegisterLT(start_reg_, 0, on_success()->label());

	1532 macro->IfRegisterLT(end_reg_, 0, on_success()->label());

	1533 ASSERT_EQ(start_reg_ + 1, end_reg_);

	1534 if (info()->at_end) {

	1535 // If we are constrained to match at the end of the input then succeed

	1536 // iff the back reference is empty.

	1537 macro->CheckNotRegistersEqual(start_reg_, end_reg_, on_failure_->label());

	1538 } else {

	1539 if (compiler->ignore_case()) {

	1540 macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label());

	1541 } else {

	1542 macro->CheckNotBackReference(start_reg_, on_failure_->label());

	1543 }

	1544 }

	1545 return on_success()->GoTo(compiler);

	1546 }

	1547

	1548

	1549 // -------------------------------------------------------------------

	1550 // Dot/dotty output

	1551

	1552

	1553 #ifdef DEBUG

	1554

	1555

	1556 class DotPrinter: public NodeVisitor {

	1557 public:

	1558 DotPrinter() : stream_(&alloc_) { }

	1559 void PrintNode(const char* label, RegExpNode* node);

	1560 void Visit(RegExpNode* node);

	1561 void PrintOnFailure(RegExpNode* from, RegExpNode* on_failure);

	1562 void PrintAttributes(RegExpNode* from);

	1563 StringStream* stream() { return &stream_; }

	1564 #define DECLARE_VISIT(Type) \

	1565 virtual void Visit##Type(Type##Node* that);

	1566 FOR_EACH_NODE_TYPE(DECLARE_VISIT)

	1567 #undef DECLARE_VISIT

	1568 private:

	1569 HeapStringAllocator alloc_;

	1570 StringStream stream_;

	1571 std::set<RegExpNode*> seen_;

	1572 };

	1573

	1574

	1575 void DotPrinter::PrintNode(const char* label, RegExpNode* node) {

	1576 stream()->Add("digraph G {\n graph [label=\"");

	1577 for (int i = 0; label[i]; i++) {

	1578 switch (label[i]) {

	1579 case '\\':

	1580 stream()->Add("\\\\");

	1581 break;

	1582 case '"':

	1583 stream()->Add("\"");

	1584 break;

	1585 default:

	1586 stream()->Put(label[i]);

	1587 break;

	1588 }

	1589 }

	1590 stream()->Add("\"];\n");

	1591 Visit(node);

	1592 stream()->Add("}\n");

	1593 printf("%s", *(stream()->ToCString()));

	1594 }

	1595

	1596

	1597 void DotPrinter::Visit(RegExpNode* node) {

	1598 if (seen_.find(node) != seen_.end())

	1599 return;

	1600 seen_.insert(node);

	1601 node->Accept(this);

	1602 }

	1603

	1604

	1605 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {

	1606 if (on_failure->IsBacktrack()) return;

	1607 stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure);

	1608 Visit(on_failure);

	1609 }

	1610

	1611

	1612 class TableEntryBodyPrinter {

	1613 public:

	1614 TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice)

	1615 : stream_(stream), choice_(choice) { }

	1616 void Call(uc16 from, DispatchTable::Entry entry) {

	1617 OutSet* out_set = entry.out_set();

	1618 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {

	1619 if (out_set->Get(i)) {

	1620 stream()->Add(" n%p:s%io%i -> n%p;\n",

	1621 choice(),

	1622 from,

	1623 i,

	1624 choice()->alternatives()->at(i).node());

	1625 }

	1626 }

	1627 }

	1628 private:

	1629 StringStream* stream() { return stream_; }

	1630 ChoiceNode* choice() { return choice_; }

	1631 StringStream* stream_;

	1632 ChoiceNode* choice_;

	1633 };

	1634

	1635

	1636 class TableEntryHeaderPrinter {

	1637 public:

	1638 explicit TableEntryHeaderPrinter(StringStream* stream)

	1639 : first_(true), stream_(stream) { }

	1640 void Call(uc16 from, DispatchTable::Entry entry) {

	1641 if (first_) {

	1642 first_ = false;

	1643 } else {

	1644 stream()->Add("\|");

	1645 }

	1646 stream()->Add("{\\%k-\\%k\|{", from, entry.to());

	1647 OutSet* out_set = entry.out_set();

	1648 int priority = 0;

	1649 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {

	1650 if (out_set->Get(i)) {

	1651 if (priority > 0) stream()->Add("\|");

	1652 stream()->Add("<s%io%i> %i", from, i, priority);

	1653 priority++;

	1654 }

	1655 }

	1656 stream()->Add("}}");

	1657 }

	1658 private:

	1659 bool first_;

	1660 StringStream* stream() { return stream_; }

	1661 StringStream* stream_;

	1662 };

	1663

	1664

	1665 void DotPrinter::PrintAttributes(RegExpNode* that) {

	1666 stream()->Add(" a%p [shape=Mrecord, style=dashed, color=lightgrey, "

	1667 "fontcolor=lightgrey, margin=0.1, fontsize=10, label=\"{",

	1668 that);

	1669 NodeInfo* info = that->info();

	1670 stream()->Add("{NI\|%i}\|{WI\|%i}\|{SI\|%i}",

	1671 info->follows_newline_interest,

	1672 info->follows_word_interest,

	1673 info->follows_start_interest);

	1674 stream()->Add("\|{DN\|%i}\|{DW\|%i}\|{DS\|%i}\|{AE\|%i}",

	1675 info->determine_newline,

	1676 info->determine_word,

	1677 info->determine_start,

	1678 info->at_end);

	1679 if (info->follows_newline != NodeInfo::UNKNOWN)

	1680 stream()->Add("\|{FN\|%i}", info->follows_newline);

	1681 if (info->follows_word != NodeInfo::UNKNOWN)

	1682 stream()->Add("\|{FW\|%i}", info->follows_word);

	1683 if (info->follows_start != NodeInfo::UNKNOWN)

	1684 stream()->Add("\|{FS\|%i}", info->follows_start);

	1685 Label* label = that->label();

	1686 if (label->is_bound())

	1687 stream()->Add("\|{@\|%x}", label->pos());

	1688 stream()->Add("}\"];\n");

	1689 stream()->Add(" a%p -> n%p [style=dashed, color=lightgrey, "

	1690 "arrowhead=none];\n", that, that);

	1691 }

	1692

	1693

	1694 void DotPrinter::VisitChoice(ChoiceNode* that) {

	1695 stream()->Add(" n%p [shape=Mrecord, label=\"", that);

	1696 TableEntryHeaderPrinter header_printer(stream());

	1697 that->table()->ForEach(&header_printer);

	1698 stream()->Add("\"]\n", that);

	1699 PrintAttributes(that);

	1700 TableEntryBodyPrinter body_printer(stream(), that);

	1701 that->table()->ForEach(&body_printer);

	1702 PrintOnFailure(that, that->on_failure());

	1703 for (int i = 0; i < that->alternatives()->length(); i++) {

	1704 GuardedAlternative alt = that->alternatives()->at(i);

	1705 alt.node()->Accept(this);

	1706 }

	1707 }

	1708

	1709

	1710 void DotPrinter::VisitText(TextNode* that) {

	1711 stream()->Add(" n%p [label=\"", that);

	1712 for (int i = 0; i < that->elements()->length(); i++) {

	1713 if (i > 0) stream()->Add(" ");

	1714 TextElement elm = that->elements()->at(i);

	1715 switch (elm.type) {

	1716 case TextElement::ATOM: {

	1717 stream()->Add("'%w'", elm.data.u_atom->data());

	1718 break;

	1719 }

	1720 case TextElement::CHAR_CLASS: {

	1721 RegExpCharacterClass* node = elm.data.u_char_class;

	1722 stream()->Add("[");

	1723 if (node->is_negated())

	1724 stream()->Add("^");

	1725 for (int j = 0; j < node->ranges()->length(); j++) {

	1726 CharacterRange range = node->ranges()->at(j);

	1727 stream()->Add("%k-%k", range.from(), range.to());

	1728 }

	1729 stream()->Add("]");

	1730 break;

	1731 }

	1732 default:

	1733 UNREACHABLE();

	1734 }

	1735 }

	1736 stream()->Add("\", shape=box, peripheries=2];\n");

	1737 PrintAttributes(that);

	1738 stream()->Add(" n%p -> n%p;\n", that, that->on_success());

	1739 Visit(that->on_success());

	1740 PrintOnFailure(that, that->on_failure());

	1741 }

	1742

	1743

	1744 void DotPrinter::VisitBackReference(BackReferenceNode* that) {

	1745 stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n",

	1746 that,

	1747 that->start_register(),

	1748 that->end_register());

	1749 PrintAttributes(that);

	1750 stream()->Add(" n%p -> n%p;\n", that, that->on_success());

	1751 Visit(that->on_success());

	1752 PrintOnFailure(that, that->on_failure());

	1753 }

	1754

	1755

	1756 void DotPrinter::VisitEnd(EndNode* that) {

	1757 stream()->Add(" n%p [style=bold, shape=point];\n", that);

	1758 PrintAttributes(that);

	1759 }

	1760

	1761

	1762 void DotPrinter::VisitAction(ActionNode* that) {

	1763 stream()->Add(" n%p [", that);

	1764 switch (that->type_) {

	1765 case ActionNode::STORE_REGISTER:

	1766 stream()->Add("label=\"$%i:=%i\", shape=octagon",

	1767 that->data_.u_store_register.reg,

	1768 that->data_.u_store_register.value);

	1769 break;

	1770 case ActionNode::INCREMENT_REGISTER:

	1771 stream()->Add("label=\"$%i++\", shape=octagon",

	1772 that->data_.u_increment_register.reg);

	1773 break;

	1774 case ActionNode::STORE_POSITION:

	1775 stream()->Add("label=\"$%i:=$pos\", shape=octagon",

	1776 that->data_.u_position_register.reg);

	1777 break;

	1778 case ActionNode::RESTORE_POSITION:

	1779 stream()->Add("label=\"$pos:=$%i\", shape=octagon",

	1780 that->data_.u_position_register.reg);

	1781 break;

	1782 case ActionNode::BEGIN_SUBMATCH:

	1783 stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon",

	1784 that->data_.u_submatch.current_position_register);

	1785 break;

	1786 case ActionNode::ESCAPE_SUBMATCH:

	1787 stream()->Add("label=\"escape\", shape=septagon");

	1788 break;

	1789 }

	1790 stream()->Add("];\n");

	1791 PrintAttributes(that);

	1792 stream()->Add(" n%p -> n%p;\n", that, that->on_success());

	1793 Visit(that->on_success());

	1794 }

	1795

	1796

	1797 class DispatchTableDumper {

	1798 public:

	1799 explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { }

	1800 void Call(uc16 key, DispatchTable::Entry entry);

	1801 StringStream* stream() { return stream_; }

	1802 private:

	1803 StringStream* stream_;

	1804 };

	1805

	1806

	1807 void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {

	1808 stream()->Add("[%k-%k]: {", key, entry.to());

	1809 OutSet* set = entry.out_set();

	1810 bool first = true;

	1811 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {

	1812 if (set->Get(i)) {

	1813 if (first) {

	1814 first = false;

	1815 } else {

	1816 stream()->Add(", ");

	1817 }

	1818 stream()->Add("%i", i);

	1819 }

	1820 }

	1821 stream()->Add("}\n");

	1822 }

	1823

	1824

	1825 void DispatchTable::Dump() {

	1826 HeapStringAllocator alloc;

	1827 StringStream stream(&alloc);

	1828 DispatchTableDumper dumper(&stream);

	1829 tree()->ForEach(&dumper);

	1830 OS::PrintError("%s", *stream.ToCString());

	1831 }

	1832

	1833

	1834 void RegExpEngine::DotPrint(const char* label, RegExpNode* node) {

	1835 DotPrinter printer;

	1836 printer.PrintNode(label, node);

	1837 }

	1838

	1839

	1840 #endif // DEBUG

	1841

	1842

	1843 // -------------------------------------------------------------------

	1844 // Tree to graph conversion

	1845

	1846

	1847 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,

	1848 RegExpNode* on_success,

	1849 RegExpNode* on_failure) {

	1850 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);

	1851 elms->Add(TextElement::Atom(this));

	1852 return new TextNode(elms, on_success, on_failure);

	1853 }

	1854

	1855

	1856 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,

	1857 RegExpNode* on_success,

	1858 RegExpNode* on_failure) {

	1859 return new TextNode(elements(), on_success, on_failure);

	1860 }

	1861

	1862

	1863 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,

	1864 RegExpNode* on_success,

	1865 RegExpNode* on_failure) {

	1866 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);

	1867 elms->Add(TextElement::CharClass(this));

	1868 return new TextNode(elms, on_success, on_failure);

	1869 }

	1870

	1871

	1872 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,

	1873 RegExpNode* on_success,

	1874 RegExpNode* on_failure) {

	1875 ZoneList<RegExpTree> alternatives = this->alternatives();

	1876 int length = alternatives->length();

	1877 ChoiceNode* result = new ChoiceNode(length, on_failure);

	1878 for (int i = 0; i < length; i++) {

	1879 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,

	1880 on_success,

	1881 on_failure));

	1882 result->AddAlternative(alternative);

	1883 }

	1884 return result;

	1885 }

	1886

	1887

	1888 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,

	1889 RegExpNode* on_success,

	1890 RegExpNode* on_failure) {

	1891 return ToNode(min(),

	1892 max(),

	1893 is_greedy(),

	1894 body(),

	1895 compiler,

	1896 on_success,

	1897 on_failure);

	1898 }

	1899

	1900

	1901 RegExpNode* RegExpQuantifier::ToNode(int min,

	1902 int max,

	1903 bool is_greedy,

	1904 RegExpTree* body,

	1905 RegExpCompiler* compiler,

	1906 RegExpNode* on_success,

	1907 RegExpNode* on_failure) {

	1908 // x{f, t} becomes this:

	1909 //

	1910 // (r++)<-.

	1911 // \| `

	1912 // \| (x)

	1913 // v ^

	1914 // (r=0)-->(?)---/ [if r < t]

	1915 // \|

	1916 // [if r >= f] \----> ...

	1917 //

	1918 //

	1919 // TODO(someone): clear captures on repetition and handle empty

	1920 // matches.

	1921 bool has_min = min > 0;

	1922 bool has_max = max < RegExpQuantifier::kInfinity;

	1923 bool needs_counter = has_min \|\| has_max;

	1924 int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;

	1925 ChoiceNode* center = new ChoiceNode(2, on_failure);

	1926 RegExpNode* loop_return = needs_counter

	1927 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))

	1928 : static_cast<RegExpNode*>(center);

	1929 RegExpNode* body_node = body->ToNode(compiler, loop_return, on_failure);

	1930 GuardedAlternative body_alt(body_node);

	1931 if (has_max) {

	1932 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);

	1933 body_alt.AddGuard(body_guard);

	1934 }

	1935 GuardedAlternative rest_alt(on_success);

	1936 if (has_min) {

	1937 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min);

	1938 rest_alt.AddGuard(rest_guard);

	1939 }

	1940 if (is_greedy) {

	1941 center->AddAlternative(body_alt);

	1942 center->AddAlternative(rest_alt);

	1943 } else {

	1944 center->AddAlternative(rest_alt);

	1945 center->AddAlternative(body_alt);

	1946 }

	1947 if (needs_counter) {

	1948 return ActionNode::StoreRegister(reg_ctr, 0, center);

	1949 } else {

	1950 return center;

	1951 }

	1952 }

	1953

	1954

	1955 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,

	1956 RegExpNode* on_success,

	1957 RegExpNode* on_failure) {

	1958 NodeInfo info;

	1959 switch (type()) {

	1960 case START_OF_LINE:

	1961 info.follows_newline_interest = true;

	1962 break;

	1963 case START_OF_INPUT:

	1964 info.follows_start_interest = true;

	1965 break;

	1966 case BOUNDARY: case NON_BOUNDARY:

	1967 info.follows_word_interest = true;

	1968 break;

	1969 case END_OF_INPUT:

	1970 info.at_end = true;

	1971 break;

	1972 case END_OF_LINE:

	1973 // This is wrong but has the effect of making the compiler abort.

	1974 info.at_end = true;

	1975 }

	1976 return on_success->PropagateForward(&info);

	1977 }

	1978

	1979

	1980 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,

	1981 RegExpNode* on_success,

	1982 RegExpNode* on_failure) {

	1983 return new BackReferenceNode(RegExpCapture::StartRegister(index()),

	1984 RegExpCapture::EndRegister(index()),

	1985 on_success,

	1986 on_failure);

	1987 }

	1988

	1989

	1990 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,

	1991 RegExpNode* on_success,

	1992 RegExpNode* on_failure) {

	1993 return on_success;

	1994 }

	1995

	1996

	1997 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,

	1998 RegExpNode* on_success,

	1999 RegExpNode* on_failure) {

	2000 int stack_pointer_register = compiler->AllocateRegister();

	2001 int position_register = compiler->AllocateRegister();

	2002 if (is_positive()) {

	2003 // begin submatch scope

	2004 // $reg = $pos

	2005 // if [body]

	2006 // then

	2007 // $pos = $reg

	2008 // escape submatch scope (drop all backtracks created in scope)

	2009 // succeed

	2010 // else

	2011 // end submatch scope (nothing to clean up, just exit the scope)

	2012 // fail

	2013 return ActionNode::BeginSubmatch(

	2014 stack_pointer_register,

	2015 position_register,

	2016 body()->ToNode(

	2017 compiler,

	2018 ActionNode::EscapeSubmatch(

	2019 stack_pointer_register,

	2020 true, // Also restore input position.

	2021 position_register,

	2022 on_success),

	2023 on_failure));

	2024 } else {

	2025 // begin submatch scope

	2026 // try

	2027 // first if (body)

	2028 // then

	2029 // escape submatch scope

	2030 // fail

	2031 // else

	2032 // backtrack

	2033 // second

	2034 // end submatch scope

	2035 // restore current position

	2036 // succeed

	2037 ChoiceNode* try_node =

	2038 new ChoiceNode(1, ActionNode::RestorePosition(position_register,

	2039 on_success));

	2040 RegExpNode* body_node = body()->ToNode(

	2041 compiler,

	2042 ActionNode::EscapeSubmatch(stack_pointer_register,

	2043 false, // Don't also restore position

	2044 0, // Unused arguments.

	2045 on_failure),

	2046 compiler->backtrack());

	2047 GuardedAlternative body_alt(body_node);

	2048 try_node->AddAlternative(body_alt);

	2049 return ActionNode::BeginSubmatch(stack_pointer_register,

	2050 position_register,

	2051 try_node);

	2052 }

	2053 }

	2054

	2055

	2056 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,

	2057 RegExpNode* on_success,

	2058 RegExpNode* on_failure) {

	2059 return ToNode(body(), index(), compiler, on_success, on_failure);

	2060 }

	2061

	2062

	2063 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,

	2064 int index,

	2065 RegExpCompiler* compiler,

	2066 RegExpNode* on_success,

	2067 RegExpNode* on_failure) {

	2068 int start_reg = RegExpCapture::StartRegister(index);

	2069 int end_reg = RegExpCapture::EndRegister(index);

	2070 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);

	2071 RegExpNode* body_node = body->ToNode(compiler, store_end, on_failure);

	2072 return ActionNode::StorePosition(start_reg, body_node);

	2073 }

	2074

	2075

	2076 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,

	2077 RegExpNode* on_success,

	2078 RegExpNode* on_failure) {

	2079 ZoneList<RegExpTree> children = nodes();

	2080 RegExpNode* current = on_success;

	2081 for (int i = children->length() - 1; i >= 0; i--) {

	2082 current = children->at(i)->ToNode(compiler, current, on_failure);

	2083 }

	2084 return current;

	2085 }

	2086

	2087

	2088 static const int kSpaceRangeCount = 20;

	2089 static const uc16 kSpaceRanges[kSpaceRangeCount] = {

	2090 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680,

	2091 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029,

	2092 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000

	2093 };

	2094

	2095

	2096 static const int kWordRangeCount = 8;

	2097 static const uc16 kWordRanges[kWordRangeCount] = {

	2098 '0', '9', 'A', 'Z', '_', '_', 'a', 'z'

	2099 };

	2100

	2101

	2102 static const int kDigitRangeCount = 2;

	2103 static const uc16 kDigitRanges[kDigitRangeCount] = {

	2104 '0', '9'

	2105 };

	2106

	2107

	2108 static const int kLineTerminatorRangeCount = 6;

	2109 static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = {

	2110 0x000A, 0x000A, 0x000D, 0x000D, 0x2028, 0x2029

	2111 };

	2112

	2113

	2114 static void AddClass(const uc16* elmv,

	2115 int elmc,

	2116 ZoneList<CharacterRange>* ranges) {

	2117 for (int i = 0; i < elmc; i += 2) {

	2118 ASSERT(elmv[i] <= elmv[i + 1]);

	2119 ranges->Add(CharacterRange(elmv[i], elmv[i + 1]));

	2120 }

	2121 }

	2122

	2123

	2124 static void AddClassNegated(const uc16 *elmv,

	2125 int elmc,

	2126 ZoneList<CharacterRange>* ranges) {

	2127 ASSERT(elmv[0] != 0x0000);

	2128 ASSERT(elmv[elmc-1] != 0xFFFF);

	2129 uc16 last = 0x0000;

	2130 for (int i = 0; i < elmc; i += 2) {

	2131 ASSERT(last <= elmv[i] - 1);

	2132 ASSERT(elmv[i] <= elmv[i + 1]);

	2133 ranges->Add(CharacterRange(last, elmv[i] - 1));

	2134 last = elmv[i + 1] + 1;

	2135 }

	2136 ranges->Add(CharacterRange(last, 0xFFFF));

	2137 }

	2138

	2139

	2140 void CharacterRange::AddClassEscape(uc16 type,

	2141 ZoneList<CharacterRange>* ranges) {

	2142 switch (type) {

	2143 case 's':

	2144 AddClass(kSpaceRanges, kSpaceRangeCount, ranges);

	2145 break;

	2146 case 'S':

	2147 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges);

	2148 break;

	2149 case 'w':

	2150 AddClass(kWordRanges, kWordRangeCount, ranges);

	2151 break;

	2152 case 'W':

	2153 AddClassNegated(kWordRanges, kWordRangeCount, ranges);

	2154 break;

	2155 case 'd':

	2156 AddClass(kDigitRanges, kDigitRangeCount, ranges);

	2157 break;

	2158 case 'D':

	2159 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges);

	2160 break;

	2161 case '.':

	2162 AddClassNegated(kLineTerminatorRanges,

	2163 kLineTerminatorRangeCount,

	2164 ranges);

	2165 break;

	2166 // This is not a character range as defined by the spec but a

	2167 // convenient shorthand for a character class that matches any

	2168 // character.

	2169 case '*':

	2170 ranges->Add(CharacterRange::Everything());

	2171 break;

	2172 default:

	2173 UNREACHABLE();

	2174 }

	2175 }

	2176

	2177

	2178 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {

	2179 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	2180 if (IsSingleton()) {

	2181 // If this is a singleton we just expand the one character.

	2182 int length = uncanonicalize.get(from(), '\0', chars);

	2183 for (int i = 0; i < length; i++) {

	2184 uc32 chr = chars[i];

	2185 if (chr != from()) {

	2186 ranges->Add(CharacterRange::Singleton(chars[i]));

	2187 }

	2188 }

	2189 } else if (from() <= kRangeCanonicalizeMax &&

	2190 to() <= kRangeCanonicalizeMax) {

	2191 // If this is a range we expand the characters block by block,

	2192 // expanding contiguous subranges (blocks) one at a time.

	2193 // The approach is as follows. For a given start character we

	2194 // look up the block that contains it, for instance 'a' if the

	2195 // start character is 'c'. A block is characterized by the property

	2196 // that all characters uncanonicalize in the same way as the first

	2197 // element, except that each entry in the result is incremented

	2198 // by the distance from the first element. So a-z is a block

	2199 // because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter

	2200 // uncanonicalizes to ['a' + k, 'A' + k].

	2201 // Once we've found the start point we look up its uncanonicalization

	2202 // and produce a range for each element. For instance for [c-f]

	2203 // we look up ['a', 'A'] and produce [c-f] and [C-F]. We then only

	2204 // add a range if it is not already contained in the input, so [c-f]

	2205 // will be skipped but [C-F] will be added. If this range is not

	2206 // completely contained in a block we do this for all the blocks

	2207 // covered by the range.

	2208 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	2209 // First, look up the block that contains the 'from' character.

	2210 int length = canonrange.get(from(), '\0', range);

	2211 if (length == 0) {

	2212 range[0] = from();

	2213 } else {

	2214 ASSERT_EQ(1, length);

	2215 }

	2216 int pos = from();

	2217 // The start of the current block. Note that except for the first

	2218 // iteration 'start' is always equal to 'pos'.

	2219 int start;

	2220 // If it is not the start point of a block the entry contains the

	2221 // offset of the character from the start point.

	2222 if ((range[0] & kStartMarker) == 0) {

	2223 start = pos - range[0];

	2224 } else {

	2225 start = pos;

	2226 }

	2227 // Then we add the ranges on at a time, incrementing the current

	2228 // position to be after the last block each time. The position

	2229 // always points to the start of a block.

	2230 while (pos < to()) {

	2231 length = canonrange.get(start, '\0', range);

	2232 if (length == 0) {

	2233 range[0] = start;

	2234 } else {

	2235 ASSERT_EQ(1, length);

	2236 }

	2237 ASSERT((range[0] & kStartMarker) != 0);

	2238 // The start point of a block contains the distance to the end

	2239 // of the range.

	2240 int block_end = start + (range[0] & kPayloadMask) - 1;

	2241 int end = (block_end > to()) ? to() : block_end;

	2242 length = uncanonicalize.get(start, '\0', range);

	2243 for (int i = 0; i < length; i++) {

	2244 uc32 c = range[i];

	2245 uc16 range_from = c + (pos - start);

	2246 uc16 range_to = c + (end - start);

	2247 if (!(from() <= range_from && range_to <= to())) {

	2248 ranges->Add(CharacterRange(range_from, range_to));

	2249 }

	2250 }

	2251 start = pos = block_end + 1;

	2252 }

	2253 } else {

	2254 // TODO(plesner) when we've fixed the 2^11 bug in unibrow.

	2255 }

	2256 }

	2257

	2258

	2259 // -------------------------------------------------------------------

	2260 // Interest propagation

	2261

	2262

	2263 RegExpNode* RegExpNode::GetSibling(NodeInfo* info) {

	2264 for (int i = 0; i < siblings_.length(); i++) {

	2265 RegExpNode* sibling = siblings_.Get(i);

	2266 if (sibling->info()->HasSameForwardInterests(info))

	2267 return sibling;

	2268 }

	2269 return NULL;

	2270 }

	2271

	2272

	2273 template <class C>

	2274 static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) {

	2275 NodeInfo full_info(*node->info());

	2276 full_info.AddFromPreceding(info);

	2277 RegExpNode* sibling = node->GetSibling(&full_info);

	2278 if (sibling != NULL) return sibling;

	2279 node->EnsureSiblings();

	2280 sibling = new C(*node);

	2281 sibling->info()->AddFromPreceding(&full_info);

	2282 node->AddSibling(sibling);

	2283 return sibling;

	2284 }

	2285

	2286

	2287 RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {

	2288 NodeInfo full_info(*this->info());

	2289 full_info.AddFromPreceding(info);

	2290 RegExpNode* sibling = GetSibling(&full_info);

	2291 if (sibling != NULL) return sibling;

	2292 EnsureSiblings();

	2293 ActionNode* action = new ActionNode(*this);

	2294 action->info()->AddFromPreceding(&full_info);

	2295 AddSibling(action);

	2296 if (type_ != ESCAPE_SUBMATCH) {

	2297 action->set_on_success(action->on_success()->PropagateForward(info));

	2298 }

	2299 return action;

	2300 }

	2301

	2302

	2303 RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {

	2304 NodeInfo full_info(*this->info());

	2305 full_info.AddFromPreceding(info);

	2306 RegExpNode* sibling = GetSibling(&full_info);

	2307 if (sibling != NULL) return sibling;

	2308 EnsureSiblings();

	2309 ChoiceNode* choice = new ChoiceNode(*this);

	2310 choice->info()->AddFromPreceding(&full_info);

	2311 AddSibling(choice);

	2312 ZoneList<GuardedAlternative>* old_alternatives = alternatives();

	2313 int count = old_alternatives->length();

	2314 choice->alternatives_ = new ZoneList<GuardedAlternative>(count);

	2315 for (int i = 0; i < count; i++) {

	2316 GuardedAlternative alternative = old_alternatives->at(i);

	2317 alternative.set_node(alternative.node()->PropagateForward(info));

	2318 choice->alternatives()->Add(alternative);

	2319 }

	2320 if (!choice->on_failure_->IsBacktrack()) {

	2321 choice->on_failure_ = choice->on_failure_->PropagateForward(info);

	2322 }

	2323 return choice;

	2324 }

	2325

	2326

	2327 RegExpNode* EndNode::PropagateForward(NodeInfo* info) {

	2328 return PropagateToEndpoint(this, info);

	2329 }

	2330

	2331

	2332 RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {

	2333 NodeInfo full_info(*this->info());

	2334 full_info.AddFromPreceding(info);

	2335 RegExpNode* sibling = GetSibling(&full_info);

	2336 if (sibling != NULL) return sibling;

	2337 EnsureSiblings();

	2338 BackReferenceNode* back_ref = new BackReferenceNode(*this);

	2339 back_ref->info()->AddFromPreceding(&full_info);

	2340 AddSibling(back_ref);

	2341 // TODO(erikcorry): A back reference has to have two successors (by default

	2342 // the same node). The first is used if the back reference matches a non-

	2343 // empty back reference, the second if it matches an empty one. This doesn't

	2344 // matter for at_end, which is the only one implemented right now, but it will

	2345 // matter for other pieces of info.

	2346 back_ref->set_on_success(back_ref->on_success()->PropagateForward(info));

	2347 return back_ref;

	2348 }

	2349

	2350

	2351 RegExpNode* TextNode::PropagateForward(NodeInfo* info) {

	2352 return PropagateToEndpoint(this, info);

	2353 }

	2354

	2355

	2356 // -------------------------------------------------------------------

	2357 // Splay tree

	2358

	2359

	2360 OutSet* OutSet::Extend(unsigned value) {

	2361 if (Get(value))

	2362 return this;

	2363 if (successors() != NULL) {

	2364 for (int i = 0; i < successors()->length(); i++) {

	2365 OutSet* successor = successors()->at(i);

	2366 if (successor->Get(value))

	2367 return successor;

	2368 }

	2369 } else {

	2370 successors_ = new ZoneList<OutSet*>(2);

	2371 }

	2372 OutSet* result = new OutSet(first_, remaining_);

	2373 result->Set(value);

	2374 successors()->Add(result);

	2375 return result;

	2376 }

	2377

	2378

	2379 void OutSet::Set(unsigned value) {

	2380 if (value < kFirstLimit) {

	2381 first_ \|= (1 << value);

	2382 } else {

	2383 if (remaining_ == NULL)

	2384 remaining_ = new ZoneList<unsigned>(1);

	2385 if (remaining_->is_empty() \|\| !remaining_->Contains(value))

	2386 remaining_->Add(value);

	2387 }

	2388 }

	2389

	2390

	2391 bool OutSet::Get(unsigned value) {

	2392 if (value < kFirstLimit) {

	2393 return (first_ & (1 << value)) != 0;

	2394 } else if (remaining_ == NULL) {

	2395 return false;

	2396 } else {

	2397 return remaining_->Contains(value);

	2398 }

	2399 }

	2400

	2401

	2402 const uc16 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;

	2403 const DispatchTable::Entry DispatchTable::Config::kNoValue;

	2404

	2405

	2406 void DispatchTable::AddRange(CharacterRange full_range, int value) {

	2407 CharacterRange current = full_range;

	2408 if (tree()->is_empty()) {

	2409 // If this is the first range we just insert into the table.

	2410 ZoneSplayTree<Config>::Locator loc;

	2411 ASSERT_RESULT(tree()->Insert(current.from(), &loc));

	2412 loc.set_value(Entry(current.from(), current.to(), empty()->Extend(value)));

	2413 return;

	2414 }

	2415 // First see if there is a range to the left of this one that

	2416 // overlaps.

	2417 ZoneSplayTree<Config>::Locator loc;

	2418 if (tree()->FindGreatestLessThan(current.from(), &loc)) {

	2419 Entry* entry = &loc.value();

	2420 // If we've found a range that overlaps with this one, and it

	2421 // starts strictly to the left of this one, we have to fix it

	2422 // because the following code only handles ranges that start on

	2423 // or after the start point of the range we're adding.

	2424 if (entry->from() < current.from() && entry->to() >= current.from()) {

	2425 // Snap the overlapping range in half around the start point of

	2426 // the range we're adding.

	2427 CharacterRange left(entry->from(), current.from() - 1);

	2428 CharacterRange right(current.from(), entry->to());

	2429 // The left part of the overlapping range doesn't overlap.

	2430 // Truncate the whole entry to be just the left part.

	2431 entry->set_to(left.to());

	2432 // The right part is the one that overlaps. We add this part

	2433 // to the map and let the next step deal with merging it with

	2434 // the range we're adding.

	2435 ZoneSplayTree<Config>::Locator loc;

	2436 ASSERT_RESULT(tree()->Insert(right.from(), &loc));

	2437 loc.set_value(Entry(right.from(),

	2438 right.to(),

	2439 entry->out_set()));

	2440 }

	2441 }

	2442 while (current.is_valid()) {

	2443 if (tree()->FindLeastGreaterThan(current.from(), &loc) &&

	2444 (loc.value().from() <= current.to()) &&

	2445 (loc.value().to() >= current.from())) {

	2446 Entry* entry = &loc.value();

	2447 // We have overlap. If there is space between the start point of

	2448 // the range we're adding and where the overlapping range starts

	2449 // then we have to add a range covering just that space.

	2450 if (current.from() < entry->from()) {

	2451 ZoneSplayTree<Config>::Locator ins;

	2452 ASSERT_RESULT(tree()->Insert(current.from(), &ins));

	2453 ins.set_value(Entry(current.from(),

	2454 entry->from() - 1,

	2455 empty()->Extend(value)));

	2456 current.set_from(entry->from());

	2457 }

	2458 ASSERT_EQ(current.from(), entry->from());

	2459 // If the overlapping range extends beyond the one we want to add

	2460 // we have to snap the right part off and add it separately.

	2461 if (entry->to() > current.to()) {

	2462 ZoneSplayTree<Config>::Locator ins;

	2463 ASSERT_RESULT(tree()->Insert(current.to() + 1, &ins));

	2464 ins.set_value(Entry(current.to() + 1,

	2465 entry->to(),

	2466 entry->out_set()));

	2467 entry->set_to(current.to());

	2468 }

	2469 ASSERT(entry->to() <= current.to());

	2470 // The overlapping range is now completely contained by the range

	2471 // we're adding so we can just update it and move the start point

	2472 // of the range we're adding just past it.

	2473 entry->AddValue(value);

	2474 // Bail out if the last interval ended at 0xFFFF since otherwise

	2475 // adding 1 will wrap around to 0.

	2476 if (entry->to() == 0xFFFF)

	2477 break;

	2478 ASSERT(entry->to() + 1 > current.from());

	2479 current.set_from(entry->to() + 1);

	2480 } else {

	2481 // There is no overlap so we can just add the range

	2482 ZoneSplayTree<Config>::Locator ins;

	2483 ASSERT_RESULT(tree()->Insert(current.from(), &ins));

	2484 ins.set_value(Entry(current.from(),

	2485 current.to(),

	2486 empty()->Extend(value)));

	2487 break;

	2488 }

	2489 }

	2490 }

	2491

	2492

	2493 OutSet* DispatchTable::Get(uc16 value) {

	2494 ZoneSplayTree<Config>::Locator loc;

	2495 if (!tree()->FindGreatestLessThan(value, &loc))

	2496 return empty();

	2497 Entry* entry = &loc.value();

	2498 if (value <= entry->to())

	2499 return entry->out_set();

	2500 else

	2501 return empty();

	2502 }

	2503

	2504

	2505 // -------------------------------------------------------------------

	2506 // Analysis

	2507

	2508

	2509 void Analysis::EnsureAnalyzed(RegExpNode* that) {

	2510 if (that->info()->been_analyzed \|\| that->info()->being_analyzed)

	2511 return;

	2512 that->info()->being_analyzed = true;

	2513 that->Accept(this);

	2514 that->info()->being_analyzed = false;

	2515 that->info()->been_analyzed = true;

	2516 }

	2517

	2518

	2519 void Analysis::VisitEnd(EndNode* that) {

	2520 // nothing to do

	2521 }

	2522

	2523

	2524 void Analysis::VisitText(TextNode* that) {

	2525 if (ignore_case_) {

	2526 that->MakeCaseIndependent();

	2527 }

	2528 EnsureAnalyzed(that->on_success());

	2529 EnsureAnalyzed(that->on_failure());

	2530 NodeInfo* info = that->info();

	2531 NodeInfo* next_info = that->on_success()->info();

	2532 // If the following node is interested in what it follows then this

	2533 // node must determine it.

	2534 info->determine_newline = next_info->follows_newline_interest;

	2535 info->determine_word = next_info->follows_word_interest;

	2536 info->determine_start = next_info->follows_start_interest;

	2537 }

	2538

	2539

	2540 void Analysis::VisitAction(ActionNode* that) {

	2541 EnsureAnalyzed(that->on_success());

	2542 // If the next node is interested in what it follows then this node

	2543 // has to be interested too so it can pass the information on.

	2544 that->info()->AddFromFollowing(that->on_success()->info());

	2545 }

	2546

	2547

	2548 void Analysis::VisitChoice(ChoiceNode* that) {

	2549 NodeInfo* info = that->info();

	2550 for (int i = 0; i < that->alternatives()->length(); i++) {

	2551 RegExpNode* node = that->alternatives()->at(i).node();

	2552 EnsureAnalyzed(node);

	2553 // Anything the following nodes need to know has to be known by

	2554 // this node also, so it can pass it on.

	2555 info->AddFromFollowing(node->info());

	2556 }

	2557 if (!that->table_calculated()) {

	2558 DispatchTableConstructor cons(that->table());

	2559 cons.BuildTable(that);

	2560 }

	2561 EnsureAnalyzed(that->on_failure());

	2562 }

	2563

	2564

	2565 void Analysis::VisitBackReference(BackReferenceNode* that) {

	2566 EnsureAnalyzed(that->on_success());

	2567 EnsureAnalyzed(that->on_failure());

	2568 }

	2569

	2570

	2571 // -------------------------------------------------------------------

	2572 // Dispatch table construction

	2573

	2574

	2575 void DispatchTableConstructor::VisitEnd(EndNode* that) {

	2576 AddRange(CharacterRange::Everything());

	2577 }

	2578

	2579

	2580 void DispatchTableConstructor::BuildTable(ChoiceNode* node) {

	2581 ASSERT(!node->table_calculated());

	2582 node->set_being_calculated(true);

	2583 ZoneList<GuardedAlternative>* alternatives = node->alternatives();

	2584 for (int i = 0; i < alternatives->length(); i++) {

	2585 set_choice_index(i);

	2586 alternatives->at(i).node()->Accept(this);

	2587 }

	2588 node->set_being_calculated(false);

	2589 node->set_table_calculated(true);

	2590 }

	2591

	2592

	2593 class AddDispatchRange {

	2594 public:

	2595 explicit AddDispatchRange(DispatchTableConstructor* constructor)

	2596 : constructor_(constructor) { }

	2597 void Call(uc32 from, DispatchTable::Entry entry);

	2598 private:

	2599 DispatchTableConstructor* constructor_;

	2600 };

	2601

	2602

	2603 void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) {

	2604 CharacterRange range(from, entry.to());

	2605 constructor_->AddRange(range);

	2606 }

	2607

	2608

	2609 void DispatchTableConstructor::VisitChoice(ChoiceNode* node) {

	2610 if (node->being_calculated())

	2611 return;

	2612 if (!node->table_calculated()) {

	2613 DispatchTableConstructor constructor(node->table());

	2614 constructor.BuildTable(node);

	2615 }

	2616 ASSERT(node->table_calculated());

	2617 AddDispatchRange adder(this);

	2618 node->table()->ForEach(&adder);

	2619 }

	2620

	2621

	2622 void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {

	2623 // TODO(160): Find the node that we refer back to and propagate its start

	2624 // set back to here. For now we just accept anything.

	2625 AddRange(CharacterRange::Everything());

	2626 }

	2627

	2628

	2629

	2630 static int CompareRangeByFrom(const CharacterRange* a,

	2631 const CharacterRange* b) {

	2632 return Compare<uc16>(a->from(), b->from());

	2633 }

	2634

	2635

	2636 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {

	2637 ranges->Sort(CompareRangeByFrom);

	2638 uc16 last = 0;

	2639 for (int i = 0; i < ranges->length(); i++) {

	2640 CharacterRange range = ranges->at(i);

	2641 if (last < range.from())

	2642 AddRange(CharacterRange(last, range.from() - 1));

	2643 if (range.to() >= last) {

	2644 if (range.to() == 0xFFFF) {

	2645 return;

	2646 } else {

	2647 last = range.to() + 1;

	2648 }

	2649 }

	2650 }

	2651 AddRange(CharacterRange(last, 0xFFFF));

	2652 }

	2653

	2654

	2655 void DispatchTableConstructor::VisitText(TextNode* that) {

	2656 TextElement elm = that->elements()->at(0);

	2657 switch (elm.type) {

	2658 case TextElement::ATOM: {

	2659 uc16 c = elm.data.u_atom->data()[0];

	2660 AddRange(CharacterRange(c, c));

	2661 break;

	2662 }

	2663 case TextElement::CHAR_CLASS: {

	2664 RegExpCharacterClass* tree = elm.data.u_char_class;

	2665 ZoneList<CharacterRange>* ranges = tree->ranges();

	2666 if (tree->is_negated()) {

	2667 AddInverse(ranges);

	2668 } else {

	2669 for (int i = 0; i < ranges->length(); i++)

	2670 AddRange(ranges->at(i));

	2671 }

	2672 break;

	2673 }

	2674 default: {

	2675 UNIMPLEMENTED();

	2676 }

	2677 }

	2678 }

	2679

	2680

	2681 void DispatchTableConstructor::VisitAction(ActionNode* that) {

	2682 that->on_success()->Accept(this);

	2683 }

	2684

	2685

	2686 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,

	2687 RegExpNode** node_return,

	2688 bool ignore_case,

	2689 bool is_multiline) {

	2690 RegExpCompiler compiler(input->capture_count, ignore_case);

	2691 // Wrap the body of the regexp in capture #0.

	2692 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,

	2693 0,

	2694 &compiler,

	2695 compiler.accept(),

	2696 compiler.backtrack());

	2697 // Add a .*? at the beginning, outside the body capture.

	2698 // Note: We could choose to not add this if the regexp is anchored at

	2699 // the start of the input but I'm not sure how best to do that and

	2700 // since we don't even handle ^ yet I'm saving that optimization for

	2701 // later.

	2702 RegExpNode* node = RegExpQuantifier::ToNode(0,

	2703 RegExpQuantifier::kInfinity,

	2704 false,

	2705 new RegExpCharacterClass('*'),

	2706 &compiler,

	2707 captured_body,

	2708 compiler.backtrack());

	2709 if (node_return != NULL) *node_return = node;

	2710 Analysis analysis(ignore_case);

	2711 analysis.EnsureAnalyzed(node);

	2712

	2713 if (!FLAG_irregexp) {

	2714 return Handle<FixedArray>::null();

	2715 }

	2716

	2717 if (is_multiline && !FLAG_attempt_multiline_irregexp) {

	2718 return Handle<FixedArray>::null();

	2719 }

	2720

	2721 if (FLAG_irregexp_native) {

	2722 #ifdef ARM

	2723 UNIMPLEMENTED();

	2724 #else // IA32

	2725 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16,

	2726 (input->capture_count + 1) * 2);

	2727 return compiler.Assemble(&macro_assembler,

	2728 node,

	2729 input->capture_count);

	2730 #endif

	2731 }

	2732 EmbeddedVector<byte, 1024> codes;

	2733 RegExpMacroAssemblerIrregexp macro_assembler(codes);

	2734 return compiler.Assemble(&macro_assembler,

	2735 node,

	2736 input->capture_count);

	2737 }

	2738

575	2739

576 }} // namespace v8::internal	2740 }} // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/jsregexp.h ('k') | src/list.h » ('j') | no next file with comments »