Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(14)

Side by Side Diff: src/x87/regexp-macro-assembler-x87.cc

Issue 1285163003: Move regexp implementation into its own folder. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: addressed comment Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/x87/regexp-macro-assembler-x87.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/v8.h"
6
7 #if V8_TARGET_ARCH_X87
8
9 #include "src/cpu-profiler.h"
10 #include "src/log.h"
11 #include "src/macro-assembler.h"
12 #include "src/regexp-macro-assembler.h"
13 #include "src/regexp-stack.h"
14 #include "src/unicode.h"
15 #include "src/x87/regexp-macro-assembler-x87.h"
16
17 namespace v8 {
18 namespace internal {
19
20 #ifndef V8_INTERPRETED_REGEXP
21 /*
22 * This assembler uses the following register assignment convention
23 * - edx : Current character. Must be loaded using LoadCurrentCharacter
24 * before using any of the dispatch methods. Temporarily stores the
25 * index of capture start after a matching pass for a global regexp.
26 * - edi : Current position in input, as negative offset from end of string.
27 * Please notice that this is the byte offset, not the character offset!
28 * - esi : end of input (points to byte after last character in input).
29 * - ebp : Frame pointer. Used to access arguments, local variables and
30 * RegExp registers.
31 * - esp : Points to tip of C stack.
32 * - ecx : Points to tip of backtrack stack
33 *
34 * The registers eax and ebx are free to use for computations.
35 *
36 * Each call to a public method should retain this convention.
37 * The stack will have the following structure:
38 * - Isolate* isolate (address of the current isolate)
39 * - direct_call (if 1, direct call from JavaScript code, if 0
40 * call through the runtime system)
41 * - stack_area_base (high end of the memory area to use as
42 * backtracking stack)
43 * - capture array size (may fit multiple sets of matches)
44 * - int* capture_array (int[num_saved_registers_], for output).
45 * - end of input (address of end of string)
46 * - start of input (address of first character in string)
47 * - start index (character index of start)
48 * - String* input_string (location of a handle containing the string)
49 * --- frame alignment (if applicable) ---
50 * - return address
51 * ebp-> - old ebp
52 * - backup of caller esi
53 * - backup of caller edi
54 * - backup of caller ebx
55 * - success counter (only for global regexps to count matches).
56 * - Offset of location before start of input (effectively character
57 * position -1). Used to initialize capture registers to a non-position.
58 * - register 0 ebp[-4] (only positions must be stored in the first
59 * - register 1 ebp[-8] num_saved_registers_ registers)
60 * - ...
61 *
62 * The first num_saved_registers_ registers are initialized to point to
63 * "character -1" in the string (i.e., char_size() bytes before the first
64 * character of the string). The remaining registers start out as garbage.
65 *
66 * The data up to the return address must be placed there by the calling
67 * code, by calling the code entry as cast to a function with the signature:
68 * int (*match)(String* input_string,
69 * int start_index,
70 * Address start,
71 * Address end,
72 * int* capture_output_array,
73 * int num_capture_registers,
74 * byte* stack_area_base,
75 * bool direct_call, Isolate* isolate)
76 */
77
78 #define __ ACCESS_MASM(masm_)
79
80 RegExpMacroAssemblerX87::RegExpMacroAssemblerX87(Isolate* isolate, Zone* zone,
81 Mode mode,
82 int registers_to_save)
83 : NativeRegExpMacroAssembler(isolate, zone),
84 masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)),
85 mode_(mode),
86 num_registers_(registers_to_save),
87 num_saved_registers_(registers_to_save),
88 entry_label_(),
89 start_label_(),
90 success_label_(),
91 backtrack_label_(),
92 exit_label_() {
93 DCHECK_EQ(0, registers_to_save % 2);
94 __ jmp(&entry_label_); // We'll write the entry code later.
95 __ bind(&start_label_); // And then continue from here.
96 }
97
98
99 RegExpMacroAssemblerX87::~RegExpMacroAssemblerX87() {
100 delete masm_;
101 // Unuse labels in case we throw away the assembler without calling GetCode.
102 entry_label_.Unuse();
103 start_label_.Unuse();
104 success_label_.Unuse();
105 backtrack_label_.Unuse();
106 exit_label_.Unuse();
107 check_preempt_label_.Unuse();
108 stack_overflow_label_.Unuse();
109 }
110
111
112 int RegExpMacroAssemblerX87::stack_limit_slack() {
113 return RegExpStack::kStackLimitSlack;
114 }
115
116
117 void RegExpMacroAssemblerX87::AdvanceCurrentPosition(int by) {
118 if (by != 0) {
119 __ add(edi, Immediate(by * char_size()));
120 }
121 }
122
123
124 void RegExpMacroAssemblerX87::AdvanceRegister(int reg, int by) {
125 DCHECK(reg >= 0);
126 DCHECK(reg < num_registers_);
127 if (by != 0) {
128 __ add(register_location(reg), Immediate(by));
129 }
130 }
131
132
133 void RegExpMacroAssemblerX87::Backtrack() {
134 CheckPreemption();
135 // Pop Code* offset from backtrack stack, add Code* and jump to location.
136 Pop(ebx);
137 __ add(ebx, Immediate(masm_->CodeObject()));
138 __ jmp(ebx);
139 }
140
141
142 void RegExpMacroAssemblerX87::Bind(Label* label) {
143 __ bind(label);
144 }
145
146
147 void RegExpMacroAssemblerX87::CheckCharacter(uint32_t c, Label* on_equal) {
148 __ cmp(current_character(), c);
149 BranchOrBacktrack(equal, on_equal);
150 }
151
152
153 void RegExpMacroAssemblerX87::CheckCharacterGT(uc16 limit, Label* on_greater) {
154 __ cmp(current_character(), limit);
155 BranchOrBacktrack(greater, on_greater);
156 }
157
158
159 void RegExpMacroAssemblerX87::CheckAtStart(Label* on_at_start) {
160 Label not_at_start;
161 // Did we start the match at the start of the string at all?
162 __ cmp(Operand(ebp, kStartIndex), Immediate(0));
163 BranchOrBacktrack(not_equal, &not_at_start);
164 // If we did, are we still at the start of the input?
165 __ lea(eax, Operand(esi, edi, times_1, 0));
166 __ cmp(eax, Operand(ebp, kInputStart));
167 BranchOrBacktrack(equal, on_at_start);
168 __ bind(&not_at_start);
169 }
170
171
172 void RegExpMacroAssemblerX87::CheckNotAtStart(Label* on_not_at_start) {
173 // Did we start the match at the start of the string at all?
174 __ cmp(Operand(ebp, kStartIndex), Immediate(0));
175 BranchOrBacktrack(not_equal, on_not_at_start);
176 // If we did, are we still at the start of the input?
177 __ lea(eax, Operand(esi, edi, times_1, 0));
178 __ cmp(eax, Operand(ebp, kInputStart));
179 BranchOrBacktrack(not_equal, on_not_at_start);
180 }
181
182
183 void RegExpMacroAssemblerX87::CheckCharacterLT(uc16 limit, Label* on_less) {
184 __ cmp(current_character(), limit);
185 BranchOrBacktrack(less, on_less);
186 }
187
188
189 void RegExpMacroAssemblerX87::CheckGreedyLoop(Label* on_equal) {
190 Label fallthrough;
191 __ cmp(edi, Operand(backtrack_stackpointer(), 0));
192 __ j(not_equal, &fallthrough);
193 __ add(backtrack_stackpointer(), Immediate(kPointerSize)); // Pop.
194 BranchOrBacktrack(no_condition, on_equal);
195 __ bind(&fallthrough);
196 }
197
198
199 void RegExpMacroAssemblerX87::CheckNotBackReferenceIgnoreCase(
200 int start_reg,
201 Label* on_no_match) {
202 Label fallthrough;
203 __ mov(edx, register_location(start_reg)); // Index of start of capture
204 __ mov(ebx, register_location(start_reg + 1)); // Index of end of capture
205 __ sub(ebx, edx); // Length of capture.
206
207 // The length of a capture should not be negative. This can only happen
208 // if the end of the capture is unrecorded, or at a point earlier than
209 // the start of the capture.
210 BranchOrBacktrack(less, on_no_match);
211
212 // If length is zero, either the capture is empty or it is completely
213 // uncaptured. In either case succeed immediately.
214 __ j(equal, &fallthrough);
215
216 // Check that there are sufficient characters left in the input.
217 __ mov(eax, edi);
218 __ add(eax, ebx);
219 BranchOrBacktrack(greater, on_no_match);
220
221 if (mode_ == LATIN1) {
222 Label success;
223 Label fail;
224 Label loop_increment;
225 // Save register contents to make the registers available below.
226 __ push(edi);
227 __ push(backtrack_stackpointer());
228 // After this, the eax, ecx, and edi registers are available.
229
230 __ add(edx, esi); // Start of capture
231 __ add(edi, esi); // Start of text to match against capture.
232 __ add(ebx, edi); // End of text to match against capture.
233
234 Label loop;
235 __ bind(&loop);
236 __ movzx_b(eax, Operand(edi, 0));
237 __ cmpb_al(Operand(edx, 0));
238 __ j(equal, &loop_increment);
239
240 // Mismatch, try case-insensitive match (converting letters to lower-case).
241 __ or_(eax, 0x20); // Convert match character to lower-case.
242 __ lea(ecx, Operand(eax, -'a'));
243 __ cmp(ecx, static_cast<int32_t>('z' - 'a')); // Is eax a lowercase letter?
244 Label convert_capture;
245 __ j(below_equal, &convert_capture); // In range 'a'-'z'.
246 // Latin-1: Check for values in range [224,254] but not 247.
247 __ sub(ecx, Immediate(224 - 'a'));
248 __ cmp(ecx, Immediate(254 - 224));
249 __ j(above, &fail); // Weren't Latin-1 letters.
250 __ cmp(ecx, Immediate(247 - 224)); // Check for 247.
251 __ j(equal, &fail);
252 __ bind(&convert_capture);
253 // Also convert capture character.
254 __ movzx_b(ecx, Operand(edx, 0));
255 __ or_(ecx, 0x20);
256
257 __ cmp(eax, ecx);
258 __ j(not_equal, &fail);
259
260 __ bind(&loop_increment);
261 // Increment pointers into match and capture strings.
262 __ add(edx, Immediate(1));
263 __ add(edi, Immediate(1));
264 // Compare to end of match, and loop if not done.
265 __ cmp(edi, ebx);
266 __ j(below, &loop);
267 __ jmp(&success);
268
269 __ bind(&fail);
270 // Restore original values before failing.
271 __ pop(backtrack_stackpointer());
272 __ pop(edi);
273 BranchOrBacktrack(no_condition, on_no_match);
274
275 __ bind(&success);
276 // Restore original value before continuing.
277 __ pop(backtrack_stackpointer());
278 // Drop original value of character position.
279 __ add(esp, Immediate(kPointerSize));
280 // Compute new value of character position after the matched part.
281 __ sub(edi, esi);
282 } else {
283 DCHECK(mode_ == UC16);
284 // Save registers before calling C function.
285 __ push(esi);
286 __ push(edi);
287 __ push(backtrack_stackpointer());
288 __ push(ebx);
289
290 static const int argument_count = 4;
291 __ PrepareCallCFunction(argument_count, ecx);
292 // Put arguments into allocated stack area, last argument highest on stack.
293 // Parameters are
294 // Address byte_offset1 - Address captured substring's start.
295 // Address byte_offset2 - Address of current character position.
296 // size_t byte_length - length of capture in bytes(!)
297 // Isolate* isolate
298
299 // Set isolate.
300 __ mov(Operand(esp, 3 * kPointerSize),
301 Immediate(ExternalReference::isolate_address(isolate())));
302 // Set byte_length.
303 __ mov(Operand(esp, 2 * kPointerSize), ebx);
304 // Set byte_offset2.
305 // Found by adding negative string-end offset of current position (edi)
306 // to end of string.
307 __ add(edi, esi);
308 __ mov(Operand(esp, 1 * kPointerSize), edi);
309 // Set byte_offset1.
310 // Start of capture, where edx already holds string-end negative offset.
311 __ add(edx, esi);
312 __ mov(Operand(esp, 0 * kPointerSize), edx);
313
314 {
315 AllowExternalCallThatCantCauseGC scope(masm_);
316 ExternalReference compare =
317 ExternalReference::re_case_insensitive_compare_uc16(isolate());
318 __ CallCFunction(compare, argument_count);
319 }
320 // Pop original values before reacting on result value.
321 __ pop(ebx);
322 __ pop(backtrack_stackpointer());
323 __ pop(edi);
324 __ pop(esi);
325
326 // Check if function returned non-zero for success or zero for failure.
327 __ or_(eax, eax);
328 BranchOrBacktrack(zero, on_no_match);
329 // On success, increment position by length of capture.
330 __ add(edi, ebx);
331 }
332 __ bind(&fallthrough);
333 }
334
335
336 void RegExpMacroAssemblerX87::CheckNotBackReference(
337 int start_reg,
338 Label* on_no_match) {
339 Label fallthrough;
340 Label success;
341 Label fail;
342
343 // Find length of back-referenced capture.
344 __ mov(edx, register_location(start_reg));
345 __ mov(eax, register_location(start_reg + 1));
346 __ sub(eax, edx); // Length to check.
347 // Fail on partial or illegal capture (start of capture after end of capture).
348 BranchOrBacktrack(less, on_no_match);
349 // Succeed on empty capture (including no capture)
350 __ j(equal, &fallthrough);
351
352 // Check that there are sufficient characters left in the input.
353 __ mov(ebx, edi);
354 __ add(ebx, eax);
355 BranchOrBacktrack(greater, on_no_match);
356
357 // Save register to make it available below.
358 __ push(backtrack_stackpointer());
359
360 // Compute pointers to match string and capture string
361 __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match.
362 __ add(edx, esi); // Start of capture.
363 __ lea(ecx, Operand(eax, ebx, times_1, 0)); // End of match
364
365 Label loop;
366 __ bind(&loop);
367 if (mode_ == LATIN1) {
368 __ movzx_b(eax, Operand(edx, 0));
369 __ cmpb_al(Operand(ebx, 0));
370 } else {
371 DCHECK(mode_ == UC16);
372 __ movzx_w(eax, Operand(edx, 0));
373 __ cmpw_ax(Operand(ebx, 0));
374 }
375 __ j(not_equal, &fail);
376 // Increment pointers into capture and match string.
377 __ add(edx, Immediate(char_size()));
378 __ add(ebx, Immediate(char_size()));
379 // Check if we have reached end of match area.
380 __ cmp(ebx, ecx);
381 __ j(below, &loop);
382 __ jmp(&success);
383
384 __ bind(&fail);
385 // Restore backtrack stackpointer.
386 __ pop(backtrack_stackpointer());
387 BranchOrBacktrack(no_condition, on_no_match);
388
389 __ bind(&success);
390 // Move current character position to position after match.
391 __ mov(edi, ecx);
392 __ sub(edi, esi);
393 // Restore backtrack stackpointer.
394 __ pop(backtrack_stackpointer());
395
396 __ bind(&fallthrough);
397 }
398
399
400 void RegExpMacroAssemblerX87::CheckNotCharacter(uint32_t c,
401 Label* on_not_equal) {
402 __ cmp(current_character(), c);
403 BranchOrBacktrack(not_equal, on_not_equal);
404 }
405
406
407 void RegExpMacroAssemblerX87::CheckCharacterAfterAnd(uint32_t c,
408 uint32_t mask,
409 Label* on_equal) {
410 if (c == 0) {
411 __ test(current_character(), Immediate(mask));
412 } else {
413 __ mov(eax, mask);
414 __ and_(eax, current_character());
415 __ cmp(eax, c);
416 }
417 BranchOrBacktrack(equal, on_equal);
418 }
419
420
421 void RegExpMacroAssemblerX87::CheckNotCharacterAfterAnd(uint32_t c,
422 uint32_t mask,
423 Label* on_not_equal) {
424 if (c == 0) {
425 __ test(current_character(), Immediate(mask));
426 } else {
427 __ mov(eax, mask);
428 __ and_(eax, current_character());
429 __ cmp(eax, c);
430 }
431 BranchOrBacktrack(not_equal, on_not_equal);
432 }
433
434
435 void RegExpMacroAssemblerX87::CheckNotCharacterAfterMinusAnd(
436 uc16 c,
437 uc16 minus,
438 uc16 mask,
439 Label* on_not_equal) {
440 DCHECK(minus < String::kMaxUtf16CodeUnit);
441 __ lea(eax, Operand(current_character(), -minus));
442 if (c == 0) {
443 __ test(eax, Immediate(mask));
444 } else {
445 __ and_(eax, mask);
446 __ cmp(eax, c);
447 }
448 BranchOrBacktrack(not_equal, on_not_equal);
449 }
450
451
452 void RegExpMacroAssemblerX87::CheckCharacterInRange(
453 uc16 from,
454 uc16 to,
455 Label* on_in_range) {
456 __ lea(eax, Operand(current_character(), -from));
457 __ cmp(eax, to - from);
458 BranchOrBacktrack(below_equal, on_in_range);
459 }
460
461
462 void RegExpMacroAssemblerX87::CheckCharacterNotInRange(
463 uc16 from,
464 uc16 to,
465 Label* on_not_in_range) {
466 __ lea(eax, Operand(current_character(), -from));
467 __ cmp(eax, to - from);
468 BranchOrBacktrack(above, on_not_in_range);
469 }
470
471
472 void RegExpMacroAssemblerX87::CheckBitInTable(
473 Handle<ByteArray> table,
474 Label* on_bit_set) {
475 __ mov(eax, Immediate(table));
476 Register index = current_character();
477 if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) {
478 __ mov(ebx, kTableSize - 1);
479 __ and_(ebx, current_character());
480 index = ebx;
481 }
482 __ cmpb(FieldOperand(eax, index, times_1, ByteArray::kHeaderSize), 0);
483 BranchOrBacktrack(not_equal, on_bit_set);
484 }
485
486
487 bool RegExpMacroAssemblerX87::CheckSpecialCharacterClass(uc16 type,
488 Label* on_no_match) {
489 // Range checks (c in min..max) are generally implemented by an unsigned
490 // (c - min) <= (max - min) check
491 switch (type) {
492 case 's':
493 // Match space-characters
494 if (mode_ == LATIN1) {
495 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
496 Label success;
497 __ cmp(current_character(), ' ');
498 __ j(equal, &success, Label::kNear);
499 // Check range 0x09..0x0d
500 __ lea(eax, Operand(current_character(), -'\t'));
501 __ cmp(eax, '\r' - '\t');
502 __ j(below_equal, &success, Label::kNear);
503 // \u00a0 (NBSP).
504 __ cmp(eax, 0x00a0 - '\t');
505 BranchOrBacktrack(not_equal, on_no_match);
506 __ bind(&success);
507 return true;
508 }
509 return false;
510 case 'S':
511 // The emitted code for generic character classes is good enough.
512 return false;
513 case 'd':
514 // Match ASCII digits ('0'..'9')
515 __ lea(eax, Operand(current_character(), -'0'));
516 __ cmp(eax, '9' - '0');
517 BranchOrBacktrack(above, on_no_match);
518 return true;
519 case 'D':
520 // Match non ASCII-digits
521 __ lea(eax, Operand(current_character(), -'0'));
522 __ cmp(eax, '9' - '0');
523 BranchOrBacktrack(below_equal, on_no_match);
524 return true;
525 case '.': {
526 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
527 __ mov(eax, current_character());
528 __ xor_(eax, Immediate(0x01));
529 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
530 __ sub(eax, Immediate(0x0b));
531 __ cmp(eax, 0x0c - 0x0b);
532 BranchOrBacktrack(below_equal, on_no_match);
533 if (mode_ == UC16) {
534 // Compare original value to 0x2028 and 0x2029, using the already
535 // computed (current_char ^ 0x01 - 0x0b). I.e., check for
536 // 0x201d (0x2028 - 0x0b) or 0x201e.
537 __ sub(eax, Immediate(0x2028 - 0x0b));
538 __ cmp(eax, 0x2029 - 0x2028);
539 BranchOrBacktrack(below_equal, on_no_match);
540 }
541 return true;
542 }
543 case 'w': {
544 if (mode_ != LATIN1) {
545 // Table is 256 entries, so all Latin1 characters can be tested.
546 __ cmp(current_character(), Immediate('z'));
547 BranchOrBacktrack(above, on_no_match);
548 }
549 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
550 ExternalReference word_map = ExternalReference::re_word_character_map();
551 __ test_b(current_character(),
552 Operand::StaticArray(current_character(), times_1, word_map));
553 BranchOrBacktrack(zero, on_no_match);
554 return true;
555 }
556 case 'W': {
557 Label done;
558 if (mode_ != LATIN1) {
559 // Table is 256 entries, so all Latin1 characters can be tested.
560 __ cmp(current_character(), Immediate('z'));
561 __ j(above, &done);
562 }
563 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
564 ExternalReference word_map = ExternalReference::re_word_character_map();
565 __ test_b(current_character(),
566 Operand::StaticArray(current_character(), times_1, word_map));
567 BranchOrBacktrack(not_zero, on_no_match);
568 if (mode_ != LATIN1) {
569 __ bind(&done);
570 }
571 return true;
572 }
573 // Non-standard classes (with no syntactic shorthand) used internally.
574 case '*':
575 // Match any character.
576 return true;
577 case 'n': {
578 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
579 // The opposite of '.'.
580 __ mov(eax, current_character());
581 __ xor_(eax, Immediate(0x01));
582 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
583 __ sub(eax, Immediate(0x0b));
584 __ cmp(eax, 0x0c - 0x0b);
585 if (mode_ == LATIN1) {
586 BranchOrBacktrack(above, on_no_match);
587 } else {
588 Label done;
589 BranchOrBacktrack(below_equal, &done);
590 DCHECK_EQ(UC16, mode_);
591 // Compare original value to 0x2028 and 0x2029, using the already
592 // computed (current_char ^ 0x01 - 0x0b). I.e., check for
593 // 0x201d (0x2028 - 0x0b) or 0x201e.
594 __ sub(eax, Immediate(0x2028 - 0x0b));
595 __ cmp(eax, 1);
596 BranchOrBacktrack(above, on_no_match);
597 __ bind(&done);
598 }
599 return true;
600 }
601 // No custom implementation (yet): s(UC16), S(UC16).
602 default:
603 return false;
604 }
605 }
606
607
608 void RegExpMacroAssemblerX87::Fail() {
609 STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero.
610 if (!global()) {
611 __ Move(eax, Immediate(FAILURE));
612 }
613 __ jmp(&exit_label_);
614 }
615
616
617 Handle<HeapObject> RegExpMacroAssemblerX87::GetCode(Handle<String> source) {
618 Label return_eax;
619 // Finalize code - write the entry point code now we know how many
620 // registers we need.
621
622 // Entry code:
623 __ bind(&entry_label_);
624
625 // Tell the system that we have a stack frame. Because the type is MANUAL, no
626 // code is generated.
627 FrameScope scope(masm_, StackFrame::MANUAL);
628
629 // Actually emit code to start a new stack frame.
630 __ push(ebp);
631 __ mov(ebp, esp);
632 // Save callee-save registers. Order here should correspond to order of
633 // kBackup_ebx etc.
634 __ push(esi);
635 __ push(edi);
636 __ push(ebx); // Callee-save on MacOS.
637 __ push(Immediate(0)); // Number of successful matches in a global regexp.
638 __ push(Immediate(0)); // Make room for "input start - 1" constant.
639
640 // Check if we have space on the stack for registers.
641 Label stack_limit_hit;
642 Label stack_ok;
643
644 ExternalReference stack_limit =
645 ExternalReference::address_of_stack_limit(isolate());
646 __ mov(ecx, esp);
647 __ sub(ecx, Operand::StaticVariable(stack_limit));
648 // Handle it if the stack pointer is already below the stack limit.
649 __ j(below_equal, &stack_limit_hit);
650 // Check if there is room for the variable number of registers above
651 // the stack limit.
652 __ cmp(ecx, num_registers_ * kPointerSize);
653 __ j(above_equal, &stack_ok);
654 // Exit with OutOfMemory exception. There is not enough space on the stack
655 // for our working registers.
656 __ mov(eax, EXCEPTION);
657 __ jmp(&return_eax);
658
659 __ bind(&stack_limit_hit);
660 CallCheckStackGuardState(ebx);
661 __ or_(eax, eax);
662 // If returned value is non-zero, we exit with the returned value as result.
663 __ j(not_zero, &return_eax);
664
665 __ bind(&stack_ok);
666 // Load start index for later use.
667 __ mov(ebx, Operand(ebp, kStartIndex));
668
669 // Allocate space on stack for registers.
670 __ sub(esp, Immediate(num_registers_ * kPointerSize));
671 // Load string length.
672 __ mov(esi, Operand(ebp, kInputEnd));
673 // Load input position.
674 __ mov(edi, Operand(ebp, kInputStart));
675 // Set up edi to be negative offset from string end.
676 __ sub(edi, esi);
677
678 // Set eax to address of char before start of the string.
679 // (effectively string position -1).
680 __ neg(ebx);
681 if (mode_ == UC16) {
682 __ lea(eax, Operand(edi, ebx, times_2, -char_size()));
683 } else {
684 __ lea(eax, Operand(edi, ebx, times_1, -char_size()));
685 }
686 // Store this value in a local variable, for use when clearing
687 // position registers.
688 __ mov(Operand(ebp, kInputStartMinusOne), eax);
689
690 #if V8_OS_WIN
691 // Ensure that we write to each stack page, in order. Skipping a page
692 // on Windows can cause segmentation faults. Assuming page size is 4k.
693 const int kPageSize = 4096;
694 const int kRegistersPerPage = kPageSize / kPointerSize;
695 for (int i = num_saved_registers_ + kRegistersPerPage - 1;
696 i < num_registers_;
697 i += kRegistersPerPage) {
698 __ mov(register_location(i), eax); // One write every page.
699 }
700 #endif // V8_OS_WIN
701
702 Label load_char_start_regexp, start_regexp;
703 // Load newline if index is at start, previous character otherwise.
704 __ cmp(Operand(ebp, kStartIndex), Immediate(0));
705 __ j(not_equal, &load_char_start_regexp, Label::kNear);
706 __ mov(current_character(), '\n');
707 __ jmp(&start_regexp, Label::kNear);
708
709 // Global regexp restarts matching here.
710 __ bind(&load_char_start_regexp);
711 // Load previous char as initial value of current character register.
712 LoadCurrentCharacterUnchecked(-1, 1);
713 __ bind(&start_regexp);
714
715 // Initialize on-stack registers.
716 if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
717 // Fill saved registers with initial value = start offset - 1
718 // Fill in stack push order, to avoid accessing across an unwritten
719 // page (a problem on Windows).
720 if (num_saved_registers_ > 8) {
721 __ mov(ecx, kRegisterZero);
722 Label init_loop;
723 __ bind(&init_loop);
724 __ mov(Operand(ebp, ecx, times_1, 0), eax);
725 __ sub(ecx, Immediate(kPointerSize));
726 __ cmp(ecx, kRegisterZero - num_saved_registers_ * kPointerSize);
727 __ j(greater, &init_loop);
728 } else { // Unroll the loop.
729 for (int i = 0; i < num_saved_registers_; i++) {
730 __ mov(register_location(i), eax);
731 }
732 }
733 }
734
735 // Initialize backtrack stack pointer.
736 __ mov(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
737
738 __ jmp(&start_label_);
739
740 // Exit code:
741 if (success_label_.is_linked()) {
742 // Save captures when successful.
743 __ bind(&success_label_);
744 if (num_saved_registers_ > 0) {
745 // copy captures to output
746 __ mov(ebx, Operand(ebp, kRegisterOutput));
747 __ mov(ecx, Operand(ebp, kInputEnd));
748 __ mov(edx, Operand(ebp, kStartIndex));
749 __ sub(ecx, Operand(ebp, kInputStart));
750 if (mode_ == UC16) {
751 __ lea(ecx, Operand(ecx, edx, times_2, 0));
752 } else {
753 __ add(ecx, edx);
754 }
755 for (int i = 0; i < num_saved_registers_; i++) {
756 __ mov(eax, register_location(i));
757 if (i == 0 && global_with_zero_length_check()) {
758 // Keep capture start in edx for the zero-length check later.
759 __ mov(edx, eax);
760 }
761 // Convert to index from start of string, not end.
762 __ add(eax, ecx);
763 if (mode_ == UC16) {
764 __ sar(eax, 1); // Convert byte index to character index.
765 }
766 __ mov(Operand(ebx, i * kPointerSize), eax);
767 }
768 }
769
770 if (global()) {
771 // Restart matching if the regular expression is flagged as global.
772 // Increment success counter.
773 __ inc(Operand(ebp, kSuccessfulCaptures));
774 // Capture results have been stored, so the number of remaining global
775 // output registers is reduced by the number of stored captures.
776 __ mov(ecx, Operand(ebp, kNumOutputRegisters));
777 __ sub(ecx, Immediate(num_saved_registers_));
778 // Check whether we have enough room for another set of capture results.
779 __ cmp(ecx, Immediate(num_saved_registers_));
780 __ j(less, &exit_label_);
781
782 __ mov(Operand(ebp, kNumOutputRegisters), ecx);
783 // Advance the location for output.
784 __ add(Operand(ebp, kRegisterOutput),
785 Immediate(num_saved_registers_ * kPointerSize));
786
787 // Prepare eax to initialize registers with its value in the next run.
788 __ mov(eax, Operand(ebp, kInputStartMinusOne));
789
790 if (global_with_zero_length_check()) {
791 // Special case for zero-length matches.
792 // edx: capture start index
793 __ cmp(edi, edx);
794 // Not a zero-length match, restart.
795 __ j(not_equal, &load_char_start_regexp);
796 // edi (offset from the end) is zero if we already reached the end.
797 __ test(edi, edi);
798 __ j(zero, &exit_label_, Label::kNear);
799 // Advance current position after a zero-length match.
800 if (mode_ == UC16) {
801 __ add(edi, Immediate(2));
802 } else {
803 __ inc(edi);
804 }
805 }
806
807 __ jmp(&load_char_start_regexp);
808 } else {
809 __ mov(eax, Immediate(SUCCESS));
810 }
811 }
812
813 __ bind(&exit_label_);
814 if (global()) {
815 // Return the number of successful captures.
816 __ mov(eax, Operand(ebp, kSuccessfulCaptures));
817 }
818
819 __ bind(&return_eax);
820 // Skip esp past regexp registers.
821 __ lea(esp, Operand(ebp, kBackup_ebx));
822 // Restore callee-save registers.
823 __ pop(ebx);
824 __ pop(edi);
825 __ pop(esi);
826 // Exit function frame, restore previous one.
827 __ pop(ebp);
828 __ ret(0);
829
830 // Backtrack code (branch target for conditional backtracks).
831 if (backtrack_label_.is_linked()) {
832 __ bind(&backtrack_label_);
833 Backtrack();
834 }
835
836 Label exit_with_exception;
837
838 // Preempt-code
839 if (check_preempt_label_.is_linked()) {
840 SafeCallTarget(&check_preempt_label_);
841
842 __ push(backtrack_stackpointer());
843 __ push(edi);
844
845 CallCheckStackGuardState(ebx);
846 __ or_(eax, eax);
847 // If returning non-zero, we should end execution with the given
848 // result as return value.
849 __ j(not_zero, &return_eax);
850
851 __ pop(edi);
852 __ pop(backtrack_stackpointer());
853 // String might have moved: Reload esi from frame.
854 __ mov(esi, Operand(ebp, kInputEnd));
855 SafeReturn();
856 }
857
858 // Backtrack stack overflow code.
859 if (stack_overflow_label_.is_linked()) {
860 SafeCallTarget(&stack_overflow_label_);
861 // Reached if the backtrack-stack limit has been hit.
862
863 Label grow_failed;
864 // Save registers before calling C function
865 __ push(esi);
866 __ push(edi);
867
868 // Call GrowStack(backtrack_stackpointer())
869 static const int num_arguments = 3;
870 __ PrepareCallCFunction(num_arguments, ebx);
871 __ mov(Operand(esp, 2 * kPointerSize),
872 Immediate(ExternalReference::isolate_address(isolate())));
873 __ lea(eax, Operand(ebp, kStackHighEnd));
874 __ mov(Operand(esp, 1 * kPointerSize), eax);
875 __ mov(Operand(esp, 0 * kPointerSize), backtrack_stackpointer());
876 ExternalReference grow_stack =
877 ExternalReference::re_grow_stack(isolate());
878 __ CallCFunction(grow_stack, num_arguments);
879 // If return NULL, we have failed to grow the stack, and
880 // must exit with a stack-overflow exception.
881 __ or_(eax, eax);
882 __ j(equal, &exit_with_exception);
883 // Otherwise use return value as new stack pointer.
884 __ mov(backtrack_stackpointer(), eax);
885 // Restore saved registers and continue.
886 __ pop(edi);
887 __ pop(esi);
888 SafeReturn();
889 }
890
891 if (exit_with_exception.is_linked()) {
892 // If any of the code above needed to exit with an exception.
893 __ bind(&exit_with_exception);
894 // Exit with Result EXCEPTION(-1) to signal thrown exception.
895 __ mov(eax, EXCEPTION);
896 __ jmp(&return_eax);
897 }
898
899 CodeDesc code_desc;
900 masm_->GetCode(&code_desc);
901 Handle<Code> code =
902 isolate()->factory()->NewCode(code_desc,
903 Code::ComputeFlags(Code::REGEXP),
904 masm_->CodeObject());
905 PROFILE(isolate(), RegExpCodeCreateEvent(*code, *source));
906 return Handle<HeapObject>::cast(code);
907 }
908
909
910 void RegExpMacroAssemblerX87::GoTo(Label* to) {
911 BranchOrBacktrack(no_condition, to);
912 }
913
914
915 void RegExpMacroAssemblerX87::IfRegisterGE(int reg,
916 int comparand,
917 Label* if_ge) {
918 __ cmp(register_location(reg), Immediate(comparand));
919 BranchOrBacktrack(greater_equal, if_ge);
920 }
921
922
923 void RegExpMacroAssemblerX87::IfRegisterLT(int reg,
924 int comparand,
925 Label* if_lt) {
926 __ cmp(register_location(reg), Immediate(comparand));
927 BranchOrBacktrack(less, if_lt);
928 }
929
930
931 void RegExpMacroAssemblerX87::IfRegisterEqPos(int reg,
932 Label* if_eq) {
933 __ cmp(edi, register_location(reg));
934 BranchOrBacktrack(equal, if_eq);
935 }
936
937
938 RegExpMacroAssembler::IrregexpImplementation
939 RegExpMacroAssemblerX87::Implementation() {
940 return kX87Implementation;
941 }
942
943
944 void RegExpMacroAssemblerX87::LoadCurrentCharacter(int cp_offset,
945 Label* on_end_of_input,
946 bool check_bounds,
947 int characters) {
948 DCHECK(cp_offset >= -1); // ^ and \b can look behind one character.
949 DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
950 if (check_bounds) {
951 CheckPosition(cp_offset + characters - 1, on_end_of_input);
952 }
953 LoadCurrentCharacterUnchecked(cp_offset, characters);
954 }
955
956
957 void RegExpMacroAssemblerX87::PopCurrentPosition() {
958 Pop(edi);
959 }
960
961
962 void RegExpMacroAssemblerX87::PopRegister(int register_index) {
963 Pop(eax);
964 __ mov(register_location(register_index), eax);
965 }
966
967
968 void RegExpMacroAssemblerX87::PushBacktrack(Label* label) {
969 Push(Immediate::CodeRelativeOffset(label));
970 CheckStackLimit();
971 }
972
973
974 void RegExpMacroAssemblerX87::PushCurrentPosition() {
975 Push(edi);
976 }
977
978
979 void RegExpMacroAssemblerX87::PushRegister(int register_index,
980 StackCheckFlag check_stack_limit) {
981 __ mov(eax, register_location(register_index));
982 Push(eax);
983 if (check_stack_limit) CheckStackLimit();
984 }
985
986
987 void RegExpMacroAssemblerX87::ReadCurrentPositionFromRegister(int reg) {
988 __ mov(edi, register_location(reg));
989 }
990
991
992 void RegExpMacroAssemblerX87::ReadStackPointerFromRegister(int reg) {
993 __ mov(backtrack_stackpointer(), register_location(reg));
994 __ add(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
995 }
996
997 void RegExpMacroAssemblerX87::SetCurrentPositionFromEnd(int by) {
998 Label after_position;
999 __ cmp(edi, -by * char_size());
1000 __ j(greater_equal, &after_position, Label::kNear);
1001 __ mov(edi, -by * char_size());
1002 // On RegExp code entry (where this operation is used), the character before
1003 // the current position is expected to be already loaded.
1004 // We have advanced the position, so it's safe to read backwards.
1005 LoadCurrentCharacterUnchecked(-1, 1);
1006 __ bind(&after_position);
1007 }
1008
1009
1010 void RegExpMacroAssemblerX87::SetRegister(int register_index, int to) {
1011 DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
1012 __ mov(register_location(register_index), Immediate(to));
1013 }
1014
1015
1016 bool RegExpMacroAssemblerX87::Succeed() {
1017 __ jmp(&success_label_);
1018 return global();
1019 }
1020
1021
1022 void RegExpMacroAssemblerX87::WriteCurrentPositionToRegister(int reg,
1023 int cp_offset) {
1024 if (cp_offset == 0) {
1025 __ mov(register_location(reg), edi);
1026 } else {
1027 __ lea(eax, Operand(edi, cp_offset * char_size()));
1028 __ mov(register_location(reg), eax);
1029 }
1030 }
1031
1032
1033 void RegExpMacroAssemblerX87::ClearRegisters(int reg_from, int reg_to) {
1034 DCHECK(reg_from <= reg_to);
1035 __ mov(eax, Operand(ebp, kInputStartMinusOne));
1036 for (int reg = reg_from; reg <= reg_to; reg++) {
1037 __ mov(register_location(reg), eax);
1038 }
1039 }
1040
1041
1042 void RegExpMacroAssemblerX87::WriteStackPointerToRegister(int reg) {
1043 __ mov(eax, backtrack_stackpointer());
1044 __ sub(eax, Operand(ebp, kStackHighEnd));
1045 __ mov(register_location(reg), eax);
1046 }
1047
1048
1049 // Private methods:
1050
1051 void RegExpMacroAssemblerX87::CallCheckStackGuardState(Register scratch) {
1052 static const int num_arguments = 3;
1053 __ PrepareCallCFunction(num_arguments, scratch);
1054 // RegExp code frame pointer.
1055 __ mov(Operand(esp, 2 * kPointerSize), ebp);
1056 // Code* of self.
1057 __ mov(Operand(esp, 1 * kPointerSize), Immediate(masm_->CodeObject()));
1058 // Next address on the stack (will be address of return address).
1059 __ lea(eax, Operand(esp, -kPointerSize));
1060 __ mov(Operand(esp, 0 * kPointerSize), eax);
1061 ExternalReference check_stack_guard =
1062 ExternalReference::re_check_stack_guard_state(isolate());
1063 __ CallCFunction(check_stack_guard, num_arguments);
1064 }
1065
1066
1067 // Helper function for reading a value out of a stack frame.
1068 template <typename T>
1069 static T& frame_entry(Address re_frame, int frame_offset) {
1070 return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
1071 }
1072
1073
1074 template <typename T>
1075 static T* frame_entry_address(Address re_frame, int frame_offset) {
1076 return reinterpret_cast<T*>(re_frame + frame_offset);
1077 }
1078
1079
1080 int RegExpMacroAssemblerX87::CheckStackGuardState(Address* return_address,
1081 Code* re_code,
1082 Address re_frame) {
1083 return NativeRegExpMacroAssembler::CheckStackGuardState(
1084 frame_entry<Isolate*>(re_frame, kIsolate),
1085 frame_entry<int>(re_frame, kStartIndex),
1086 frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
1087 frame_entry_address<String*>(re_frame, kInputString),
1088 frame_entry_address<const byte*>(re_frame, kInputStart),
1089 frame_entry_address<const byte*>(re_frame, kInputEnd));
1090 }
1091
1092
1093 Operand RegExpMacroAssemblerX87::register_location(int register_index) {
1094 DCHECK(register_index < (1<<30));
1095 if (num_registers_ <= register_index) {
1096 num_registers_ = register_index + 1;
1097 }
1098 return Operand(ebp, kRegisterZero - register_index * kPointerSize);
1099 }
1100
1101
1102 void RegExpMacroAssemblerX87::CheckPosition(int cp_offset,
1103 Label* on_outside_input) {
1104 __ cmp(edi, -cp_offset * char_size());
1105 BranchOrBacktrack(greater_equal, on_outside_input);
1106 }
1107
1108
1109 void RegExpMacroAssemblerX87::BranchOrBacktrack(Condition condition,
1110 Label* to) {
1111 if (condition < 0) { // No condition
1112 if (to == NULL) {
1113 Backtrack();
1114 return;
1115 }
1116 __ jmp(to);
1117 return;
1118 }
1119 if (to == NULL) {
1120 __ j(condition, &backtrack_label_);
1121 return;
1122 }
1123 __ j(condition, to);
1124 }
1125
1126
1127 void RegExpMacroAssemblerX87::SafeCall(Label* to) {
1128 Label return_to;
1129 __ push(Immediate::CodeRelativeOffset(&return_to));
1130 __ jmp(to);
1131 __ bind(&return_to);
1132 }
1133
1134
1135 void RegExpMacroAssemblerX87::SafeReturn() {
1136 __ pop(ebx);
1137 __ add(ebx, Immediate(masm_->CodeObject()));
1138 __ jmp(ebx);
1139 }
1140
1141
1142 void RegExpMacroAssemblerX87::SafeCallTarget(Label* name) {
1143 __ bind(name);
1144 }
1145
1146
1147 void RegExpMacroAssemblerX87::Push(Register source) {
1148 DCHECK(!source.is(backtrack_stackpointer()));
1149 // Notice: This updates flags, unlike normal Push.
1150 __ sub(backtrack_stackpointer(), Immediate(kPointerSize));
1151 __ mov(Operand(backtrack_stackpointer(), 0), source);
1152 }
1153
1154
1155 void RegExpMacroAssemblerX87::Push(Immediate value) {
1156 // Notice: This updates flags, unlike normal Push.
1157 __ sub(backtrack_stackpointer(), Immediate(kPointerSize));
1158 __ mov(Operand(backtrack_stackpointer(), 0), value);
1159 }
1160
1161
1162 void RegExpMacroAssemblerX87::Pop(Register target) {
1163 DCHECK(!target.is(backtrack_stackpointer()));
1164 __ mov(target, Operand(backtrack_stackpointer(), 0));
1165 // Notice: This updates flags, unlike normal Pop.
1166 __ add(backtrack_stackpointer(), Immediate(kPointerSize));
1167 }
1168
1169
1170 void RegExpMacroAssemblerX87::CheckPreemption() {
1171 // Check for preemption.
1172 Label no_preempt;
1173 ExternalReference stack_limit =
1174 ExternalReference::address_of_stack_limit(isolate());
1175 __ cmp(esp, Operand::StaticVariable(stack_limit));
1176 __ j(above, &no_preempt);
1177
1178 SafeCall(&check_preempt_label_);
1179
1180 __ bind(&no_preempt);
1181 }
1182
1183
1184 void RegExpMacroAssemblerX87::CheckStackLimit() {
1185 Label no_stack_overflow;
1186 ExternalReference stack_limit =
1187 ExternalReference::address_of_regexp_stack_limit(isolate());
1188 __ cmp(backtrack_stackpointer(), Operand::StaticVariable(stack_limit));
1189 __ j(above, &no_stack_overflow);
1190
1191 SafeCall(&stack_overflow_label_);
1192
1193 __ bind(&no_stack_overflow);
1194 }
1195
1196
1197 void RegExpMacroAssemblerX87::LoadCurrentCharacterUnchecked(int cp_offset,
1198 int characters) {
1199 if (mode_ == LATIN1) {
1200 if (characters == 4) {
1201 __ mov(current_character(), Operand(esi, edi, times_1, cp_offset));
1202 } else if (characters == 2) {
1203 __ movzx_w(current_character(), Operand(esi, edi, times_1, cp_offset));
1204 } else {
1205 DCHECK(characters == 1);
1206 __ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
1207 }
1208 } else {
1209 DCHECK(mode_ == UC16);
1210 if (characters == 2) {
1211 __ mov(current_character(),
1212 Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
1213 } else {
1214 DCHECK(characters == 1);
1215 __ movzx_w(current_character(),
1216 Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
1217 }
1218 }
1219 }
1220
1221
1222 #undef __
1223
1224 #endif // V8_INTERPRETED_REGEXP
1225
1226 } // namespace internal
1227 } // namespace v8
1228
1229 #endif // V8_TARGET_ARCH_X87
OLDNEW
« no previous file with comments | « src/x87/regexp-macro-assembler-x87.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698