OLD | NEW |
---|---|
1 // Copyright 2008-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2008-2009 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
44 * - edx : current character. Must be loaded using LoadCurrentCharacter | 44 * - edx : current character. Must be loaded using LoadCurrentCharacter |
45 * before using any of the dispatch methods. | 45 * before using any of the dispatch methods. |
46 * - edi : current position in input, as negative offset from end of string. | 46 * - edi : current position in input, as negative offset from end of string. |
47 * Please notice that this is the byte offset, not the character offset! | 47 * Please notice that this is the byte offset, not the character offset! |
48 * - esi : end of input (points to byte after last character in input). | 48 * - esi : end of input (points to byte after last character in input). |
49 * - ebp : frame pointer. Used to access arguments, local variables and | 49 * - ebp : frame pointer. Used to access arguments, local variables and |
50 * RegExp registers. | 50 * RegExp registers. |
51 * - esp : points to tip of C stack. | 51 * - esp : points to tip of C stack. |
52 * - ecx : points to tip of backtrack stack | 52 * - ecx : points to tip of backtrack stack |
53 * | 53 * |
54 * The registers eax, ebx and ecx are free to use for computations. | 54 * The registers eax and ebx are free to use for computations. |
55 * | 55 * |
56 * Each call to a public method should retain this convention. | 56 * Each call to a public method should retain this convention. |
57 * The stack will have the following structure: | 57 * The stack will have the following structure: |
58 * - direct_call (if 1, direct call from JavaScript code, if 0 | 58 * - direct_call (if 1, direct call from JavaScript code, if 0 |
59 * call through the runtime system) | 59 * call through the runtime system) |
60 * - stack_area_base (High end of the memory area to use as | 60 * - stack_area_base (High end of the memory area to use as |
61 * backtracking stack) | 61 * backtracking stack) |
62 * - int* capture_array (int[num_saved_registers_], for output). | 62 * - int* capture_array (int[num_saved_registers_], for output). |
63 * - end of input (Address of end of string) | 63 * - end of input (Address of end of string) |
64 * - start of input (Address of first character in string) | 64 * - start of input (Address of first character in string) |
65 * - start index (character index of start) | 65 * - start index (character index of start) |
66 * - String* input_string (location of a handle containing the string) | 66 * - String* input_string (location of a handle containing the string) |
67 * --- frame alignment (if applicable) --- | 67 * --- frame alignment (if applicable) --- |
68 * - return address | 68 * - return address |
69 * ebp-> - old ebp | 69 * ebp-> - old ebp |
70 * - backup of caller esi | 70 * - backup of caller esi |
71 * - backup of caller edi | 71 * - backup of caller edi |
72 * - backup of caller ebx | 72 * - backup of caller ebx |
73 * - Offset of location before start of input (effectively character | 73 * - Offset of location before start of input (effectively character |
74 * position -1). Used to initialize capture registers to a non-position. | 74 * position -1). Used to initialize capture registers to a non-position. |
75 * - Boolean at start (if 1, we are starting at the start of the string, | |
76 * otherwise 0) | |
77 * - register 0 ebp[-4] (Only positions must be stored in the first | 75 * - register 0 ebp[-4] (Only positions must be stored in the first |
78 * - register 1 ebp[-8] num_saved_registers_ registers) | 76 * - register 1 ebp[-8] num_saved_registers_ registers) |
79 * - ... | 77 * - ... |
80 * | 78 * |
81 * The first num_saved_registers_ registers are initialized to point to | 79 * The first num_saved_registers_ registers are initialized to point to |
82 * "character -1" in the string (i.e., char_size() bytes before the first | 80 * "character -1" in the string (i.e., char_size() bytes before the first |
83 * character of the string). The remaining registers starts out as garbage. | 81 * character of the string). The remaining registers starts out as garbage. |
84 * | 82 * |
85 * The data up to the return address must be placed there by the calling | 83 * The data up to the return address must be placed there by the calling |
86 * code, by calling the code entry as cast to a function with the signature: | 84 * code, by calling the code entry as cast to a function with the signature: |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
171 | 169 |
172 void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) { | 170 void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) { |
173 __ cmp(current_character(), limit); | 171 __ cmp(current_character(), limit); |
174 BranchOrBacktrack(greater, on_greater); | 172 BranchOrBacktrack(greater, on_greater); |
175 } | 173 } |
176 | 174 |
177 | 175 |
178 void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) { | 176 void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) { |
179 Label not_at_start; | 177 Label not_at_start; |
180 // Did we start the match at the start of the string at all? | 178 // Did we start the match at the start of the string at all? |
181 __ cmp(Operand(ebp, kAtStart), Immediate(0)); | 179 __ cmp(Operand(ebp, kStartIndex), Immediate(0)); |
182 BranchOrBacktrack(equal, ¬_at_start); | 180 BranchOrBacktrack(not_equal, ¬_at_start); |
183 // If we did, are we still at the start of the input? | 181 // If we did, are we still at the start of the input? |
184 __ lea(eax, Operand(esi, edi, times_1, 0)); | 182 __ lea(eax, Operand(esi, edi, times_1, 0)); |
185 __ cmp(eax, Operand(ebp, kInputStart)); | 183 __ cmp(eax, Operand(ebp, kInputStart)); |
186 BranchOrBacktrack(equal, on_at_start); | 184 BranchOrBacktrack(equal, on_at_start); |
187 __ bind(¬_at_start); | 185 __ bind(¬_at_start); |
188 } | 186 } |
189 | 187 |
190 | 188 |
191 void RegExpMacroAssemblerIA32::CheckNotAtStart(Label* on_not_at_start) { | 189 void RegExpMacroAssemblerIA32::CheckNotAtStart(Label* on_not_at_start) { |
192 // Did we start the match at the start of the string at all? | 190 // Did we start the match at the start of the string at all? |
193 __ cmp(Operand(ebp, kAtStart), Immediate(0)); | 191 __ cmp(Operand(ebp, kStartIndex), Immediate(0)); |
194 BranchOrBacktrack(equal, on_not_at_start); | 192 BranchOrBacktrack(not_equal, on_not_at_start); |
195 // If we did, are we still at the start of the input? | 193 // If we did, are we still at the start of the input? |
196 __ lea(eax, Operand(esi, edi, times_1, 0)); | 194 __ lea(eax, Operand(esi, edi, times_1, 0)); |
197 __ cmp(eax, Operand(ebp, kInputStart)); | 195 __ cmp(eax, Operand(ebp, kInputStart)); |
198 BranchOrBacktrack(not_equal, on_not_at_start); | 196 BranchOrBacktrack(not_equal, on_not_at_start); |
199 } | 197 } |
200 | 198 |
201 | 199 |
202 void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) { | 200 void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) { |
203 __ cmp(current_character(), limit); | 201 __ cmp(current_character(), limit); |
204 BranchOrBacktrack(less, on_less); | 202 BranchOrBacktrack(less, on_less); |
205 } | 203 } |
206 | 204 |
207 | 205 |
208 void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str, | 206 void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str, |
209 int cp_offset, | 207 int cp_offset, |
210 Label* on_failure, | 208 Label* on_failure, |
211 bool check_end_of_string) { | 209 bool check_end_of_string) { |
210 #ifdef DEBUG | |
211 // If input is ASCII, don't even bother calling here if the string to | |
212 // match contains a non-ascii character. | |
213 if (mode_ == ASCII) { | |
214 for (int i = 0; i < str.length(); i++) { | |
215 ASSERT(str[i] <= String::kMaxAsciiCharCodeU); | |
216 } | |
217 } | |
218 #endif | |
212 int byte_length = str.length() * char_size(); | 219 int byte_length = str.length() * char_size(); |
213 int byte_offset = cp_offset * char_size(); | 220 int byte_offset = cp_offset * char_size(); |
214 if (check_end_of_string) { | 221 if (check_end_of_string) { |
215 // Check that there are at least str.length() characters left in the input. | 222 // Check that there are at least str.length() characters left in the input. |
216 __ cmp(Operand(edi), Immediate(-(byte_offset + byte_length))); | 223 __ cmp(Operand(edi), Immediate(-(byte_offset + byte_length))); |
217 BranchOrBacktrack(greater, on_failure); | 224 BranchOrBacktrack(greater, on_failure); |
218 } | 225 } |
219 | 226 |
220 if (on_failure == NULL) { | 227 if (on_failure == NULL) { |
221 // Instead of inlining a backtrack, (re)use the global backtrack target. | 228 // Instead of inlining a backtrack, (re)use the global backtrack target. |
222 on_failure = &backtrack_label_; | 229 on_failure = &backtrack_label_; |
223 } | 230 } |
224 | 231 |
225 for (int i = 0; i < str.length(); i++) { | 232 // Do one character test first to minimize loading for the case that |
233 // we don't match at all (loading more than one character introduces that | |
234 // chance of reading unaligned and reading across cache boundaries). | |
235 // If the first character matches, expect a larger chance of matching the | |
236 // string, and start loading more characters at a time. | |
237 if (mode_ == ASCII) { | |
238 __ cmpb(Operand(esi, edi, times_1, byte_offset), | |
239 static_cast<int8_t>(str[0])); | |
240 } else { | |
241 // Don't use 16-bit immediate. The size changing prefix throws off | |
242 // pre-decoding. | |
243 __ movzx_w(eax, | |
244 Operand(esi, edi, times_1, byte_offset)); | |
245 __ cmp(eax, static_cast<int32_t>(str[0])); | |
246 } | |
247 BranchOrBacktrack(not_equal, on_failure); | |
248 | |
249 __ lea(ebx, Operand(esi, edi, times_1, 0)); | |
250 for (int i = 0, n = str.length(); i < n;) { | |
Erik Corry
2010/05/10 20:23:47
i = 1 seems better and the x64 version has it.
Lasse Reichstein
2010/05/11 07:28:47
Bugger. Good catch!
| |
226 if (mode_ == ASCII) { | 251 if (mode_ == ASCII) { |
227 __ cmpb(Operand(esi, edi, times_1, byte_offset + i), | 252 if (i <= n - 4) { |
228 static_cast<int8_t>(str[i])); | 253 int combined_chars = |
254 (static_cast<uint32_t>(str[i + 0]) << 0) | | |
255 (static_cast<uint32_t>(str[i + 1]) << 8) | | |
256 (static_cast<uint32_t>(str[i + 2]) << 16) | | |
257 (static_cast<uint32_t>(str[i + 3]) << 24); | |
258 __ cmp(Operand(ebx, byte_offset + i), Immediate(combined_chars)); | |
259 i += 4; | |
260 } else { | |
261 __ cmpb(Operand(ebx, byte_offset + i), | |
262 static_cast<int8_t>(str[i])); | |
263 i += 1; | |
264 } | |
229 } else { | 265 } else { |
230 ASSERT(mode_ == UC16); | 266 ASSERT(mode_ == UC16); |
231 __ cmpw(Operand(esi, edi, times_1, byte_offset + i * sizeof(uc16)), | 267 if (i <= n - 2) { |
232 Immediate(str[i])); | 268 __ cmp(Operand(ebx, byte_offset + i * sizeof(uc16)), |
269 Immediate(*reinterpret_cast<const int*>(&str[i]))); | |
270 i += 2; | |
271 } else { | |
272 // Avoid a 16-bit immediate operation. It uses the size-changing | |
273 // 0x66 prefix which causes pre-decoder misprediction and pipeline | |
274 // flushes. | |
Erik Corry
2010/05/10 20:23:47
Is it possible to quote the section of the optimiz
Lasse Reichstein
2010/05/11 07:28:47
Quite possible, and done.
(Intel, Section 3.4.2.3,
| |
275 __ movzx_w(eax, | |
276 Operand(ebx, byte_offset + i * sizeof(uc16))); | |
277 __ cmp(eax, static_cast<int32_t>(str[i])); | |
278 i += 1; | |
279 } | |
233 } | 280 } |
234 BranchOrBacktrack(not_equal, on_failure); | 281 BranchOrBacktrack(not_equal, on_failure); |
235 } | 282 } |
236 } | 283 } |
237 | 284 |
238 | 285 |
239 void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) { | 286 void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) { |
240 Label fallthrough; | 287 Label fallthrough; |
241 __ cmp(edi, Operand(backtrack_stackpointer(), 0)); | 288 __ cmp(edi, Operand(backtrack_stackpointer(), 0)); |
242 __ j(not_equal, &fallthrough); | 289 __ j(not_equal, &fallthrough); |
(...skipping 375 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
618 __ bind(&entry_label_); | 665 __ bind(&entry_label_); |
619 // Start new stack frame. | 666 // Start new stack frame. |
620 __ push(ebp); | 667 __ push(ebp); |
621 __ mov(ebp, esp); | 668 __ mov(ebp, esp); |
622 // Save callee-save registers. Order here should correspond to order of | 669 // Save callee-save registers. Order here should correspond to order of |
623 // kBackup_ebx etc. | 670 // kBackup_ebx etc. |
624 __ push(esi); | 671 __ push(esi); |
625 __ push(edi); | 672 __ push(edi); |
626 __ push(ebx); // Callee-save on MacOS. | 673 __ push(ebx); // Callee-save on MacOS. |
627 __ push(Immediate(0)); // Make room for "input start - 1" constant. | 674 __ push(Immediate(0)); // Make room for "input start - 1" constant. |
628 __ push(Immediate(0)); // Make room for "at start" constant. | |
629 | 675 |
630 // Check if we have space on the stack for registers. | 676 // Check if we have space on the stack for registers. |
631 Label stack_limit_hit; | 677 Label stack_limit_hit; |
632 Label stack_ok; | 678 Label stack_ok; |
633 | 679 |
634 ExternalReference stack_limit = | 680 ExternalReference stack_limit = |
635 ExternalReference::address_of_stack_limit(); | 681 ExternalReference::address_of_stack_limit(); |
636 __ mov(ecx, esp); | 682 __ mov(ecx, esp); |
637 __ sub(ecx, Operand::StaticVariable(stack_limit)); | 683 __ sub(ecx, Operand::StaticVariable(stack_limit)); |
638 // Handle it if the stack pointer is already below the stack limit. | 684 // Handle it if the stack pointer is already below the stack limit. |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
670 __ neg(ebx); | 716 __ neg(ebx); |
671 if (mode_ == UC16) { | 717 if (mode_ == UC16) { |
672 __ lea(eax, Operand(edi, ebx, times_2, -char_size())); | 718 __ lea(eax, Operand(edi, ebx, times_2, -char_size())); |
673 } else { | 719 } else { |
674 __ lea(eax, Operand(edi, ebx, times_1, -char_size())); | 720 __ lea(eax, Operand(edi, ebx, times_1, -char_size())); |
675 } | 721 } |
676 // Store this value in a local variable, for use when clearing | 722 // Store this value in a local variable, for use when clearing |
677 // position registers. | 723 // position registers. |
678 __ mov(Operand(ebp, kInputStartMinusOne), eax); | 724 __ mov(Operand(ebp, kInputStartMinusOne), eax); |
679 | 725 |
680 // Determine whether the start index is zero, that is at the start of the | |
681 // string, and store that value in a local variable. | |
682 __ xor_(Operand(ecx), ecx); // setcc only operates on cl (lower byte of ecx). | |
683 // Register ebx still holds -stringIndex. | |
684 __ test(ebx, Operand(ebx)); | |
685 __ setcc(zero, ecx); // 1 if 0 (start of string), 0 if positive. | |
686 __ mov(Operand(ebp, kAtStart), ecx); | |
687 | |
688 if (num_saved_registers_ > 0) { // Always is, if generated from a regexp. | 726 if (num_saved_registers_ > 0) { // Always is, if generated from a regexp. |
689 // Fill saved registers with initial value = start offset - 1 | 727 // Fill saved registers with initial value = start offset - 1 |
690 // Fill in stack push order, to avoid accessing across an unwritten | 728 // Fill in stack push order, to avoid accessing across an unwritten |
691 // page (a problem on Windows). | 729 // page (a problem on Windows). |
692 __ mov(ecx, kRegisterZero); | 730 __ mov(ecx, kRegisterZero); |
693 Label init_loop; | 731 Label init_loop; |
694 __ bind(&init_loop); | 732 __ bind(&init_loop); |
695 __ mov(Operand(ebp, ecx, times_1, +0), eax); | 733 __ mov(Operand(ebp, ecx, times_1, +0), eax); |
696 __ sub(Operand(ecx), Immediate(kPointerSize)); | 734 __ sub(Operand(ecx), Immediate(kPointerSize)); |
697 __ cmp(ecx, kRegisterZero - num_saved_registers_ * kPointerSize); | 735 __ cmp(ecx, kRegisterZero - num_saved_registers_ * kPointerSize); |
698 __ j(greater, &init_loop); | 736 __ j(greater, &init_loop); |
699 } | 737 } |
700 // Ensure that we have written to each stack page, in order. Skipping a page | 738 // Ensure that we have written to each stack page, in order. Skipping a page |
701 // on Windows can cause segmentation faults. Assuming page size is 4k. | 739 // on Windows can cause segmentation faults. Assuming page size is 4k. |
702 const int kPageSize = 4096; | 740 const int kPageSize = 4096; |
703 const int kRegistersPerPage = kPageSize / kPointerSize; | 741 const int kRegistersPerPage = kPageSize / kPointerSize; |
704 for (int i = num_saved_registers_ + kRegistersPerPage - 1; | 742 for (int i = num_saved_registers_ + kRegistersPerPage - 1; |
705 i < num_registers_; | 743 i < num_registers_; |
706 i += kRegistersPerPage) { | 744 i += kRegistersPerPage) { |
707 __ mov(register_location(i), eax); // One write every page. | 745 __ mov(register_location(i), eax); // One write every page. |
708 } | 746 } |
709 | 747 |
710 | 748 |
711 // Initialize backtrack stack pointer. | 749 // Initialize backtrack stack pointer. |
712 __ mov(backtrack_stackpointer(), Operand(ebp, kStackHighEnd)); | 750 __ mov(backtrack_stackpointer(), Operand(ebp, kStackHighEnd)); |
713 // Load previous char as initial value of current-character. | 751 // Load previous char as initial value of current-character. |
714 Label at_start; | 752 Label at_start; |
715 __ cmp(Operand(ebp, kAtStart), Immediate(0)); | 753 __ cmp(Operand(ebp, kStartIndex), Immediate(0)); |
716 __ j(not_equal, &at_start); | 754 __ j(equal, &at_start); |
717 LoadCurrentCharacterUnchecked(-1, 1); // Load previous char. | 755 LoadCurrentCharacterUnchecked(-1, 1); // Load previous char. |
718 __ jmp(&start_label_); | 756 __ jmp(&start_label_); |
719 __ bind(&at_start); | 757 __ bind(&at_start); |
720 __ mov(current_character(), '\n'); | 758 __ mov(current_character(), '\n'); |
721 __ jmp(&start_label_); | 759 __ jmp(&start_label_); |
722 | 760 |
723 | 761 |
724 // Exit code: | 762 // Exit code: |
725 if (success_label_.is_linked()) { | 763 if (success_label_.is_linked()) { |
726 // Save captures when successful. | 764 // Save captures when successful. |
(...skipping 467 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1194 } | 1232 } |
1195 } | 1233 } |
1196 } | 1234 } |
1197 | 1235 |
1198 | 1236 |
1199 #undef __ | 1237 #undef __ |
1200 | 1238 |
1201 #endif // V8_INTERPRETED_REGEXP | 1239 #endif // V8_INTERPRETED_REGEXP |
1202 | 1240 |
1203 }} // namespace v8::internal | 1241 }} // namespace v8::internal |
OLD | NEW |