OLD | NEW |
1 // Copyright 2008 the V8 project authors. All rights reserved. | 1 // Copyright 2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
45 PrintF("pc = %d, current = %d, bc = " \ | 45 PrintF("pc = %d, current = %d, bc = " \ |
46 #name "\n", pc - code_base, current); \ | 46 #name "\n", pc - code_base, current); \ |
47 } | 47 } |
48 #else | 48 #else |
49 # define BYTECODE(name) break; \ | 49 # define BYTECODE(name) break; \ |
50 case BC_##name: | 50 case BC_##name: |
51 #endif | 51 #endif |
52 | 52 |
53 | 53 |
54 | 54 |
55 template <typename Char> | |
56 static bool RawMatch(const byte* code_base, | 55 static bool RawMatch(const byte* code_base, |
57 Vector<const Char> subject, | 56 Vector<const uc16> subject, |
58 int* registers, | 57 int* registers, |
59 int current) { | 58 int current) { |
60 const byte* pc = code_base; | 59 const byte* pc = code_base; |
61 int backtrack_stack[1000]; | 60 int backtrack_stack[1000]; |
62 int backtrack_stack_space = 1000; | 61 int backtrack_stack_space = 1000; |
63 int* backtrack_sp = backtrack_stack; | 62 int* backtrack_sp = backtrack_stack; |
64 int current_char = -1; | 63 int current_char = -1; |
65 #ifdef DEBUG | 64 #ifdef DEBUG |
66 if (FLAG_trace_regexp_bytecodes) { | 65 if (FLAG_trace_regexp_bytecodes) { |
67 PrintF("\n\nStart bytecode interpreter\n\n"); | 66 PrintF("\n\nStart bytecode interpreter\n\n"); |
(...skipping 18 matching lines...) Expand all Loading... |
86 pc += 5; | 85 pc += 5; |
87 BYTECODE(PUSH_REGISTER) | 86 BYTECODE(PUSH_REGISTER) |
88 if (--backtrack_stack_space < 0) { | 87 if (--backtrack_stack_space < 0) { |
89 return false; // No match on backtrack stack overflow. | 88 return false; // No match on backtrack stack overflow. |
90 } | 89 } |
91 *backtrack_sp++ = registers[pc[1]]; | 90 *backtrack_sp++ = registers[pc[1]]; |
92 pc += 2; | 91 pc += 2; |
93 BYTECODE(SET_REGISTER) | 92 BYTECODE(SET_REGISTER) |
94 registers[pc[1]] = Load32(pc + 2); | 93 registers[pc[1]] = Load32(pc + 2); |
95 pc += 6; | 94 pc += 6; |
| 95 BYTECODE(ADVANCE_REGISTER) |
| 96 registers[pc[1]] += Load32(pc + 2); |
| 97 pc += 6; |
96 BYTECODE(SET_REGISTER_TO_CP) | 98 BYTECODE(SET_REGISTER_TO_CP) |
97 registers[pc[1]] = current + Load32(pc + 2); | 99 registers[pc[1]] = current + Load32(pc + 2); |
98 pc += 6; | 100 pc += 6; |
99 BYTECODE(POP_CP) | 101 BYTECODE(POP_CP) |
100 backtrack_stack_space++; | 102 backtrack_stack_space++; |
101 --backtrack_sp; | 103 --backtrack_sp; |
102 current = *backtrack_sp; | 104 current = *backtrack_sp; |
103 pc += 1; | 105 pc += 1; |
104 BYTECODE(POP_BT) | 106 BYTECODE(POP_BT) |
105 backtrack_stack_space++; | 107 backtrack_stack_space++; |
106 --backtrack_sp; | 108 --backtrack_sp; |
107 pc = code_base + *backtrack_sp; | 109 pc = code_base + *backtrack_sp; |
108 BYTECODE(POP_REGISTER) | 110 BYTECODE(POP_REGISTER) |
109 backtrack_stack_space++; | 111 backtrack_stack_space++; |
110 --backtrack_sp; | 112 --backtrack_sp; |
111 registers[pc[1]] = *backtrack_sp; | 113 registers[pc[1]] = *backtrack_sp; |
112 pc += 2; | 114 pc += 2; |
113 BYTECODE(FAIL) | 115 BYTECODE(FAIL) |
114 return false; | 116 return false; |
115 BYTECODE(FAIL_IF_WITHIN) | |
116 if (current + Load32(pc + 1) >= subject.length()) { | |
117 return false; | |
118 } | |
119 pc += 5; | |
120 BYTECODE(SUCCEED) | 117 BYTECODE(SUCCEED) |
121 return true; | 118 return true; |
122 BYTECODE(ADVANCE_CP) | 119 BYTECODE(ADVANCE_CP) |
123 current += Load32(pc + 1); | 120 current += Load32(pc + 1); |
124 pc += 5; | 121 pc += 5; |
125 BYTECODE(GOTO) | 122 BYTECODE(GOTO) |
126 pc = code_base + Load32(pc + 1); | 123 pc = code_base + Load32(pc + 1); |
127 BYTECODE(LOAD_CURRENT_CHAR) { | 124 BYTECODE(LOAD_CURRENT_CHAR) { |
128 int pos = current + Load32(pc + 1); | 125 int pos = current + Load32(pc + 1); |
129 if (pos >= subject.length()) { | 126 if (pos >= subject.length()) { |
130 current_char = -1; | 127 pc = code_base + Load32(pc + 5); |
131 } else { | 128 } else { |
132 current_char = subject[pos]; | 129 current_char = subject[pos]; |
| 130 pc += 9; |
133 } | 131 } |
134 pc += 5; | |
135 } | 132 } |
136 BYTECODE(CHECK_CHAR) { | 133 BYTECODE(CHECK_CHAR) { |
137 int c = Load16(pc + 1); | 134 int c = Load16(pc + 1); |
138 if (c != current_char) { | 135 if (c != current_char) { |
139 pc = code_base + Load32(pc + 3); | 136 pc = code_base + Load32(pc + 3); |
140 } else { | 137 } else { |
141 pc += 7; | 138 pc += 7; |
142 } | 139 } |
143 } | 140 } |
144 BYTECODE(CHECK_NOT_CHAR) { | 141 BYTECODE(CHECK_NOT_CHAR) { |
145 int c = Load16(pc + 1); | 142 int c = Load16(pc + 1); |
146 if (c == current_char) { | 143 if (c == current_char) { |
147 pc = code_base + Load32(pc + 3); | 144 pc = code_base + Load32(pc + 3); |
148 } else { | 145 } else { |
149 pc += 7; | 146 pc += 7; |
150 } | 147 } |
151 } | 148 } |
152 BYTECODE(CHECK_END) { | |
153 if (current_char != -1) { | |
154 pc = code_base + Load32(pc + 1); | |
155 } else { | |
156 pc += 5; | |
157 } | |
158 } | |
159 BYTECODE(CHECK_NOT_END) { | |
160 if (current_char == -1) { | |
161 pc = code_base + Load32(pc + 1); | |
162 } else { | |
163 pc += 5; | |
164 } | |
165 } | |
166 BYTECODE(CHECK_RANGE) { | 149 BYTECODE(CHECK_RANGE) { |
167 int start = Load16(pc + 1); | 150 int start = Load16(pc + 1); |
168 int end = Load16(pc + 3); | 151 int end = Load16(pc + 3); |
169 if (current_char >= start && current_char <= end) { | 152 if (current_char < start || current_char > end) { |
170 pc = code_base + Load32(pc + 5); | 153 pc = code_base + Load32(pc + 5); |
171 } else { | 154 } else { |
172 pc += 9; | 155 pc += 9; |
173 } | 156 } |
174 } | 157 } |
175 BYTECODE(CHECK_NOT_RANGE) { | 158 BYTECODE(CHECK_NOT_RANGE) { |
176 int start = Load16(pc + 1); | 159 int start = Load16(pc + 1); |
177 int end = Load16(pc + 3); | 160 int end = Load16(pc + 3); |
178 if (current_char < start || current_char > end || current_char == -1) { | 161 if (current_char >= start && current_char <= end) { |
179 pc = code_base + Load32(pc + 5); | 162 pc = code_base + Load32(pc + 5); |
180 } else { | 163 } else { |
181 pc += 9; | 164 pc += 9; |
182 } | 165 } |
183 } | 166 } |
184 BYTECODE(CHECK_REGISTER_LT) | 167 BYTECODE(CHECK_REGISTER_LT) |
185 if (registers[pc[1]] < Load16(pc + 2)) { | 168 if (registers[pc[1]] < Load16(pc + 2)) { |
186 pc = code_base + Load32(pc + 4); | 169 pc = code_base + Load32(pc + 4); |
187 } else { | 170 } else { |
188 pc += 8; | 171 pc += 8; |
189 } | 172 } |
190 BYTECODE(CHECK_REGISTER_GE) | 173 BYTECODE(CHECK_REGISTER_GE) |
191 if (registers[pc[1]] >= Load16(pc + 2)) { | 174 if (registers[pc[1]] >= Load16(pc + 2)) { |
192 pc = code_base + Load32(pc + 4); | 175 pc = code_base + Load32(pc + 4); |
193 } else { | 176 } else { |
194 pc += 8; | 177 pc += 8; |
195 } | 178 } |
| 179 BYTECODE(LOOKUP_MAP1) { |
| 180 // Look up character in a bitmap. If we find a 0, then jump to the |
| 181 // location at pc + 7. Otherwise fall through! |
| 182 int index = current_char - Load16(pc + 1); |
| 183 byte map = code_base[Load32(pc + 3) + (index >> 3)]; |
| 184 map = ((map >> (index & 7)) & 1); |
| 185 if (map == 0) { |
| 186 pc = code_base + Load32(pc + 7); |
| 187 } else { |
| 188 pc += 11; |
| 189 } |
| 190 } |
| 191 BYTECODE(LOOKUP_MAP2) { |
| 192 // Look up character in a half-nibble map. If we find 00, then jump to |
| 193 // the location at pc + 7. If we find 01 then jump to location at |
| 194 // pc + 11, etc. |
| 195 int index = (current_char - Load16(pc + 1)) << 1; |
| 196 byte map = code_base[Load32(pc + 3) + (index >> 3)]; |
| 197 map = ((map >> (index & 7)) & 3); |
| 198 if (map < 2) { |
| 199 if (map == 0) { |
| 200 pc = code_base + Load32(pc + 7); |
| 201 } else { |
| 202 pc = code_base + Load32(pc + 11); |
| 203 } |
| 204 } else { |
| 205 if (map == 2) { |
| 206 pc = code_base + Load32(pc + 15); |
| 207 } else { |
| 208 pc = code_base + Load32(pc + 19); |
| 209 } |
| 210 } |
| 211 } |
| 212 BYTECODE(LOOKUP_MAP8) { |
| 213 // Look up character in a byte map. Use the byte as an index into a |
| 214 // table that follows this instruction immediately. |
| 215 int index = current_char - Load16(pc + 1); |
| 216 byte map = code_base[Load32(pc + 3) + index]; |
| 217 const byte* new_pc = code_base + Load32(pc + 7) + (map << 2); |
| 218 pc = code_base + Load32(new_pc); |
| 219 } |
| 220 BYTECODE(LOOKUP_HI_MAP8) { |
| 221 // Look up high byte of this character in a byte map. Use the byte as |
| 222 // an index into a table that follows this instruction immediately. |
| 223 int index = (current_char >> 8) - pc[1]; |
| 224 byte map = code_base[Load32(pc + 2) + index]; |
| 225 const byte* new_pc = code_base + Load32(pc + 6) + (map << 2); |
| 226 pc = code_base + Load32(new_pc); |
| 227 } |
196 BYTECODE(CHECK_BACKREF) | 228 BYTECODE(CHECK_BACKREF) |
197 UNREACHABLE(); | 229 UNREACHABLE(); |
198 BYTECODE(CHECK_NOT_BACKREF) | 230 BYTECODE(CHECK_NOT_BACKREF) |
199 UNREACHABLE(); | 231 UNREACHABLE(); |
200 BYTECODE(CHECK_BITMAP) | |
201 UNREACHABLE(); | |
202 BYTECODE(CHECK_NOT_BITMAP) | |
203 UNREACHABLE(); | |
204 break; // Last one doesn't have break in macro. | 232 break; // Last one doesn't have break in macro. |
205 default: | 233 default: |
206 UNREACHABLE(); | 234 UNREACHABLE(); |
207 break; | 235 break; |
208 } | 236 } |
209 } | 237 } |
210 } | 238 } |
211 | 239 |
212 | 240 |
213 bool Re2kInterpreter::Match(ByteArray* code_array, | 241 bool Re2kInterpreter::Match(Handle<ByteArray> code_array, |
214 String* subject, | 242 Handle<String> subject, |
215 int* registers, | 243 int* registers, |
216 int start_position) { | 244 int start_position) { |
217 const byte* code_base = code_array->GetDataStartAddress(); | 245 const byte* code_base = code_array->GetDataStartAddress(); |
218 StringShape shape(subject); | 246 ASSERT(subject->IsFlat(StringShape(*subject))); |
219 ASSERT(subject->IsFlat(shape)); | 247 Handle<String> flat_two_byte = RegExpImpl::CachedStringToTwoByte(subject); |
220 if (shape.IsAsciiRepresentation()) { | 248 ASSERT(StringShape(*flat_two_byte).IsTwoByteRepresentation()); |
221 return RawMatch(code_base, | 249 return RawMatch(code_base, |
222 subject->ToAsciiVector(), | 250 flat_two_byte->ToUC16Vector(), |
223 registers, | 251 registers, |
224 start_position); | 252 start_position); |
225 } else { | |
226 return RawMatch(code_base, | |
227 subject->ToUC16Vector(), | |
228 registers, | |
229 start_position); | |
230 } | |
231 } | 253 } |
232 | 254 |
233 } } // namespace v8::internal | 255 } } // namespace v8::internal |
OLD | NEW |