Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(638)

Side by Side Diff: runtime/lib/regexp_patch.dart

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Port remaining V8 regexp tests and fix exposed bugs. Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 import "dart:collection" show LinkedList, LinkedListEntry;
6
5 patch class RegExp { 7 patch class RegExp {
6 /* patch */ factory RegExp(String source, 8 /* patch */ factory RegExp(String source,
7 {bool multiLine: false, 9 {bool multiLine: false,
8 bool caseSensitive: true}) { 10 bool caseSensitive: true}) {
9 return new _JSSyntaxRegExp(source, 11 _JSSyntaxRegExpHashKey key = new _JSSyntaxRegExpHashKey(
10 multiLine: multiLine, 12 source, multiLine, caseSensitive);
11 caseSensitive: caseSensitive); 13 _JSSyntaxRegExpHashValue value = _cache[key];
14
15 if (value == null) {
16 if (_cache.length > _MAX_CACHE_SIZE) {
17 _JSSyntaxRegExpHashKey lastKey = _leastRecentlyUsed.last;
18 lastKey.unlink();
19 _cache.remove(lastKey);
20 }
21
22 value = new _JSSyntaxRegExpHashValue(
23 new _JSSyntaxRegExp(source,
24 multiLine: multiLine,
25 caseSensitive: caseSensitive),
26 key);
27 _cache[key] = value;
28 } else {
29 value.key.unlink();
30 }
31
32 assert(value != null);
33
34 _leastRecentlyUsed.addFirst(value.key);
35 assert(_leastRecentlyUsed.length == _cache.length);
36
37 return value.regexp;
Vyacheslav Egorov (Google) 2014/10/01 20:13:21 Please add TODO here that we might not want to "ca
jgruber1 2014/10/03 18:59:52 Done.
38 }
39
40 // Regular expression objects are stored in a cache of up to _MAX_CACHE_SIZE
41 // elements using an LRU eviction strategy.
42 // TODO(jgruber): Do not impose a fixed limit on the number of cached objects.
43 // Other possibilities could be limiting by the size of the regexp objects,
44 // or imposing a lower time bound for the most recent use under which a regexp
45 // may not be removed from the cache.
46 static const int _MAX_CACHE_SIZE = 256;
47 static final Map<_JSSyntaxRegExpHashKey, _JSSyntaxRegExp> _cache =
48 <_JSSyntaxRegExpHashKey, _JSSyntaxRegExpValue>{ };
49 static final LinkedList<_JSSyntaxRegExpHashKey> _leastRecentlyUsed =
50 new LinkedList<_JSSyntaxRegExpHashKey>();
51 }
52
53
54 // Represents both a key in the regular expression cache as well as its
55 // corresponding entry in the LRU list.
56 class _JSSyntaxRegExpHashKey extends LinkedListEntry<_JSSyntaxRegExpHashKey> {
57 final String pattern;
58 final bool multiLine;
59 final bool caseSensitive;
60
61 _JSSyntaxRegExpHashKey(this.pattern, this.multiLine, this.caseSensitive);
62
63 int get hashCode => pattern.hashCode;
64 bool operator==(_JSSyntaxRegExpHashKey that) {
65 return (pattern == that.pattern) &&
Vyacheslav Egorov (Google) 2014/10/01 20:13:21 this.pattern for consistency, or remove this. ever
jgruber1 2014/10/03 18:59:52 Done.
66 (this.multiLine == that.multiLine) &&
67 (this.caseSensitive == that.caseSensitive);
12 } 68 }
13 } 69 }
14 70
71
72 // Represents a value in the regular expression cache. Contains a pointer
73 // back to the key in order to access the corresponding LRU entry.
74 class _JSSyntaxRegExpHashValue {
75 final _JSSyntaxRegExp regexp;
76 final _JSSyntaxRegExpHashKey key;
77
78 _JSSyntaxRegExpHashValue(this.regexp, this.key);
79 }
80
81
15 class _JSRegExpMatch implements Match { 82 class _JSRegExpMatch implements Match {
16 _JSRegExpMatch(this._regexp, this.input, this._match); 83 _JSRegExpMatch(this._regexp, this.input, this._match);
17 84
18 int get start => _start(0); 85 int get start => _start(0);
19 int get end => _end(0); 86 int get end => _end(0);
20 87
21 int _start(int groupIdx) { 88 int _start(int groupIdx) {
22 return _match[(groupIdx * _MATCH_PAIR)]; 89 return _match[(groupIdx * _MATCH_PAIR)];
23 } 90 }
24 91
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
62 } 129 }
63 130
64 131
65 class _JSSyntaxRegExp implements RegExp { 132 class _JSSyntaxRegExp implements RegExp {
66 factory _JSSyntaxRegExp( 133 factory _JSSyntaxRegExp(
67 String pattern, 134 String pattern,
68 {bool multiLine: false, 135 {bool multiLine: false,
69 bool caseSensitive: true}) native "JSSyntaxRegExp_factory"; 136 bool caseSensitive: true}) native "JSSyntaxRegExp_factory";
70 137
71 Match firstMatch(String str) { 138 Match firstMatch(String str) {
139 if (str is! String) throw new ArgumentError(str);
72 List match = _ExecuteMatch(str, 0); 140 List match = _ExecuteMatch(str, 0);
73 if (match == null) { 141 if (match == null) {
74 return null; 142 return null;
75 } 143 }
76 return new _JSRegExpMatch(this, str, match); 144 return new _JSRegExpMatch(this, str, match);
77 } 145 }
78 146
79 Iterable<Match> allMatches(String string, [int start = 0]) { 147 Iterable<Match> allMatches(String string, [int start = 0]) {
80 if (string is! String) throw new ArgumentError(string); 148 if (string is! String) throw new ArgumentError(string);
81 if (start is! int) throw new ArgumentError(start); 149 if (start is! int) throw new ArgumentError(start);
82 if (0 > start || start > string.length) { 150 if (0 > start || start > string.length) {
83 throw new RangeError.range(start, 0, string.length); 151 throw new RangeError.range(start, 0, string.length);
84 } 152 }
85 return new _AllMatchesIterable(this, string, start); 153 return new _AllMatchesIterable(this, string, start);
86 } 154 }
87 155
88 Match matchAsPrefix(String string, [int start = 0]) { 156 Match matchAsPrefix(String string, [int start = 0]) {
157 if (string is! String) throw new ArgumentError(string);
158 if (start is! int) throw new ArgumentError(start);
89 if (start < 0 || start > string.length) { 159 if (start < 0 || start > string.length) {
90 throw new RangeError.range(start, 0, string.length); 160 throw new RangeError.range(start, 0, string.length);
91 } 161 }
92 // Inefficient check that searches for a later match too. 162 // Inefficient check that searches for a later match too.
93 // Change this when possible. 163 // Change this when possible.
94 List<int> list = _ExecuteMatch(string, start); 164 List<int> list = _ExecuteMatch(string, start);
95 if (list == null) return null; 165 if (list == null) return null;
96 if (list[0] != start) return null; 166 if (list[0] != start) return null;
97 return new _JSRegExpMatch(this, string, list); 167 return new _JSRegExpMatch(this, string, list);
98 } 168 }
99 169
100 bool hasMatch(String str) { 170 bool hasMatch(String str) {
171 if (str is! String) throw new ArgumentError(str);
101 List match = _ExecuteMatch(str, 0); 172 List match = _ExecuteMatch(str, 0);
102 return (match == null) ? false : true; 173 return (match == null) ? false : true;
103 } 174 }
104 175
105 String stringMatch(String str) { 176 String stringMatch(String str) {
177 if (str is! String) throw new ArgumentError(str);
106 List match = _ExecuteMatch(str, 0); 178 List match = _ExecuteMatch(str, 0);
107 if (match == null) { 179 if (match == null) {
108 return null; 180 return null;
109 } 181 }
110 return str._substringUnchecked(match[0], match[1]); 182 return str._substringUnchecked(match[0], match[1]);
111 } 183 }
112 184
113 String get pattern native "JSSyntaxRegExp_getPattern"; 185 String get pattern native "JSSyntaxRegExp_getPattern";
114 186
115 bool get isMultiLine native "JSSyntaxRegExp_getIsMultiLine"; 187 bool get isMultiLine native "JSSyntaxRegExp_getIsMultiLine";
116 188
117 bool get isCaseSensitive native "JSSyntaxRegExp_getIsCaseSensitive"; 189 bool get isCaseSensitive native "JSSyntaxRegExp_getIsCaseSensitive";
118 190
119 int get _groupCount native "JSSyntaxRegExp_getGroupCount"; 191 int get _groupCount native "JSSyntaxRegExp_getGroupCount";
120 192
193 // Byte map of one byte characters with a 0xff if the character is a word
194 // character (digit, letter or underscore) and 0x00 otherwise.
195 // Used by generated RegExp code.
196 static const List<int> _wordCharacterMap = const <int>[
197 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
198 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
199 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
200 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
201
202 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
203 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
204 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // '0' - '7'
205 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // '8' - '9'
206
207 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'A' - 'G'
208 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'H' - 'O'
209 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'P' - 'W'
210 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, // 'X' - 'Z', '_'
211
212 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'a' - 'g'
213 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'h' - 'o'
214 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'p' - 'w'
215 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, // 'x' - 'z'
216 // Latin-1 range
217 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
218 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
219 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
220 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
221
222 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
223 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
224 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
225 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
226
227 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
228 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
229 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
230 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
231
232 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
233 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
234 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
235 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
236 ];
237
121 List _ExecuteMatch(String str, int start_index) 238 List _ExecuteMatch(String str, int start_index)
122 native "JSSyntaxRegExp_ExecuteMatch"; 239 native "JSSyntaxRegExp_ExecuteMatch";
123 } 240 }
124 241
125 class _AllMatchesIterable extends IterableBase<Match> { 242 class _AllMatchesIterable extends IterableBase<Match> {
126 final _JSSyntaxRegExp _re; 243 final _JSSyntaxRegExp _re;
127 final String _str; 244 final String _str;
128 final int _start; 245 final int _start;
129 246
130 _AllMatchesIterable(this._re, this._str, this._start); 247 _AllMatchesIterable(this._re, this._str, this._start);
(...skipping 23 matching lines...) Expand all
154 _nextIndex++; 271 _nextIndex++;
155 } 272 }
156 return true; 273 return true;
157 } 274 }
158 } 275 }
159 _current = null; 276 _current = null;
160 _re = null; 277 _re = null;
161 return false; 278 return false;
162 } 279 }
163 } 280 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698