Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(304)

Side by Side Diff: runtime/lib/regexp_patch.dart

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Squashed all irregexp CLs, addressed comments. Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 import "dart:collection" show LinkedList, LinkedListEntry;
6
5 patch class RegExp { 7 patch class RegExp {
6 /* patch */ factory RegExp(String source, 8 /* patch */ factory RegExp(String source,
7 {bool multiLine: false, 9 {bool multiLine: false,
8 bool caseSensitive: true}) { 10 bool caseSensitive: true}) {
9 return new _JSSyntaxRegExp(source, 11 _JSSyntaxRegExpHashKey key = new _JSSyntaxRegExpHashKey(
10 multiLine: multiLine, 12 source, multiLine, caseSensitive);
11 caseSensitive: caseSensitive); 13 _JSSyntaxRegExpHashValue value = _cache[key];
14
15 if (value == null) {
16 if (_cache.length > _MAX_CACHE_SIZE) {
17 _JSSyntaxRegExpHashKey lastKey = _leastRecentlyUsed.last;
18 lastKey.unlink();
19 _cache.remove(lastKey);
20 }
21
22 value = new _JSSyntaxRegExpHashValue(
23 new _JSSyntaxRegExp(source,
24 multiLine: multiLine,
25 caseSensitive: caseSensitive),
26 key);
27 _cache[key] = value;
28 } else {
29 value.key.unlink();
30 }
31
32 assert(value != null);
33
34 _leastRecentlyUsed.addFirst(value.key);
35 assert(_leastRecentlyUsed.length == _cache.length);
36
37 // TODO(jgruber): We might not want to canonicalize regexp objects.
38 return value.regexp;
39 }
40
41 // Regular expression objects are stored in a cache of up to _MAX_CACHE_SIZE
42 // elements using an LRU eviction strategy.
43 // TODO(jgruber): Do not impose a fixed limit on the number of cached objects.
44 // Other possibilities could be limiting by the size of the regexp objects,
45 // or imposing a lower time bound for the most recent use under which a regexp
46 // may not be removed from the cache.
47 static const int _MAX_CACHE_SIZE = 256;
48 static final Map<_JSSyntaxRegExpHashKey, _JSSyntaxRegExp> _cache =
49 <_JSSyntaxRegExpHashKey, _JSSyntaxRegExpValue>{ };
50 static final LinkedList<_JSSyntaxRegExpHashKey> _leastRecentlyUsed =
zerny-google 2014/10/07 12:18:35 Nit: Maybe this should just be named _recentlyUsed
jgruber1 2014/10/07 15:00:25 Done.
51 new LinkedList<_JSSyntaxRegExpHashKey>();
52 }
53
54
55 // Represents both a key in the regular expression cache as well as its
56 // corresponding entry in the LRU list.
57 class _JSSyntaxRegExpHashKey extends LinkedListEntry<_JSSyntaxRegExpHashKey> {
58 final String pattern;
59 final bool multiLine;
60 final bool caseSensitive;
61
62 _JSSyntaxRegExpHashKey(this.pattern, this.multiLine, this.caseSensitive);
63
64 int get hashCode => pattern.hashCode;
65 bool operator==(_JSSyntaxRegExpHashKey that) {
66 return (this.pattern == that.pattern) &&
67 (this.multiLine == that.multiLine) &&
68 (this.caseSensitive == that.caseSensitive);
12 } 69 }
13 } 70 }
14 71
72
73 // Represents a value in the regular expression cache. Contains a pointer
74 // back to the key in order to access the corresponding LRU entry.
75 class _JSSyntaxRegExpHashValue {
76 final _JSSyntaxRegExp regexp;
77 final _JSSyntaxRegExpHashKey key;
78
79 _JSSyntaxRegExpHashValue(this.regexp, this.key);
80 }
81
82
15 class _JSRegExpMatch implements Match { 83 class _JSRegExpMatch implements Match {
16 _JSRegExpMatch(this._regexp, this.input, this._match); 84 _JSRegExpMatch(this._regexp, this.input, this._match);
17 85
18 int get start => _start(0); 86 int get start => _start(0);
19 int get end => _end(0); 87 int get end => _end(0);
20 88
21 int _start(int groupIdx) { 89 int _start(int groupIdx) {
22 return _match[(groupIdx * _MATCH_PAIR)]; 90 return _match[(groupIdx * _MATCH_PAIR)];
23 } 91 }
24 92
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
62 } 130 }
63 131
64 132
65 class _JSSyntaxRegExp implements RegExp { 133 class _JSSyntaxRegExp implements RegExp {
66 factory _JSSyntaxRegExp( 134 factory _JSSyntaxRegExp(
67 String pattern, 135 String pattern,
68 {bool multiLine: false, 136 {bool multiLine: false,
69 bool caseSensitive: true}) native "JSSyntaxRegExp_factory"; 137 bool caseSensitive: true}) native "JSSyntaxRegExp_factory";
70 138
71 Match firstMatch(String str) { 139 Match firstMatch(String str) {
140 if (str is! String) throw new ArgumentError(str);
72 List match = _ExecuteMatch(str, 0); 141 List match = _ExecuteMatch(str, 0);
73 if (match == null) { 142 if (match == null) {
74 return null; 143 return null;
75 } 144 }
76 return new _JSRegExpMatch(this, str, match); 145 return new _JSRegExpMatch(this, str, match);
77 } 146 }
78 147
79 Iterable<Match> allMatches(String string, [int start = 0]) { 148 Iterable<Match> allMatches(String string, [int start = 0]) {
80 if (string is! String) throw new ArgumentError(string); 149 if (string is! String) throw new ArgumentError(string);
81 if (start is! int) throw new ArgumentError(start); 150 if (start is! int) throw new ArgumentError(start);
82 if (0 > start || start > string.length) { 151 if (0 > start || start > string.length) {
83 throw new RangeError.range(start, 0, string.length); 152 throw new RangeError.range(start, 0, string.length);
84 } 153 }
85 return new _AllMatchesIterable(this, string, start); 154 return new _AllMatchesIterable(this, string, start);
86 } 155 }
87 156
88 Match matchAsPrefix(String string, [int start = 0]) { 157 Match matchAsPrefix(String string, [int start = 0]) {
158 if (string is! String) throw new ArgumentError(string);
159 if (start is! int) throw new ArgumentError(start);
89 if (start < 0 || start > string.length) { 160 if (start < 0 || start > string.length) {
90 throw new RangeError.range(start, 0, string.length); 161 throw new RangeError.range(start, 0, string.length);
91 } 162 }
92 // Inefficient check that searches for a later match too. 163 // Inefficient check that searches for a later match too.
93 // Change this when possible. 164 // Change this when possible.
94 List<int> list = _ExecuteMatch(string, start); 165 List<int> list = _ExecuteMatch(string, start);
95 if (list == null) return null; 166 if (list == null) return null;
96 if (list[0] != start) return null; 167 if (list[0] != start) return null;
97 return new _JSRegExpMatch(this, string, list); 168 return new _JSRegExpMatch(this, string, list);
98 } 169 }
99 170
100 bool hasMatch(String str) { 171 bool hasMatch(String str) {
172 if (str is! String) throw new ArgumentError(str);
101 List match = _ExecuteMatch(str, 0); 173 List match = _ExecuteMatch(str, 0);
102 return (match == null) ? false : true; 174 return (match == null) ? false : true;
103 } 175 }
104 176
105 String stringMatch(String str) { 177 String stringMatch(String str) {
178 if (str is! String) throw new ArgumentError(str);
106 List match = _ExecuteMatch(str, 0); 179 List match = _ExecuteMatch(str, 0);
107 if (match == null) { 180 if (match == null) {
108 return null; 181 return null;
109 } 182 }
110 return str._substringUnchecked(match[0], match[1]); 183 return str._substringUnchecked(match[0], match[1]);
111 } 184 }
112 185
113 String get pattern native "JSSyntaxRegExp_getPattern"; 186 String get pattern native "JSSyntaxRegExp_getPattern";
114 187
115 bool get isMultiLine native "JSSyntaxRegExp_getIsMultiLine"; 188 bool get isMultiLine native "JSSyntaxRegExp_getIsMultiLine";
116 189
117 bool get isCaseSensitive native "JSSyntaxRegExp_getIsCaseSensitive"; 190 bool get isCaseSensitive native "JSSyntaxRegExp_getIsCaseSensitive";
118 191
119 int get _groupCount native "JSSyntaxRegExp_getGroupCount"; 192 int get _groupCount native "JSSyntaxRegExp_getGroupCount";
120 193
194 // Byte map of one byte characters with a 0xff if the character is a word
195 // character (digit, letter or underscore) and 0x00 otherwise.
196 // Used by generated RegExp code.
197 static const List<int> _wordCharacterMap = const <int>[
198 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
199 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
200 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
201 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
202
203 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
204 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
205 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // '0' - '7'
206 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // '8' - '9'
207
208 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'A' - 'G'
209 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'H' - 'O'
210 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'P' - 'W'
211 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, // 'X' - 'Z', '_'
212
213 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'a' - 'g'
214 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'h' - 'o'
215 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'p' - 'w'
216 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, // 'x' - 'z'
217 // Latin-1 range
218 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
219 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
220 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
221 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
222
223 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
224 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
225 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
226 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
227
228 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
229 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
230 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
231 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
232
233 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
234 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
235 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
236 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
237 ];
238
121 List _ExecuteMatch(String str, int start_index) 239 List _ExecuteMatch(String str, int start_index)
122 native "JSSyntaxRegExp_ExecuteMatch"; 240 native "JSSyntaxRegExp_ExecuteMatch";
123 } 241 }
124 242
125 class _AllMatchesIterable extends IterableBase<Match> { 243 class _AllMatchesIterable extends IterableBase<Match> {
126 final _JSSyntaxRegExp _re; 244 final _JSSyntaxRegExp _re;
127 final String _str; 245 final String _str;
128 final int _start; 246 final int _start;
129 247
130 _AllMatchesIterable(this._re, this._str, this._start); 248 _AllMatchesIterable(this._re, this._str, this._start);
(...skipping 23 matching lines...) Expand all
154 _nextIndex++; 272 _nextIndex++;
155 } 273 }
156 return true; 274 return true;
157 } 275 }
158 } 276 }
159 _current = null; 277 _current = null;
160 _re = null; 278 _re = null;
161 return false; 279 return false;
162 } 280 }
163 } 281 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698