Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(116)

Side by Side Diff: runtime/lib/regexp_patch.dart

Issue 683433003: Integrate the Irregexp Regular Expression Engine. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: more comments Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/lib/regexp.cc ('k') | runtime/vm/compiler.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 import "dart:collection" show LinkedList, LinkedListEntry;
6
5 patch class RegExp { 7 patch class RegExp {
6 /* patch */ factory RegExp(String source, 8 /* patch */ factory RegExp(String source,
7 {bool multiLine: false, 9 {bool multiLine: false,
8 bool caseSensitive: true}) { 10 bool caseSensitive: true}) {
9 return new _JSSyntaxRegExp(source, 11 _JSSyntaxRegExpHashKey key = new _JSSyntaxRegExpHashKey(
10 multiLine: multiLine, 12 source, multiLine, caseSensitive);
11 caseSensitive: caseSensitive); 13 _JSSyntaxRegExpHashValue value = _cache[key];
14
15 if (value == null) {
16 if (_cache.length > _MAX_CACHE_SIZE) {
17 _JSSyntaxRegExpHashKey lastKey = _recentlyUsed.last;
18 lastKey.unlink();
19 _cache.remove(lastKey);
20 }
21
22 value = new _JSSyntaxRegExpHashValue(
23 new _JSSyntaxRegExp(source,
24 multiLine: multiLine,
25 caseSensitive: caseSensitive),
26 key);
27 _cache[key] = value;
28 } else {
29 value.key.unlink();
30 }
31
32 assert(value != null);
33
34 _recentlyUsed.addFirst(value.key);
35 assert(_recentlyUsed.length == _cache.length);
36
37 // TODO(zerny): We might not want to canonicalize regexp objects.
38 return value.regexp;
39 }
40
41 // Regular expression objects are stored in a cache of up to _MAX_CACHE_SIZE
42 // elements using an LRU eviction strategy.
43 // TODO(zerny): Do not impose a fixed limit on the number of cached objects.
44 // Other possibilities could be limiting by the size of the regexp objects,
45 // or imposing a lower time bound for the most recent use under which a regexp
46 // may not be removed from the cache.
47 // TODO(zerny): Use self-sizing cache similar to _AccessorCache in
48 // mirrors_impl.dart.
49 static const int _MAX_CACHE_SIZE = 256;
50 static final Map<_JSSyntaxRegExpHashKey, _JSSyntaxRegExp> _cache =
51 new HashMap<_JSSyntaxRegExpHashKey, _JSSyntaxRegExpValue>();
52 static final LinkedList<_JSSyntaxRegExpHashKey> _recentlyUsed =
53 new LinkedList<_JSSyntaxRegExpHashKey>();
54 }
55
56
57 // Represents both a key in the regular expression cache as well as its
58 // corresponding entry in the LRU list.
59 class _JSSyntaxRegExpHashKey extends LinkedListEntry<_JSSyntaxRegExpHashKey> {
60 final String pattern;
61 final bool multiLine;
62 final bool caseSensitive;
63
64 _JSSyntaxRegExpHashKey(this.pattern, this.multiLine, this.caseSensitive);
65
66 int get hashCode => pattern.hashCode;
67 bool operator==(_JSSyntaxRegExpHashKey that) {
68 return (this.pattern == that.pattern) &&
69 (this.multiLine == that.multiLine) &&
70 (this.caseSensitive == that.caseSensitive);
12 } 71 }
13 } 72 }
14 73
74
75 // Represents a value in the regular expression cache. Contains a pointer
76 // back to the key in order to access the corresponding LRU entry.
77 class _JSSyntaxRegExpHashValue {
78 final _JSSyntaxRegExp regexp;
79 final _JSSyntaxRegExpHashKey key;
80
81 _JSSyntaxRegExpHashValue(this.regexp, this.key);
82 }
83
84
15 class _JSRegExpMatch implements Match { 85 class _JSRegExpMatch implements Match {
16 _JSRegExpMatch(this._regexp, this.input, this._match); 86 _JSRegExpMatch(this._regexp, this.input, this._match);
17 87
18 int get start => _start(0); 88 int get start => _start(0);
19 int get end => _end(0); 89 int get end => _end(0);
20 90
21 int _start(int groupIdx) { 91 int _start(int groupIdx) {
22 return _match[(groupIdx * _MATCH_PAIR)]; 92 return _match[(groupIdx * _MATCH_PAIR)];
23 } 93 }
24 94
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
62 } 132 }
63 133
64 134
65 class _JSSyntaxRegExp implements RegExp { 135 class _JSSyntaxRegExp implements RegExp {
66 factory _JSSyntaxRegExp( 136 factory _JSSyntaxRegExp(
67 String pattern, 137 String pattern,
68 {bool multiLine: false, 138 {bool multiLine: false,
69 bool caseSensitive: true}) native "JSSyntaxRegExp_factory"; 139 bool caseSensitive: true}) native "JSSyntaxRegExp_factory";
70 140
71 Match firstMatch(String str) { 141 Match firstMatch(String str) {
142 if (str is! String) throw new ArgumentError(str);
72 List match = _ExecuteMatch(str, 0); 143 List match = _ExecuteMatch(str, 0);
73 if (match == null) { 144 if (match == null) {
74 return null; 145 return null;
75 } 146 }
76 return new _JSRegExpMatch(this, str, match); 147 return new _JSRegExpMatch(this, str, match);
77 } 148 }
78 149
79 Iterable<Match> allMatches(String string, [int start = 0]) { 150 Iterable<Match> allMatches(String string, [int start = 0]) {
80 if (string is! String) throw new ArgumentError(string); 151 if (string is! String) throw new ArgumentError(string);
81 if (start is! int) throw new ArgumentError(start); 152 if (start is! int) throw new ArgumentError(start);
82 if (0 > start || start > string.length) { 153 if (0 > start || start > string.length) {
83 throw new RangeError.range(start, 0, string.length); 154 throw new RangeError.range(start, 0, string.length);
84 } 155 }
85 return new _AllMatchesIterable(this, string, start); 156 return new _AllMatchesIterable(this, string, start);
86 } 157 }
87 158
88 Match matchAsPrefix(String string, [int start = 0]) { 159 Match matchAsPrefix(String string, [int start = 0]) {
160 if (string is! String) throw new ArgumentError(string);
161 if (start is! int) throw new ArgumentError(start);
89 if (start < 0 || start > string.length) { 162 if (start < 0 || start > string.length) {
90 throw new RangeError.range(start, 0, string.length); 163 throw new RangeError.range(start, 0, string.length);
91 } 164 }
92 // Inefficient check that searches for a later match too. 165 // Inefficient check that searches for a later match too.
93 // Change this when possible. 166 // Change this when possible.
94 List<int> list = _ExecuteMatch(string, start); 167 List<int> list = _ExecuteMatch(string, start);
95 if (list == null) return null; 168 if (list == null) return null;
96 if (list[0] != start) return null; 169 if (list[0] != start) return null;
97 return new _JSRegExpMatch(this, string, list); 170 return new _JSRegExpMatch(this, string, list);
98 } 171 }
99 172
100 bool hasMatch(String str) { 173 bool hasMatch(String str) {
174 if (str is! String) throw new ArgumentError(str);
101 List match = _ExecuteMatch(str, 0); 175 List match = _ExecuteMatch(str, 0);
102 return (match == null) ? false : true; 176 return (match == null) ? false : true;
103 } 177 }
104 178
105 String stringMatch(String str) { 179 String stringMatch(String str) {
180 if (str is! String) throw new ArgumentError(str);
106 List match = _ExecuteMatch(str, 0); 181 List match = _ExecuteMatch(str, 0);
107 if (match == null) { 182 if (match == null) {
108 return null; 183 return null;
109 } 184 }
110 return str._substringUnchecked(match[0], match[1]); 185 return str._substringUnchecked(match[0], match[1]);
111 } 186 }
112 187
113 String get pattern native "JSSyntaxRegExp_getPattern"; 188 String get pattern native "JSSyntaxRegExp_getPattern";
114 189
115 bool get isMultiLine native "JSSyntaxRegExp_getIsMultiLine"; 190 bool get isMultiLine native "JSSyntaxRegExp_getIsMultiLine";
116 191
117 bool get isCaseSensitive native "JSSyntaxRegExp_getIsCaseSensitive"; 192 bool get isCaseSensitive native "JSSyntaxRegExp_getIsCaseSensitive";
118 193
119 int get _groupCount native "JSSyntaxRegExp_getGroupCount"; 194 int get _groupCount native "JSSyntaxRegExp_getGroupCount";
120 195
196 // Byte map of one byte characters with a 0xff if the character is a word
197 // character (digit, letter or underscore) and 0x00 otherwise.
198 // Used by generated RegExp code.
199 static const List<int> _wordCharacterMap = const <int>[
200 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
201 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
202 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
203 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
204
205 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
206 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
207 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // '0' - '7'
208 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // '8' - '9'
209
210 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'A' - 'G'
211 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'H' - 'O'
212 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'P' - 'W'
213 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, // 'X' - 'Z', '_'
214
215 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'a' - 'g'
216 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'h' - 'o'
217 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'p' - 'w'
218 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, // 'x' - 'z'
219 // Latin-1 range
220 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
221 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
222 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
223 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
224
225 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
226 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
227 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
228 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
229
230 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
231 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
232 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
233 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
234
235 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
236 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
237 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
238 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
239 ];
240
121 List _ExecuteMatch(String str, int start_index) 241 List _ExecuteMatch(String str, int start_index)
122 native "JSSyntaxRegExp_ExecuteMatch"; 242 native "JSSyntaxRegExp_ExecuteMatch";
123 } 243 }
124 244
125 class _AllMatchesIterable extends IterableBase<Match> { 245 class _AllMatchesIterable extends IterableBase<Match> {
126 final _JSSyntaxRegExp _re; 246 final _JSSyntaxRegExp _re;
127 final String _str; 247 final String _str;
128 final int _start; 248 final int _start;
129 249
130 _AllMatchesIterable(this._re, this._str, this._start); 250 _AllMatchesIterable(this._re, this._str, this._start);
(...skipping 23 matching lines...) Expand all
154 _nextIndex++; 274 _nextIndex++;
155 } 275 }
156 return true; 276 return true;
157 } 277 }
158 } 278 }
159 _current = null; 279 _current = null;
160 _re = null; 280 _re = null;
161 return false; 281 return false;
162 } 282 }
163 } 283 }
OLDNEW
« no previous file with comments | « runtime/lib/regexp.cc ('k') | runtime/vm/compiler.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698