OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
// This file encapsulates all interaction with the JSC regular expression
// library (jscre), which is also referred to as pcre.
6 | |
7 #include "lib/regexp_jsc.h" | |
8 | |
9 #include "platform/assert.h" | |
10 #include "vm/allocation.h" | |
11 #include "vm/exceptions.h" | |
12 #include "vm/globals.h" | |
13 #include "vm/isolate.h" | |
14 #include "third_party/jscre/pcre.h" | |
15 | |
16 namespace dart { | |
17 | |
18 static uint16_t* GetTwoByteData(const String& str) { | |
19 Zone* zone = Isolate::Current()->current_zone(); | |
20 uint16_t* two_byte_str = zone->Alloc<uint16_t>(str.Length()); | |
21 for (intptr_t i = 0; i < str.Length(); i++) { | |
22 two_byte_str[i] = str.CharAt(i); | |
23 } | |
24 return two_byte_str; | |
25 } | |
26 | |
27 | |
28 static void* JSREMalloc(size_t size) { | |
29 intptr_t regexp_size = static_cast<intptr_t>(size); | |
30 ASSERT(regexp_size > 0); | |
31 const JSRegExp& new_regex = JSRegExp::Handle(JSRegExp::New(size)); | |
32 return new_regex.GetDataStartAddress(); | |
33 } | |
34 | |
35 | |
36 static void JSREFree(void* ptr) { | |
37 USE(ptr); // Do nothing, memory is garbage collected. | |
38 } | |
39 | |
40 | |
41 static void ThrowExceptionOnError(const String& pattern, | |
42 const char* error_msg) { | |
43 if (error_msg == NULL) { | |
44 error_msg = "Unknown regexp compile error. "; | |
45 } | |
46 const String& errmsg = String::Handle(String::New(error_msg)); | |
47 const String& message = String::Handle(String::Concat(errmsg, pattern)); | |
48 const Array& args = Array::Handle(Array::New(1)); | |
49 args.SetAt(0, message); | |
50 Exceptions::ThrowByType(Exceptions::kFormat, args); | |
51 } | |
52 | |
53 | |
54 RawJSRegExp* Jscre::Compile(const String& pattern, | |
55 bool multi_line, | |
56 bool ignore_case) { | |
57 // First convert the pattern to UTF16 format as the jscre library expects | |
58 // strings to be in UTF16 encoding. | |
59 uint16_t* two_byte_pattern = GetTwoByteData(pattern); | |
60 | |
61 // A Dart regexp is always global. | |
62 bool is_global = true; | |
63 // Parse the flags. | |
64 jscre::JSRegExpIgnoreCaseOption jscre_ignore_case = ignore_case ? | |
65 jscre::JSRegExpIgnoreCase : jscre::JSRegExpDoNotIgnoreCase; | |
66 jscre::JSRegExpMultilineOption jscre_multi_line = multi_line ? | |
67 jscre::JSRegExpMultiline : jscre::JSRegExpSingleLine; | |
68 | |
69 // Compile the regex by calling into the jscre library. | |
70 uint32_t num_bracket_expressions = 0; | |
71 const char* error_msg = NULL; | |
72 jscre::JSRegExp* jscregexp = jscre::jsRegExpCompile(two_byte_pattern, | |
73 pattern.Length(), | |
74 jscre_ignore_case, | |
75 jscre_multi_line, | |
76 &num_bracket_expressions, | |
77 &error_msg, | |
78 &JSREMalloc, | |
79 &JSREFree); | |
80 | |
81 if (jscregexp == NULL) { | |
82 // There was an error compiling the regex, Throw an exception. | |
83 ThrowExceptionOnError(pattern, error_msg); | |
84 UNREACHABLE(); | |
85 return JSRegExp::null(); | |
86 } else { | |
87 // Setup the compiled regex object and return it. | |
88 JSRegExp& regexp = | |
89 JSRegExp::Handle(JSRegExp::FromDataStartAddress(jscregexp)); | |
90 regexp.set_pattern(pattern); | |
91 if (jscre_multi_line == jscre::JSRegExpMultiline) { | |
92 regexp.set_is_multi_line(); | |
93 } | |
94 if (jscre_ignore_case == jscre::JSRegExpIgnoreCase) { | |
95 regexp.set_is_ignore_case(); | |
96 } | |
97 if (is_global) { | |
98 regexp.set_is_global(); | |
99 } | |
100 regexp.set_is_complex(); // Always use jscre library. | |
101 regexp.set_num_bracket_expressions(num_bracket_expressions); | |
102 return regexp.raw(); | |
103 } | |
104 } | |
105 | |
106 | |
107 RawArray* Jscre::Execute(const JSRegExp& regex, | |
108 const String& str, | |
109 intptr_t start_index) { | |
110 // First convert the input str to UTF16 format as the jscre library expects | |
111 // strings to be in UTF16 encoding. | |
112 uint16_t* two_byte_str = GetTwoByteData(str); | |
113 | |
114 // Execute a regex match by calling into the jscre library. | |
115 jscre::JSRegExp* jscregexp = | |
116 reinterpret_cast<jscre::JSRegExp*>(regex.GetDataStartAddress()); | |
117 ASSERT(jscregexp != NULL); | |
118 const Smi& num_bracket_exprs = Smi::Handle(regex.num_bracket_expressions()); | |
119 intptr_t num_bracket_expressions = num_bracket_exprs.Value(); | |
120 Zone* zone = Isolate::Current()->current_zone(); | |
121 // The jscre library rounds the passed in size to a multiple of 3 in order | |
122 // to reuse the passed in offsets array as a temporary chunk of working | |
123 // storage during matching, so we just pass in a size which is a multiple | |
124 // of 3. | |
125 const int kJscreMultiple = 3; | |
126 int offsets_length = (num_bracket_expressions + 1) * kJscreMultiple; | |
127 int* offsets = NULL; | |
128 offsets = zone->Alloc<int>(offsets_length); | |
129 int retval = jscre::jsRegExpExecute(jscregexp, | |
130 two_byte_str, | |
131 str.Length(), | |
132 start_index, | |
133 offsets, | |
134 offsets_length); | |
135 | |
136 // The KJS JavaScript engine returns null (ie, a failed match) when | |
137 // JSRE's internal match limit is exceeded. We duplicate that behavior here. | |
138 if (retval == jscre::JSRegExpErrorNoMatch | |
139 || retval == jscre::JSRegExpErrorHitLimit) { | |
140 return Array::null(); | |
141 } | |
142 | |
143 // Other JSRE errors: | |
144 if (retval < 0) { | |
145 const String& pattern = String::Handle(regex.pattern()); | |
146 const int kErrorLength = 256; | |
147 char error_msg[kErrorLength]; | |
148 OS::SNPrint(error_msg, kErrorLength, | |
149 "jscre::jsRegExpExecute error : %d", retval); | |
150 ThrowExceptionOnError(pattern, error_msg); | |
151 UNREACHABLE(); | |
152 return Array::null(); | |
153 } | |
154 | |
155 const int kMatchPair = 2; | |
156 Array& array = | |
157 Array::Handle(Array::New(kMatchPair * (num_bracket_expressions + 1))); | |
158 // The matches come in (start, end + 1) pairs for each bracketted expression. | |
159 Smi& start = Smi::Handle(); | |
160 Smi& end = Smi::Handle(); | |
161 for (intptr_t i = 0; | |
162 i < (kMatchPair * (num_bracket_expressions + 1)); | |
163 i += kMatchPair) { | |
164 start = Smi::New(offsets[i]); | |
165 end = Smi::New(offsets[i + 1]); | |
166 array.SetAt(i, start); | |
167 array.SetAt(i+1, end); | |
168 } | |
169 return array.raw(); | |
170 } | |
171 | |
172 } // namespace dart | |
OLD | NEW |