| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 // This file encapsulates all the interaction with the | |
| 5 // JSC regular expression library also referred to as pcre | |
| 6 | |
| 7 #include "lib/regexp_jsc.h" | |
| 8 | |
| 9 #include "platform/assert.h" | |
| 10 #include "vm/allocation.h" | |
| 11 #include "vm/exceptions.h" | |
| 12 #include "vm/globals.h" | |
| 13 #include "vm/isolate.h" | |
| 14 #include "third_party/jscre/pcre.h" | |
| 15 | |
| 16 namespace dart { | |
| 17 | |
| 18 static uint16_t* GetTwoByteData(const String& str) { | |
| 19 Zone* zone = Isolate::Current()->current_zone(); | |
| 20 uint16_t* two_byte_str = zone->Alloc<uint16_t>(str.Length()); | |
| 21 for (intptr_t i = 0; i < str.Length(); i++) { | |
| 22 two_byte_str[i] = str.CharAt(i); | |
| 23 } | |
| 24 return two_byte_str; | |
| 25 } | |
| 26 | |
| 27 | |
| 28 static void* JSREMalloc(size_t size) { | |
| 29 intptr_t regexp_size = static_cast<intptr_t>(size); | |
| 30 ASSERT(regexp_size > 0); | |
| 31 const JSRegExp& new_regex = JSRegExp::Handle(JSRegExp::New(size)); | |
| 32 return new_regex.GetDataStartAddress(); | |
| 33 } | |
| 34 | |
| 35 | |
| 36 static void JSREFree(void* ptr) { | |
| 37 USE(ptr); // Do nothing, memory is garbage collected. | |
| 38 } | |
| 39 | |
| 40 | |
| 41 static void ThrowExceptionOnError(const String& pattern, | |
| 42 const char* error_msg) { | |
| 43 if (error_msg == NULL) { | |
| 44 error_msg = "Unknown regexp compile error. "; | |
| 45 } | |
| 46 const String& errmsg = String::Handle(String::New(error_msg)); | |
| 47 const String& message = String::Handle(String::Concat(errmsg, pattern)); | |
| 48 const Array& args = Array::Handle(Array::New(1)); | |
| 49 args.SetAt(0, message); | |
| 50 Exceptions::ThrowByType(Exceptions::kFormat, args); | |
| 51 } | |
| 52 | |
| 53 | |
| 54 RawJSRegExp* Jscre::Compile(const String& pattern, | |
| 55 bool multi_line, | |
| 56 bool ignore_case) { | |
| 57 // First convert the pattern to UTF16 format as the jscre library expects | |
| 58 // strings to be in UTF16 encoding. | |
| 59 uint16_t* two_byte_pattern = GetTwoByteData(pattern); | |
| 60 | |
| 61 // A Dart regexp is always global. | |
| 62 bool is_global = true; | |
| 63 // Parse the flags. | |
| 64 jscre::JSRegExpIgnoreCaseOption jscre_ignore_case = ignore_case ? | |
| 65 jscre::JSRegExpIgnoreCase : jscre::JSRegExpDoNotIgnoreCase; | |
| 66 jscre::JSRegExpMultilineOption jscre_multi_line = multi_line ? | |
| 67 jscre::JSRegExpMultiline : jscre::JSRegExpSingleLine; | |
| 68 | |
| 69 // Compile the regex by calling into the jscre library. | |
| 70 uint32_t num_bracket_expressions = 0; | |
| 71 const char* error_msg = NULL; | |
| 72 jscre::JSRegExp* jscregexp = jscre::jsRegExpCompile(two_byte_pattern, | |
| 73 pattern.Length(), | |
| 74 jscre_ignore_case, | |
| 75 jscre_multi_line, | |
| 76 &num_bracket_expressions, | |
| 77 &error_msg, | |
| 78 &JSREMalloc, | |
| 79 &JSREFree); | |
| 80 | |
| 81 if (jscregexp == NULL) { | |
| 82 // There was an error compiling the regex, Throw an exception. | |
| 83 ThrowExceptionOnError(pattern, error_msg); | |
| 84 UNREACHABLE(); | |
| 85 return JSRegExp::null(); | |
| 86 } else { | |
| 87 // Setup the compiled regex object and return it. | |
| 88 JSRegExp& regexp = | |
| 89 JSRegExp::Handle(JSRegExp::FromDataStartAddress(jscregexp)); | |
| 90 regexp.set_pattern(pattern); | |
| 91 if (jscre_multi_line == jscre::JSRegExpMultiline) { | |
| 92 regexp.set_is_multi_line(); | |
| 93 } | |
| 94 if (jscre_ignore_case == jscre::JSRegExpIgnoreCase) { | |
| 95 regexp.set_is_ignore_case(); | |
| 96 } | |
| 97 if (is_global) { | |
| 98 regexp.set_is_global(); | |
| 99 } | |
| 100 regexp.set_is_complex(); // Always use jscre library. | |
| 101 regexp.set_num_bracket_expressions(num_bracket_expressions); | |
| 102 return regexp.raw(); | |
| 103 } | |
| 104 } | |
| 105 | |
| 106 | |
| 107 RawArray* Jscre::Execute(const JSRegExp& regex, | |
| 108 const String& str, | |
| 109 intptr_t start_index) { | |
| 110 // First convert the input str to UTF16 format as the jscre library expects | |
| 111 // strings to be in UTF16 encoding. | |
| 112 uint16_t* two_byte_str = GetTwoByteData(str); | |
| 113 | |
| 114 // Execute a regex match by calling into the jscre library. | |
| 115 jscre::JSRegExp* jscregexp = | |
| 116 reinterpret_cast<jscre::JSRegExp*>(regex.GetDataStartAddress()); | |
| 117 ASSERT(jscregexp != NULL); | |
| 118 const Smi& num_bracket_exprs = Smi::Handle(regex.num_bracket_expressions()); | |
| 119 intptr_t num_bracket_expressions = num_bracket_exprs.Value(); | |
| 120 Zone* zone = Isolate::Current()->current_zone(); | |
| 121 // The jscre library rounds the passed in size to a multiple of 3 in order | |
| 122 // to reuse the passed in offsets array as a temporary chunk of working | |
| 123 // storage during matching, so we just pass in a size which is a multiple | |
| 124 // of 3. | |
| 125 const int kJscreMultiple = 3; | |
| 126 int offsets_length = (num_bracket_expressions + 1) * kJscreMultiple; | |
| 127 int* offsets = NULL; | |
| 128 offsets = zone->Alloc<int>(offsets_length); | |
| 129 int retval = jscre::jsRegExpExecute(jscregexp, | |
| 130 two_byte_str, | |
| 131 str.Length(), | |
| 132 start_index, | |
| 133 offsets, | |
| 134 offsets_length); | |
| 135 | |
| 136 // The KJS JavaScript engine returns null (ie, a failed match) when | |
| 137 // JSRE's internal match limit is exceeded. We duplicate that behavior here. | |
| 138 if (retval == jscre::JSRegExpErrorNoMatch | |
| 139 || retval == jscre::JSRegExpErrorHitLimit) { | |
| 140 return Array::null(); | |
| 141 } | |
| 142 | |
| 143 // Other JSRE errors: | |
| 144 if (retval < 0) { | |
| 145 const String& pattern = String::Handle(regex.pattern()); | |
| 146 const int kErrorLength = 256; | |
| 147 char error_msg[kErrorLength]; | |
| 148 OS::SNPrint(error_msg, kErrorLength, | |
| 149 "jscre::jsRegExpExecute error : %d", retval); | |
| 150 ThrowExceptionOnError(pattern, error_msg); | |
| 151 UNREACHABLE(); | |
| 152 return Array::null(); | |
| 153 } | |
| 154 | |
| 155 const int kMatchPair = 2; | |
| 156 Array& array = | |
| 157 Array::Handle(Array::New(kMatchPair * (num_bracket_expressions + 1))); | |
| 158 // The matches come in (start, end + 1) pairs for each bracketted expression. | |
| 159 Smi& start = Smi::Handle(); | |
| 160 Smi& end = Smi::Handle(); | |
| 161 for (intptr_t i = 0; | |
| 162 i < (kMatchPair * (num_bracket_expressions + 1)); | |
| 163 i += kMatchPair) { | |
| 164 start = Smi::New(offsets[i]); | |
| 165 end = Smi::New(offsets[i + 1]); | |
| 166 array.SetAt(i, start); | |
| 167 array.SetAt(i+1, end); | |
| 168 } | |
| 169 return array.raw(); | |
| 170 } | |
| 171 | |
| 172 } // namespace dart | |
| OLD | NEW |