Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved. |
| 3 * Copyright (C) 2008 Collabora Ltd. | 3 * Copyright (C) 2008 Collabora Ltd. |
| 4 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged | 4 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged |
| 5 * Copyright (C) 2013 Google Inc. All rights reserved. | |
| 5 * | 6 * |
| 6 * Redistribution and use in source and binary forms, with or without | 7 * Redistribution and use in source and binary forms, with or without |
| 7 * modification, are permitted provided that the following conditions | 8 * modification, are permitted provided that the following conditions |
| 8 * are met: | 9 * are met: |
| 9 * 1. Redistributions of source code must retain the above copyright | 10 * 1. Redistributions of source code must retain the above copyright |
| 10 * notice, this list of conditions and the following disclaimer. | 11 * notice, this list of conditions and the following disclaimer. |
| 11 * 2. Redistributions in binary form must reproduce the above copyright | 12 * 2. Redistributions in binary form must reproduce the above copyright |
| 12 * notice, this list of conditions and the following disclaimer in the | 13 * notice, this list of conditions and the following disclaimer in the |
| 13 * documentation and/or other materials provided with the distribution. | 14 * documentation and/or other materials provided with the distribution. |
| 14 * | 15 * |
| 15 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY | 16 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY |
| 16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR | 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR |
| 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 */ | 27 */ |
| 27 | 28 |
| 28 #include "config.h" | 29 #include "config.h" |
| 29 #include "RegularExpression.h" | 30 #include "RegularExpression.h" |
| 30 | 31 |
| 31 #include <wtf/BumpPointerAllocator.h> | 32 // FIXME: These seem like a layering violation, but converting the strings manually |
| 32 #include <yarr/Yarr.h> | 33 // without v8String is difficult, and calling into v8 without V8RecursionScope will |
| 33 #include "Logging.h" | 34 // assert. Perhaps v8 basic utilities shouldn't be in bindings, or we should put |
| 35 // RegularExpression as some kind of abstract interface that's implemented in bindings. | |
| 36 #include "V8Binding.h" | |
| 37 #include "V8RecursionScope.h" | |
| 38 #include <wtf/MainThread.h> | |
| 34 | 39 |
| 35 namespace WebCore { | 40 namespace WebCore { |
| 36 | 41 |
| 37 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) | 42 static v8::Local<v8::Context> regexContext() |
| 38 : m_numSubpatterns(0) | |
| 39 , m_regExpByteCode(compile(pattern, caseSensitivity, multilineMode)) | |
| 40 { | 43 { |
| 44 ASSERT(isMainThread()); | |
| 45 static ScopedPersistent<v8::Context>* staticRegexContext = new ScopedPersistent<v8::Context>(v8::Context::New()); | |
| 46 return v8::Local<v8::Context>::New(staticRegexContext->get()); | |
| 41 } | 47 } |
| 42 | 48 |
| 43 PassOwnPtr<JSC::Yarr::BytecodePattern> RegularExpression::compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) | 49 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) |
| 44 { | 50 { |
| 45 const char* constructionError = 0; | 51 v8::HandleScope handleScope; |
| 46 JSC::Yarr::YarrPattern pattern(patternString, (caseSensitivity == TextCaseInsensitive), (multilineMode == MultilineEnabled), &constructionError); | 52 v8::Local<v8::Context> context(regexContext()); |
| 47 if (constructionError) { | 53 v8::Context::Scope scope(context); |
| 48 LOG_ERROR("RegularExpression: YARR compile failed with '%s'", constructionError); | 54 |
| 49 return nullptr; | 55 unsigned flags = v8::RegExp::kNone; |
| 56 if (caseSensitivity == TextCaseInsensitive) | |
| 57 flags |= v8::RegExp::kIgnoreCase; | |
| 58 if (multilineMode == MultilineEnabled) | |
| 59 flags |= v8::RegExp::kMultiline; | |
| 60 | |
| 61 v8::TryCatch tryCatch; | |
| 62 m_regex.set(v8::RegExp::New(v8String(pattern, context->GetIsolate()), static_cast<v8::RegExp::Flags>(flags))); | |
|
adamk
2013/04/22 17:48:03
I think it would be clearer if you split this into
| |
| 63 } | |
| 64 | |
| 65 int RegularExpression::match(const String& string, int startFrom, int* matchLength) const | |
| 66 { | |
| 67 if (m_regex.isEmpty() || string.isNull()) | |
| 68 return -1; | |
| 69 | |
| 70 // v8 strings are limited to int. | |
| 71 if (string.length() > INT_MAX) | |
| 72 return -1; | |
| 73 | |
| 74 v8::HandleScope handleScope; | |
| 75 v8::Local<v8::Context> context(regexContext()); | |
| 76 v8::Context::Scope scope(context); | |
| 77 v8::TryCatch tryCatch; | |
| 78 | |
| 79 V8RecursionScope::MicrotaskSuppression microtaskScope; | |
| 80 | |
| 81 v8::Local<v8::Function> exec = m_regex->Get(v8::String::NewSymbol("exec")).As<v8::Function>(); | |
| 82 | |
| 83 v8::Handle<v8::Value> argv[] = { v8String(string, context->GetIsolate()) }; | |
| 84 v8::Local<v8::Value> returnValue = exec->Call(m_regex.get(), 1, argv); | |
| 85 | |
| 86 // RegExp#exec returns null if there's no match, otherwise it returns an | |
| 87 // Array of strings with the first being the whole match string and others | |
| 88 // being subgroups. The Array also has some random properties tacked on like | |
| 89 // "index" which is the offset of the match. | |
| 90 // | |
| 91 // https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/RegExp/exec | |
| 92 | |
| 93 if (!returnValue->IsArray()) | |
| 94 return -1; | |
| 95 | |
| 96 v8::Local<v8::Array> result = returnValue.As<v8::Array>(); | |
| 97 int matchOffset = result->Get(v8::String::NewSymbol("index"))->ToInt32()->Value(); | |
| 98 | |
| 99 if (matchLength) { | |
| 100 v8::Local<v8::String> match = result->Get(0).As<v8::String>(); | |
| 101 *matchLength = match->Length(); | |
| 50 } | 102 } |
| 51 | 103 |
| 52 m_numSubpatterns = pattern.m_numSubpatterns; | 104 return matchOffset; |
| 53 | |
| 54 return JSC::Yarr::byteCompile(pattern, &m_regexAllocator); | |
| 55 } | |
| 56 | |
| 57 int RegularExpression::match(const String& str, int startFrom, int* matchLength) const | |
| 58 { | |
| 59 if (!m_regExpByteCode) | |
| 60 return -1; | |
| 61 | |
| 62 if (str.isNull()) | |
| 63 return -1; | |
| 64 | |
| 65 int offsetVectorSize = (m_numSubpatterns + 1) * 2; | |
| 66 unsigned* offsetVector; | |
| 67 Vector<unsigned, 32> nonReturnedOvector; | |
| 68 | |
| 69 nonReturnedOvector.resize(offsetVectorSize); | |
| 70 offsetVector = nonReturnedOvector.data(); | |
| 71 | |
| 72 ASSERT(offsetVector); | |
| 73 for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++) | |
| 74 offsetVector[j] = JSC::Yarr::offsetNoMatch; | |
| 75 | |
| 76 unsigned result; | |
| 77 if (str.length() <= INT_MAX) | |
| 78 result = JSC::Yarr::interpret(m_regExpByteCode.get(), str, startFrom, offsetVector); | |
| 79 else { | |
| 80 // This code can't handle unsigned offsets. Limit our processing to strings with offsets that | |
| 81 // can be represented as ints. | |
| 82 result = JSC::Yarr::offsetNoMatch; | |
| 83 } | |
| 84 | |
| 85 if (result == JSC::Yarr::offsetNoMatch) | |
| 86 return -1; | |
| 87 | |
| 88 // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector. | |
| 89 if (matchLength) | |
| 90 *matchLength = offsetVector[1] - offsetVector[0]; | |
| 91 return offsetVector[0]; | |
| 92 } | 105 } |
| 93 | 106 |
| 94 void replace(String& string, const RegularExpression& target, const String& replacement) | 107 void replace(String& string, const RegularExpression& target, const String& replacement) |
| 95 { | 108 { |
| 96 int index = 0; | 109 int index = 0; |
| 97 while (index < static_cast<int>(string.length())) { | 110 while (index < static_cast<int>(string.length())) { |
| 98 int matchLength; | 111 int matchLength; |
| 99 index = target.match(string, index, &matchLength); | 112 index = target.match(string, index, &matchLength); |
| 100 if (index < 0) | 113 if (index < 0) |
| 101 break; | 114 break; |
| 102 string.replace(index, matchLength, replacement); | 115 string.replace(index, matchLength, replacement); |
| 103 index += replacement.length(); | 116 index += replacement.length(); |
| 104 if (!matchLength) | 117 if (!matchLength) |
| 105 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]* | 118 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]* |
| 106 } | 119 } |
| 107 } | 120 } |
| 108 | 121 |
| 109 } // namespace WebCore | 122 } // namespace WebCore |
| OLD | NEW |