Chromium Code Reviews| Index: Source/core/platform/text/RegularExpression.cpp |
| diff --git a/Source/core/platform/text/RegularExpression.cpp b/Source/core/platform/text/RegularExpression.cpp |
| index 2b225ade22404e6a9f0898590695203021b4a10e..ad1a5beccb9c7d79d556c073c9f5db2c3da6e2d9 100644 |
| --- a/Source/core/platform/text/RegularExpression.cpp |
| +++ b/Source/core/platform/text/RegularExpression.cpp |
| @@ -2,6 +2,7 @@ |
| * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved. |
| * Copyright (C) 2008 Collabora Ltd. |
| * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged |
| + * Copyright (C) 2013 Google Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| @@ -28,67 +29,79 @@ |
| #include "config.h" |
| #include "RegularExpression.h" |
| -#include <wtf/BumpPointerAllocator.h> |
| -#include <yarr/Yarr.h> |
| -#include "Logging.h" |
| +// FIXME: These includes look like a layering violation, but converting the strings manually |
| +// without v8String is difficult, and calling into v8 without V8RecursionScope will |
| +// assert. Perhaps the basic v8 utilities shouldn't live in bindings, or RegularExpression |
| +// should become an abstract interface that's implemented in bindings. |
| +#include "V8Binding.h" |
| +#include "V8RecursionScope.h" |
| +#include <wtf/MainThread.h> |
| namespace WebCore { |
| -RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) |
| - : m_numSubpatterns(0) |
| - , m_regExpByteCode(compile(pattern, caseSensitivity, multilineMode)) |
| +static v8::Local<v8::Context> regexContext() |
| { |
| + ASSERT(isMainThread()); |
| + static ScopedPersistent<v8::Context>* staticRegexContext = new ScopedPersistent<v8::Context>(v8::Context::New()); |
| + return v8::Local<v8::Context>::New(staticRegexContext->get()); |
| } |
| -PassOwnPtr<JSC::Yarr::BytecodePattern> RegularExpression::compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) |
| +RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) |
| { |
| - const char* constructionError = 0; |
| - JSC::Yarr::YarrPattern pattern(patternString, (caseSensitivity == TextCaseInsensitive), (multilineMode == MultilineEnabled), &constructionError); |
| - if (constructionError) { |
| - LOG_ERROR("RegularExpression: YARR compile failed with '%s'", constructionError); |
| - return nullptr; |
| - } |
| - |
| - m_numSubpatterns = pattern.m_numSubpatterns; |
| - |
| - return JSC::Yarr::byteCompile(pattern, &m_regexAllocator); |
| + v8::HandleScope handleScope; |
| + v8::Local<v8::Context> context(regexContext()); |
| + v8::Context::Scope scope(context); |
| + |
| + unsigned flags = v8::RegExp::kNone; |
| + if (caseSensitivity == TextCaseInsensitive) |
| + flags |= v8::RegExp::kIgnoreCase; |
| + if (multilineMode == MultilineEnabled) |
| + flags |= v8::RegExp::kMultiline; |
| + |
| + v8::TryCatch tryCatch; |
| + m_regex.set(v8::RegExp::New(v8String(pattern, context->GetIsolate()), static_cast<v8::RegExp::Flags>(flags))); |
|
adamk
2013/04/22 17:48:03
I think it would be clearer if you split this into
|
| } |
| -int RegularExpression::match(const String& str, int startFrom, int* matchLength) const |
| +int RegularExpression::match(const String& string, int startFrom, int* matchLength) const |
| { |
| - if (!m_regExpByteCode) |
| + if (m_regex.isEmpty() || string.isNull()) |
| return -1; |
| - if (str.isNull()) |
| + // v8 strings are limited to int. |
| + if (string.length() > INT_MAX) |
| return -1; |
| - int offsetVectorSize = (m_numSubpatterns + 1) * 2; |
| - unsigned* offsetVector; |
| - Vector<unsigned, 32> nonReturnedOvector; |
| + v8::HandleScope handleScope; |
| + v8::Local<v8::Context> context(regexContext()); |
| + v8::Context::Scope scope(context); |
| + v8::TryCatch tryCatch; |
| - nonReturnedOvector.resize(offsetVectorSize); |
| - offsetVector = nonReturnedOvector.data(); |
| + V8RecursionScope::MicrotaskSuppression microtaskScope; |
| - ASSERT(offsetVector); |
| - for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++) |
| - offsetVector[j] = JSC::Yarr::offsetNoMatch; |
| + v8::Local<v8::Function> exec = m_regex->Get(v8::String::NewSymbol("exec")).As<v8::Function>(); |
| - unsigned result; |
| - if (str.length() <= INT_MAX) |
| - result = JSC::Yarr::interpret(m_regExpByteCode.get(), str, startFrom, offsetVector); |
| - else { |
| - // This code can't handle unsigned offsets. Limit our processing to strings with offsets that |
| - // can be represented as ints. |
| - result = JSC::Yarr::offsetNoMatch; |
| - } |
| + v8::Handle<v8::Value> argv[] = { v8String(string, context->GetIsolate()) }; |
| + v8::Local<v8::Value> returnValue = exec->Call(m_regex.get(), 1, argv); |
| + |
| + // RegExp#exec returns null if there's no match; otherwise it returns an |
| + // Array of strings whose first element is the whole match and whose remaining |
| + // elements are the captured subgroups. The Array also carries extra properties, |
| + // such as "index", which is the offset of the match. |
| + // |
| + // https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/RegExp/exec |
| - if (result == JSC::Yarr::offsetNoMatch) |
| + if (!returnValue->IsArray()) |
| return -1; |
| - // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector. |
| - if (matchLength) |
| - *matchLength = offsetVector[1] - offsetVector[0]; |
| - return offsetVector[0]; |
| + v8::Local<v8::Array> result = returnValue.As<v8::Array>(); |
| + int matchOffset = result->Get(v8::String::NewSymbol("index"))->ToInt32()->Value(); |
| + |
| + if (matchLength) { |
| + v8::Local<v8::String> match = result->Get(0).As<v8::String>(); |
| + *matchLength = match->Length(); |
| + } |
| + |
| + return matchOffset; |
| } |
| void replace(String& string, const RegularExpression& target, const String& replacement) |