Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(176)

Side by Side Diff: Source/core/platform/text/RegularExpression.cpp

Issue 13896017: Switch RegularExpression from YARR to V8 (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « Source/core/platform/text/RegularExpression.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2008 Collabora Ltd. 3 * Copyright (C) 2008 Collabora Ltd.
4 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged 4 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
(...skipping 10 matching lines...) Expand all
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */ 26 */
27 27
28 #include "config.h" 28 #include "config.h"
29 #include "RegularExpression.h" 29 #include "RegularExpression.h"
30 30
31 #include <wtf/BumpPointerAllocator.h> 31 // FIXME: These seem like a layering violation, but converting the strings manually
32 #include <yarr/Yarr.h> 32 // without v8String is difficult, and calling into v8 without V8RecursionScope will
33 #include "Logging.h" 33 // assert. Perhaps v8 basic utilities shouldn't be in bindings, or we should put
34 // RegularExpression as some kind of abstract interface that's implemented in bindings.
35 #include "V8Binding.h"
36 #include "V8RecursionScope.h"
34 37
35 namespace WebCore { 38 namespace WebCore {
36 39
37 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) 40 static v8::Handle<v8::Context> regexContext()
abarth-chromium 2013/04/21 22:31:52 This copies a persistent handle, which is a patter
esprehn 2013/04/21 23:11:10 So I can do return v8::Local<v8::Context>::New(sta
abarth-chromium 2013/04/21 23:57:00 Yep.
38 : m_numSubpatterns(0)
39 , m_regExpByteCode(compile(pattern, caseSensitivity, multilineMode))
40 { 41 {
42 static ScopedPersistent<v8::Context>* staticRegexContext = new ScopedPersistent<v8::Context>(v8::Context::New());
adamk 2013/04/22 16:40:38 I think an ASSERT(isMainThread()) would be good at
43 return staticRegexContext->get();
41 } 44 }
42 45
43 PassOwnPtr<JSC::Yarr::BytecodePattern> RegularExpression::compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) 46 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
44 { 47 {
45 const char* constructionError = 0; 48 v8::HandleScope handleScope;
46 JSC::Yarr::YarrPattern pattern(patternString, (caseSensitivity == TextCaseInsensitive), (multilineMode == MultilineEnabled), &constructionError); 49 v8::Handle<v8::Context> context(regexContext());
47 if (constructionError) { 50 v8::Context::Scope scope(context);
abarth-chromium 2013/04/21 22:31:52 It looks like regexContext() should return a new L
48 LOG_ERROR("RegularExpression: YARR compile failed with '%s'", constructionError); 51
49 return nullptr; 52 unsigned flags = v8::RegExp::kNone;
abarth-chromium 2013/04/21 22:31:52 Can we declare flags to be of type v8::RegExp::Fla
esprehn 2013/04/21 23:11:10 Nope, the compiler won't let you do |= if you do t
abarth-chromium 2013/04/21 23:57:00 Ok.
53 if (caseSensitivity == TextCaseInsensitive)
54 flags |= v8::RegExp::kIgnoreCase;
55 if (multilineMode == MultilineEnabled)
56 flags |= v8::RegExp::kMultiline;
57
58 v8::TryCatch tryCatch;
abarth-chromium 2013/04/21 22:31:52 Do we need to check anything about this object at
esprehn 2013/04/21 23:11:10 I don't think so, but adamk would know more. I sup
59 m_regex.set(v8::RegExp::New(v8String(pattern, context->GetIsolate()), static_cast<v8::RegExp::Flags>(flags)));
60 }
61
62 int RegularExpression::match(const String& string, int startFrom, int* matchLength) const
63 {
64 if (m_regex.isEmpty() || string.isNull())
65 return -1;
66
67 // v8 strings are limited to int.
68 if (string.length() > INT_MAX)
69 return -1;
70
71 v8::HandleScope handleScope;
72 v8::Handle<v8::Context> context(regexContext());
73 v8::Context::Scope scope(context);
74 v8::TryCatch tryCatch;
75
76 V8RecursionScope::MicrotaskSuppression microtaskScope;
77
78 v8::Local<v8::Function> exec = m_regex->Get(v8::String::NewSymbol("exec")).As<v8::Function>();
abarth-chromium 2013/04/21 22:31:52 Does this ASSERT if we don't get a v8::Function ou
esprehn 2013/04/21 23:11:10 It does with V8_ENABLE_CHECKS enabled which could
abarth-chromium 2013/04/21 23:57:00 As long as it ASSERTs with V8_ENABLE_CHECKS, I thi
79
80 v8::Handle<v8::Value> argv[] = { v8String(string, context->GetIsolate()) };
81 v8::Local<v8::Value> returnValue = exec->Call(m_regex.get(), 1, argv);
82
83 if (!returnValue->IsObject())
84 return -1;
85
86 v8::Local<v8::Object> result = returnValue.As<v8::Object>();
87 double index = result->Get(v8::String::NewSymbol("index")).As<v8::Number>()->Value();
adamk 2013/04/22 16:40:38 Note that the V8 API exposes some more int-like ty
88
89 if (matchLength) {
90 v8::Local<v8::String> match = result->Get(0).As<v8::String>();
adamk 2013/04/22 16:40:38 To someone not familiar with the JS RegExp API, th
91 *matchLength = match->Length();
50 } 92 }
51 93
52 m_numSubpatterns = pattern.m_numSubpatterns; 94 return static_cast<int>(index);
53
54 return JSC::Yarr::byteCompile(pattern, &m_regexAllocator);
55 }
56
57 int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
58 {
59 if (!m_regExpByteCode)
60 return -1;
61
62 if (str.isNull())
63 return -1;
64
65 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
66 unsigned* offsetVector;
67 Vector<unsigned, 32> nonReturnedOvector;
68
69 nonReturnedOvector.resize(offsetVectorSize);
70 offsetVector = nonReturnedOvector.data();
71
72 ASSERT(offsetVector);
73 for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
74 offsetVector[j] = JSC::Yarr::offsetNoMatch;
75
76 unsigned result;
77 if (str.length() <= INT_MAX)
78 result = JSC::Yarr::interpret(m_regExpByteCode.get(), str, startFrom, offsetVector);
79 else {
80 // This code can't handle unsigned offsets. Limit our processing to strings with offsets that
81 // can be represented as ints.
82 result = JSC::Yarr::offsetNoMatch;
83 }
84
85 if (result == JSC::Yarr::offsetNoMatch)
86 return -1;
87
88 // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector.
89 if (matchLength)
90 *matchLength = offsetVector[1] - offsetVector[0];
91 return offsetVector[0];
92 } 95 }
93 96
94 void replace(String& string, const RegularExpression& target, const String& replacement) 97 void replace(String& string, const RegularExpression& target, const String& replacement)
95 { 98 {
96 int index = 0; 99 int index = 0;
97 while (index < static_cast<int>(string.length())) { 100 while (index < static_cast<int>(string.length())) {
98 int matchLength; 101 int matchLength;
99 index = target.match(string, index, &matchLength); 102 index = target.match(string, index, &matchLength);
100 if (index < 0) 103 if (index < 0)
101 break; 104 break;
102 string.replace(index, matchLength, replacement); 105 string.replace(index, matchLength, replacement);
103 index += replacement.length(); 106 index += replacement.length();
104 if (!matchLength) 107 if (!matchLength)
105 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]* 108 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
106 } 109 }
107 } 110 }
108 111
109 } // namespace WebCore 112 } // namespace WebCore
OLDNEW
« no previous file with comments | « Source/core/platform/text/RegularExpression.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698