Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(401)

Side by Side Diff: Source/core/platform/text/RegularExpression.cpp

Issue 13896017: Switch RegularExpression from YARR to V8 (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Review from adamk and abarth Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2008 Collabora Ltd. 3 * Copyright (C) 2008 Collabora Ltd.
4 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged 4 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged
5 * Copyright (C) 2013 Google Inc. All rights reserved.
5 * 6 *
6 * Redistribution and use in source and binary forms, with or without 7 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 8 * modification, are permitted provided that the following conditions
8 * are met: 9 * are met:
9 * 1. Redistributions of source code must retain the above copyright 10 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 11 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright 12 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the 13 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution. 14 * documentation and/or other materials provided with the distribution.
14 * 15 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 16 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */ 27 */
27 28
28 #include "config.h" 29 #include "config.h"
29 #include "RegularExpression.h" 30 #include "RegularExpression.h"
30 31
31 #include <wtf/BumpPointerAllocator.h> 32 // FIXME: These seem like a layering violation, but converting the strings manually
32 #include <yarr/Yarr.h> 33 // without v8String is difficult, and calling into v8 without V8RecursionScope will
33 #include "Logging.h" 34 // assert. Perhaps v8 basic utilities shouldn't be in bindings, or we should put
35 // RegularExpression as some kind of abstract interface that's implemented in bindings.
36 #include "V8Binding.h"
37 #include "V8RecursionScope.h"
38 #include <wtf/MainThread.h>
34 39
35 namespace WebCore { 40 namespace WebCore {
36 41
37 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) 42 static v8::Local<v8::Context> regexContext()
38 : m_numSubpatterns(0)
39 , m_regExpByteCode(compile(pattern, caseSensitivity, multilineMode))
40 { 43 {
44 ASSERT(isMainThread());
45 static ScopedPersistent<v8::Context>* staticRegexContext = new ScopedPersist ent<v8::Context>(v8::Context::New());
46 return v8::Local<v8::Context>::New(staticRegexContext->get());
41 } 47 }
42 48
43 PassOwnPtr<JSC::Yarr::BytecodePattern> RegularExpression::compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) 49 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
44 { 50 {
45 const char* constructionError = 0; 51 v8::HandleScope handleScope;
46 JSC::Yarr::YarrPattern pattern(patternString, (caseSensitivity == TextCaseIn sensitive), (multilineMode == MultilineEnabled), &constructionError); 52 v8::Local<v8::Context> context(regexContext());
47 if (constructionError) { 53 v8::Context::Scope scope(context);
48 LOG_ERROR("RegularExpression: YARR compile failed with '%s'", constructi onError); 54
49 return nullptr; 55 unsigned flags = v8::RegExp::kNone;
56 if (caseSensitivity == TextCaseInsensitive)
57 flags |= v8::RegExp::kIgnoreCase;
58 if (multilineMode == MultilineEnabled)
59 flags |= v8::RegExp::kMultiline;
60
61 v8::TryCatch tryCatch;
62 m_regex.set(v8::RegExp::New(v8String(pattern, context->GetIsolate()), static _cast<v8::RegExp::Flags>(flags)));
adamk 2013/04/22 17:48:03 I think it would be clearer if you split this into
63 }
64
65 int RegularExpression::match(const String& string, int startFrom, int* matchLeng th) const
66 {
67 if (m_regex.isEmpty() || string.isNull())
68 return -1;
69
70 // v8 strings are limited to int.
71 if (string.length() > INT_MAX)
72 return -1;
73
74 v8::HandleScope handleScope;
75 v8::Local<v8::Context> context(regexContext());
76 v8::Context::Scope scope(context);
77 v8::TryCatch tryCatch;
78
79 V8RecursionScope::MicrotaskSuppression microtaskScope;
80
81 v8::Local<v8::Function> exec = m_regex->Get(v8::String::NewSymbol("exec")).A s<v8::Function>();
82
83 v8::Handle<v8::Value> argv[] = { v8String(string, context->GetIsolate()) };
84 v8::Local<v8::Value> returnValue = exec->Call(m_regex.get(), 1, argv);
85
86 // RegExp#exec returns null if there's no match, otherwise it returns an
87 // Array of strings with the first being the whole match string and others
88 // being subgroups. The Array also has some random properties tacked on like
89 // "index" which is the offset of the match.
90 //
91 // https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/RegExp/exec
92
93 if (!returnValue->IsArray())
94 return -1;
95
96 v8::Local<v8::Array> result = returnValue.As<v8::Array>();
97 int matchOffset = result->Get(v8::String::NewSymbol("index"))->ToInt32()->Va lue();
98
99 if (matchLength) {
100 v8::Local<v8::String> match = result->Get(0).As<v8::String>();
101 *matchLength = match->Length();
50 } 102 }
51 103
52 m_numSubpatterns = pattern.m_numSubpatterns; 104 return matchOffset;
53
54 return JSC::Yarr::byteCompile(pattern, &m_regexAllocator);
55 }
56
57 int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
58 {
59 if (!m_regExpByteCode)
60 return -1;
61
62 if (str.isNull())
63 return -1;
64
65 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
66 unsigned* offsetVector;
67 Vector<unsigned, 32> nonReturnedOvector;
68
69 nonReturnedOvector.resize(offsetVectorSize);
70 offsetVector = nonReturnedOvector.data();
71
72 ASSERT(offsetVector);
73 for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
74 offsetVector[j] = JSC::Yarr::offsetNoMatch;
75
76 unsigned result;
77 if (str.length() <= INT_MAX)
78 result = JSC::Yarr::interpret(m_regExpByteCode.get(), str, startFrom, of fsetVector);
79 else {
80 // This code can't handle unsigned offsets. Limit our processing to strings with offsets that
81 // can be represented as ints.
82 result = JSC::Yarr::offsetNoMatch;
83 }
84
85 if (result == JSC::Yarr::offsetNoMatch)
86 return -1;
87
88 // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector.
89 if (matchLength)
90 *matchLength = offsetVector[1] - offsetVector[0];
91 return offsetVector[0];
92 } 105 }
93 106
94 void replace(String& string, const RegularExpression& target, const String& repl acement) 107 void replace(String& string, const RegularExpression& target, const String& repl acement)
95 { 108 {
96 int index = 0; 109 int index = 0;
97 while (index < static_cast<int>(string.length())) { 110 while (index < static_cast<int>(string.length())) {
98 int matchLength; 111 int matchLength;
99 index = target.match(string, index, &matchLength); 112 index = target.match(string, index, &matchLength);
100 if (index < 0) 113 if (index < 0)
101 break; 114 break;
102 string.replace(index, matchLength, replacement); 115 string.replace(index, matchLength, replacement);
103 index += replacement.length(); 116 index += replacement.length();
104 if (!matchLength) 117 if (!matchLength)
105 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]* 118 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
106 } 119 }
107 } 120 }
108 121
109 } // namespace WebCore 122 } // namespace WebCore
OLDNEW
« Source/core/html/BaseTextInputType.cpp ('K') | « Source/core/platform/text/RegularExpression.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698