net/base/net_util.cc - Issue 3011012: Add URL filter to trigger punycode for Japanese homographic sequences....

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: net/base/net_util.cc

Issue 3011012: Add URL filter to trigger punycode for Japanese homographic sequences.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: Created 10 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: net/base/net_util.cc

===================================================================

--- net/base/net_util.cc (revision 52955)

+++ net/base/net_util.cc (working copy)

@@ -6,6 +6,7 @@

#include <algorithm>

#include <map>

+#include <unicode/regex.h>

#include <unicode/ucnv.h>

#include <unicode/uidna.h>

#include <unicode/ulocdata.h>

@@ -621,6 +622,13 @@

L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"

L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"

L"[\ufffa-\ufffd]]"), status);

+ DCHECK(U_SUCCESS(status));

+ icu::RegexMatcher dangerous_patterns(icu::UnicodeString(

+ // Katakana no, n, or so flanked on both sides by non-katakana characters

+ L"([^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"

+ // Two consecutive Japanese voiced/semi-voiced sound marks (U+3099-U+309C)

+ L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c])"),

+ 0, status);

#else

icu::UnicodeSet dangerous_characters(icu::UnicodeString(

"[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338"

@@ -631,13 +639,26 @@

"\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14"

"\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]"

"[\\ufffa-\\ufffd]]", -1, US_INV), status);

+ DCHECK(U_SUCCESS(status));

+ icu::RegexMatcher dangerous_patterns(icu::UnicodeString(

+ // Katakana no, n, or so flanked on both sides by non-katakana characters

+ "([^\\p{Katakana}][\\u30ce\\u30f3\u30bd][^\\p{Katakana}]"

+ // Two consecutive Japanese voiced/semi-voiced sound marks (U+3099-U+309C)

+ "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c])"),

+ 0, status);

#endif

DCHECK(U_SUCCESS(status));

icu::UnicodeSet component_characters;

- component_characters.addAll(icu::UnicodeString(str, str_len));

+ icu::UnicodeString component_string(str, str_len);

+ component_characters.addAll(component_string);

if (dangerous_characters.containsSome(component_characters))

return false;

+ DCHECK(U_SUCCESS(status));

+ dangerous_patterns.reset(component_string);

+ if (dangerous_patterns.find())

+ return false;

// If the language list is empty, the result is completely determined

// by whether a component is a single script or not. This will block

// even "safe" script mixing cases like <Chinese, Latin-ASCII> that are

« no previous file with comments | « no previous file | net/base/net_util_unittest.cc » ('j') | no next file with comments »