src/unicode-inl.h - Issue 121173009: String::WriteUtf8: Add REPLACE_INVALID_UTF8 option

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: src/unicode-inl.h

Issue 121173009: String::WriteUtf8: Add REPLACE_INVALID_UTF8 option (Closed) Base URL: git://github.com/v8/v8.git@master

Patch Set: Latest version Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/unicode-inl.h

diff --git a/src/unicode-inl.h b/src/unicode-inl.h

index f861f9f2d47449945d62a6fbc8044abbcd0b2a2b..0210b93cb9251229262cc380013e7e7245377a7a 100644

--- a/src/unicode-inl.h

+++ b/src/unicode-inl.h

@@ -107,8 +107,13 @@ unsigned Utf8::EncodeOneByte(char* str, uint8_t c) {

return 2;

}

-unsigned Utf8::Encode(char* str, uchar c, int previous) {

+// Encode encodes the UTF-16 code unit c into the given str buffer, combining

+// it with previous when the two form a surrogate pair. Unless allow_invalid is

+// set to true, unpaired surrogate code units will be replaced with kBadChar.

+unsigned Utf8::Encode(char* str,

+ uchar c,

+ int previous,

+ bool allow_invalid = true) {

dcarney 2014/01/13 09:19:56 default should be in declaration

haimuiba 2014/01/15 10:52:34 Done.

static const int kMask = ~(1 << 6);

if (c <= kMaxOneByteChar) {

str[0] = c;

@@ -118,12 +123,16 @@ unsigned Utf8::Encode(char* str, uchar c, int previous) {

str[1] = 0x80 | (c & kMask);

return 2;

} else if (c <= kMaxThreeByteChar) {

- if (Utf16::IsTrailSurrogate(c) &&

- Utf16::IsLeadSurrogate(previous)) {

+ if (Utf16::IsSurrogatePair(previous, c)) {

const int kUnmatchedSize = kSizeOfUnmatchedSurrogate;

return Encode(str - kUnmatchedSize,

Utf16::CombineSurrogatePair(previous, c),

- Utf16::kNoPreviousCharacter) - kUnmatchedSize;

+ Utf16::kNoPreviousCharacter,

+ allow_invalid) - kUnmatchedSize;

+ } else if (!allow_invalid &&

+ (Utf16::IsLeadSurrogate(c) ||

+ Utf16::IsTrailSurrogate(c))) {

+ c = kBadChar;

}

str[0] = 0xE0 | (c >> 12);

str[1] = 0x80 | ((c >> 6) & kMask);

« src/unicode.h ('K') | « src/unicode.h ('k') | test/cctest/test-api.cc » ('j') | no next file with comments »