src/unicode.h - Issue 121173009: String:WriteUtf8: Add REPLACE_INVALID_UTF8 option

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: src/unicode.h

Issue 121173009: String:WriteUtf8: Add REPLACE_INVALID_UTF8 option (Closed) Base URL: git://github.com/v8/v8.git@master

Patch Set: Rebase Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/unicode.h

diff --git a/src/unicode.h b/src/unicode.h

index 6ba61d0e17b2a0b6ff7702c422c38ddf9276318a..bb5506d38e2531b0f60fd73f8676c3ad77277ab7 100644

--- a/src/unicode.h

+++ b/src/unicode.h

@@ -102,6 +102,9 @@ class UnicodeData {

class Utf16 {

public:

+ static inline bool IsSurrogatePair(int lead, int trail) {

+ return IsLeadSurrogate(lead) && IsTrailSurrogate(trail);

+ }

static inline bool IsLeadSurrogate(int code) {

if (code == kNoPreviousCharacter) return false;

return (code & 0xfc00) == 0xd800;

@@ -146,11 +149,16 @@ class Utf8 {

public:

static inline uchar Length(uchar chr, int previous);

static inline unsigned EncodeOneByte(char* out, uint8_t c);

- static inline unsigned Encode(

- char* out, uchar c, int previous);

+ static inline unsigned Encode(char* out,

+ uchar c,

+ int previous,

+ bool replace_invalid = false);

static uchar CalculateValue(const byte* str,

unsigned length,

unsigned* cursor);

+ // The Unicode replacement character, used to signal invalid Unicode

+ // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding.

static const uchar kBadChar = 0xFFFD;

static const unsigned kMaxEncodedSize = 4;

static const unsigned kMaxOneByteChar = 0x7f;

@@ -162,6 +170,9 @@ class Utf8 {

// that match are coded as a 4 byte UTF-8 sequence.

static const unsigned kBytesSavedByCombiningSurrogates = 2;

static const unsigned kSizeOfUnmatchedSurrogate = 3;

+ // The maximum size a single UTF-16 code unit may take up when encoded as

+ // UTF-8.

+ static const unsigned kMax16BitCodeUnitSize = 3;

static inline uchar ValueOf(const byte* str,

unsigned length,

unsigned* cursor);

« no previous file with comments | « src/api.cc ('k') | src/unicode-inl.h » ('j') | no next file with comments »