Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(196)

Unified Diff: src/unicode-inl.h

Issue 121173009: String::WriteUtf8: Add REPLACE_INVALID_UTF8 option (Closed) Base URL: git://github.com/v8/v8.git@master
Patch Set: DISALLOW_INVALID_UTF8 flag and fixes Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« src/api.cc ('K') | « src/unicode.h ('k') | test/cctest/test-api.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/unicode-inl.h
diff --git a/src/unicode-inl.h b/src/unicode-inl.h
index f861f9f2d47449945d62a6fbc8044abbcd0b2a2b..3d1c503cbef783d1bd5cd8ae30bb02273a55973d 100644
--- a/src/unicode-inl.h
+++ b/src/unicode-inl.h
@@ -107,8 +107,17 @@ unsigned Utf8::EncodeOneByte(char* str, uint8_t c) {
return 2;
}
+// Encodes the Unicode code point c into the given str buffer. Unless
+// allow_invalid is set to true, surrogate code points are replaced with
+// kReplacementCharacter. Callers must combine surrogate pairs into a single
+// code point before calling Encode.
+unsigned Utf8::Encode(char* str, uchar c, bool allow_invalid) {
+ if (!allow_invalid &&
dcarney 2014/01/07 10:12:16 move this block down into the kMaxThreeByteChar cl
+ (Utf16::IsLeadSurrogate(c) ||
+ Utf16::IsTrailSurrogate(c))) {
+ c = kReplacementCharacter;
+ }
-unsigned Utf8::Encode(char* str, uchar c, int previous) {
static const int kMask = ~(1 << 6);
if (c <= kMaxOneByteChar) {
str[0] = c;
@@ -118,13 +127,6 @@ unsigned Utf8::Encode(char* str, uchar c, int previous) {
str[1] = 0x80 | (c & kMask);
return 2;
} else if (c <= kMaxThreeByteChar) {
- if (Utf16::IsTrailSurrogate(c) &&
- Utf16::IsLeadSurrogate(previous)) {
- const int kUnmatchedSize = kSizeOfUnmatchedSurrogate;
- return Encode(str - kUnmatchedSize,
- Utf16::CombineSurrogatePair(previous, c),
- Utf16::kNoPreviousCharacter) - kUnmatchedSize;
- }
str[0] = 0xE0 | (c >> 12);
str[1] = 0x80 | ((c >> 6) & kMask);
str[2] = 0x80 | (c & kMask);
@@ -150,6 +152,7 @@ uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) {
return CalculateValue(bytes, length, cursor);
}
+// TODO: Should Length be given the same semantics as Encode?
dcarney 2014/01/07 11:05:50 i don't see an easy way to do this. You'd have to
unsigned Utf8::Length(uchar c, int previous) {
if (c <= kMaxOneByteChar) {
return 1;
« src/api.cc ('K') | « src/unicode.h ('k') | test/cctest/test-api.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698