Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Unified Diff: src/api.cc

Issue 121173009: String::WriteUtf8: Add REPLACE_INVALID_UTF8 option (Closed) Base URL: git://github.com/v8/v8.git@master
Patch Set: Created 6 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | src/unicode.h » ('j') | src/unicode.h » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/api.cc
diff --git a/src/api.cc b/src/api.cc
index 9a68f639efec56559c0aa7ffe76a58ada5776fc4..d03601dcf475a0c750b8286f7387c9ce9c4ac6c4 100644
--- a/src/api.cc
+++ b/src/api.cc
@@ -4514,27 +4514,40 @@ class Utf8WriterVisitor {
utf16_chars_read_(0) {
}
- static int WriteEndCharacter(uint16_t character,
- int last_character,
+ // WritePair writes the current UTF-16 code unit to the given buffer. The
+ // function will go back inside the buffer to combine surrogate pairs.
+ // @TODO use uint16_t for previous?
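dcarney 2014/01/04 15:56:45 previous is an int because it must also be able to hold special sentinel values (e.g. Utf16::kNoPreviousCharacter) that are not valid UTF-16 code units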
haimuiba 2014/01/06 05:40:18 Makes sense. Thx.
+ static int WritePair(uint16_t current, int previous, char* buffer) {
+ using namespace unibrow;
+ int code_point = current;
+ int written = 0;
+ if (Utf16::IsSurrogatePair(previous, current)) {
+ code_point = Utf16::CombineSurrogatePair(previous, current);
+ buffer -= Utf8::kSizeOfUnmatchedSurrogate;
+ written -= Utf8::kSizeOfUnmatchedSurrogate;
+ }
+ return written + Utf8::Encode(buffer, code_point, false);
dcarney 2014/01/04 15:56:45 having the length calculation here is too late. S
haimuiba 2014/01/06 05:40:18 Ok, I'll take a closer look.
+ }
+
+ // @TODO use uint16_t for previous?
+ static int WriteEndCharacter(uint16_t current,
+ int previous,
int remaining,
char* const buffer) {
using namespace unibrow;
ASSERT(remaining > 0);
- // We can't use a local buffer here because Encode needs to modify
- // previous characters in the stream. We know, however, that
- // exactly one character will be advanced.
- if (Utf16::IsTrailSurrogate(character) &&
- Utf16::IsLeadSurrogate(last_character)) {
- int written = Utf8::Encode(buffer, character, last_character);
+ // We can't use a local buffer here because WritePair needs to modify
+ // previous characters in the stream. We know, however, that exactly one
+ // character will be advanced.
+ if (Utf16::IsSurrogatePair(previous, current)) {
+ int written = WritePair(current, previous, buffer);
ASSERT(written == 1);
return written;
}
// Use a scratch buffer to check the required characters.
char temp_buffer[Utf8::kMaxEncodedSize];
// Can't encode using last_character as gcc has array bounds issues.
- int written = Utf8::Encode(temp_buffer,
- character,
- Utf16::kNoPreviousCharacter);
+ int written = WritePair(current, Utf16::kNoPreviousCharacter, temp_buffer);
// Won't fit.
if (written > remaining) return 0;
// Copy over the character from temp_buffer.
@@ -4581,7 +4594,7 @@ class Utf8WriterVisitor {
} else {
for (; i < fast_length; i++) {
uint16_t character = *chars++;
- buffer += Utf8::Encode(buffer, character, last_character);
+ buffer += WritePair(character, last_character, buffer);
last_character = character;
ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_);
}
« no previous file with comments | « no previous file | src/unicode.h » ('j') | src/unicode.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698