Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(956)

Unified Diff: src/runtime.cc

Issue 59853006: Fix y-umlaut to uppercase. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/heap.cc ('k') | src/unicode.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/runtime.cc
diff --git a/src/runtime.cc b/src/runtime.cc
index 107297c944701f3e3570145168d11b66b30bc3a7..7486181b00f19f1c6e994712f88ff1cf3c6750ad 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -6194,6 +6194,7 @@ template <class Converter>
MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
Isolate* isolate,
String* s,
+ String::Encoding result_encoding,
int length,
int input_string_length,
unibrow::Mapping<Converter, 128>* mapping) {
@@ -6209,7 +6210,7 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
// might break in the future if we implement more context and locale
// dependent upper/lower conversions.
Object* o;
- { MaybeObject* maybe_o = s->IsOneByteRepresentation()
+ { MaybeObject* maybe_o = result_encoding == String::ONE_BYTE_ENCODING
? isolate->heap()->AllocateRawOneByteString(length)
: isolate->heap()->AllocateRawTwoByteString(length);
if (!maybe_o->ToObject(&o)) return maybe_o;
@@ -6217,6 +6218,8 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
String* result = String::cast(o);
bool has_changed_character = false;
+ DisallowHeapAllocation no_gc;
+
// Convert all characters to upper case, assuming that they will fit
// in the buffer
Access<ConsStringIteratorOp> op(
@@ -6225,6 +6228,10 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
unibrow::uchar chars[Converter::kMaxWidth];
// We can assume that the string is not empty
uc32 current = stream.GetNext();
+ // y with umlauts is the only character that stops fitting into one-byte
+ // when converting to uppercase.
+ static const uc32 yuml_code = 0xff;
Sven Panne 2013/11/07 07:07:24 Hmmm, is this really the only case? Look e.g. at i
dcarney 2013/11/07 07:13:33 This code does not take locale into account. It's
+ bool ignore_yuml = result->IsSeqTwoByteString() || Converter::kIsToLower;
for (int i = 0; i < length;) {
bool has_next = stream.HasMore();
uc32 next = has_next ? stream.GetNext() : 0;
@@ -6233,13 +6240,14 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
// The case conversion of this character is the character itself.
result->Set(i, current);
i++;
- } else if (char_length == 1) {
+ } else if (char_length == 1 && (ignore_yuml || current != yuml_code)) {
// Common case: converting the letter resulted in one character.
ASSERT(static_cast<uc32>(chars[0]) != current);
result->Set(i, chars[0]);
has_changed_character = true;
i++;
} else if (length == input_string_length) {
+ bool found_yuml = (current == yuml_code);
// We've assumed that the result would be as long as the
// input but here is a character that converts to several
// characters. No matter, we calculate the exact length
@@ -6259,6 +6267,7 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
int current_length = i + char_length + next_length;
while (stream.HasMore()) {
current = stream.GetNext();
+ found_yuml |= (current == yuml_code);
// NOTE: we use 0 as the next character here because, while
// the next character may affect what a character converts to,
// it does not in any case affect the length of what it convert
@@ -6271,8 +6280,10 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
return Failure::OutOfMemoryException(0x13);
}
}
- // Try again with the real length.
- return Smi::FromInt(current_length);
+ // Try again with the real length. Return signed if we need
+ // to allocate a two-byte string for y-umlaut to uppercase.
+ return (found_yuml && !ignore_yuml) ? Smi::FromInt(-current_length)
+ : Smi::FromInt(current_length);
} else {
for (int j = 0; j < char_length; j++) {
result->Set(i, chars[j]);
@@ -6318,121 +6329,107 @@ static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) {
}
-enum AsciiCaseConversion {
- ASCII_TO_LOWER,
- ASCII_TO_UPPER
-};
-
-
-template <AsciiCaseConversion dir>
-struct FastAsciiConverter {
- static bool Convert(char* dst, char* src, int length, bool* changed_out) {
+template<class Converter>
+static bool FastAsciiConvert(char* dst,
+ char* src,
+ int length,
+ bool* changed_out) {
#ifdef DEBUG
char* saved_dst = dst;
char* saved_src = src;
#endif
- // We rely on the distance between upper and lower case letters
- // being a known power of 2.
- ASSERT('a' - 'A' == (1 << 5));
- // Boundaries for the range of input characters than require conversion.
- const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1;
- const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1;
- bool changed = false;
- uintptr_t or_acc = 0;
- char* const limit = src + length;
+ DisallowHeapAllocation no_gc;
+ // We rely on the distance between upper and lower case letters
+ // being a known power of 2.
+ ASSERT('a' - 'A' == (1 << 5));
+ // Boundaries for the range of input characters than require conversion.
+ static const char lo = Converter::kIsToLower ? 'A' - 1 : 'a' - 1;
+ static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1;
+ bool changed = false;
+ uintptr_t or_acc = 0;
+ char* const limit = src + length;
#ifdef V8_HOST_CAN_READ_UNALIGNED
- // Process the prefix of the input that requires no conversion one
- // (machine) word at a time.
- while (src <= limit - sizeof(uintptr_t)) {
- uintptr_t w = *reinterpret_cast<uintptr_t*>(src);
- or_acc |= w;
- if (AsciiRangeMask(w, lo, hi) != 0) {
- changed = true;
- break;
- }
- *reinterpret_cast<uintptr_t*>(dst) = w;
- src += sizeof(uintptr_t);
- dst += sizeof(uintptr_t);
- }
- // Process the remainder of the input performing conversion when
- // required one word at a time.
- while (src <= limit - sizeof(uintptr_t)) {
- uintptr_t w = *reinterpret_cast<uintptr_t*>(src);
- or_acc |= w;
- uintptr_t m = AsciiRangeMask(w, lo, hi);
- // The mask has high (7th) bit set in every byte that needs
- // conversion and we know that the distance between cases is
- // 1 << 5.
- *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);
- src += sizeof(uintptr_t);
- dst += sizeof(uintptr_t);
- }
-#endif
- // Process the last few bytes of the input (or the whole input if
- // unaligned access is not supported).
- while (src < limit) {
- char c = *src;
- or_acc |= c;
- if (lo < c && c < hi) {
- c ^= (1 << 5);
- changed = true;
- }
- *dst = c;
- ++src;
- ++dst;
- }
- if ((or_acc & kAsciiMask) != 0) {
- return false;
+ // Process the prefix of the input that requires no conversion one
+ // (machine) word at a time.
+ while (src <= limit - sizeof(uintptr_t)) {
+ uintptr_t w = *reinterpret_cast<uintptr_t*>(src);
+ or_acc |= w;
+ if (AsciiRangeMask(w, lo, hi) != 0) {
+ changed = true;
+ break;
}
-#ifdef DEBUG
- CheckConvert(saved_dst, saved_src, length, changed);
+ *reinterpret_cast<uintptr_t*>(dst) = w;
+ src += sizeof(uintptr_t);
+ dst += sizeof(uintptr_t);
+ }
+ // Process the remainder of the input performing conversion when
+ // required one word at a time.
+ while (src <= limit - sizeof(uintptr_t)) {
+ uintptr_t w = *reinterpret_cast<uintptr_t*>(src);
+ or_acc |= w;
+ uintptr_t m = AsciiRangeMask(w, lo, hi);
+ // The mask has high (7th) bit set in every byte that needs
+ // conversion and we know that the distance between cases is
+ // 1 << 5.
+ *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);
+ src += sizeof(uintptr_t);
+ dst += sizeof(uintptr_t);
+ }
#endif
- *changed_out = changed;
- return true;
+ // Process the last few bytes of the input (or the whole input if
+ // unaligned access is not supported).
+ while (src < limit) {
+ char c = *src;
+ or_acc |= c;
+ if (lo < c && c < hi) {
+ c ^= (1 << 5);
+ changed = true;
+ }
+ *dst = c;
+ ++src;
+ ++dst;
+ }
+ if ((or_acc & kAsciiMask) != 0) {
+ return false;
}
+ ASSERT(CheckFastAsciiConvert(
+ saved_dst, saved_src, length, changed, Converter::kIsToLower));
+
+ *changed_out = changed;
+ return true;
+}
+
#ifdef DEBUG
- static void CheckConvert(char* dst, char* src, int length, bool changed) {
- bool expected_changed = false;
- for (int i = 0; i < length; i++) {
- if (dst[i] == src[i]) continue;
- expected_changed = true;
- if (dir == ASCII_TO_LOWER) {
- ASSERT('A' <= src[i] && src[i] <= 'Z');
- ASSERT(dst[i] == src[i] + ('a' - 'A'));
- } else {
- ASSERT(dir == ASCII_TO_UPPER);
- ASSERT('a' <= src[i] && src[i] <= 'z');
- ASSERT(dst[i] == src[i] - ('a' - 'A'));
- }
+static bool CheckFastAsciiConvert(char* dst,
+ char* src,
+ int length,
+ bool changed,
+ bool is_to_lower) {
+ bool expected_changed = false;
+ for (int i = 0; i < length; i++) {
+ if (dst[i] == src[i]) continue;
+ expected_changed = true;
+ if (is_to_lower) {
+ ASSERT('A' <= src[i] && src[i] <= 'Z');
+ ASSERT(dst[i] == src[i] + ('a' - 'A'));
+ } else {
+ ASSERT('a' <= src[i] && src[i] <= 'z');
+ ASSERT(dst[i] == src[i] - ('a' - 'A'));
}
- ASSERT(expected_changed == changed);
}
+ return (expected_changed == changed);
+}
#endif
-};
-
-
-struct ToLowerTraits {
- typedef unibrow::ToLowercase UnibrowConverter;
-
- typedef FastAsciiConverter<ASCII_TO_LOWER> AsciiConverter;
-};
-
-
-struct ToUpperTraits {
- typedef unibrow::ToUppercase UnibrowConverter;
-
- typedef FastAsciiConverter<ASCII_TO_UPPER> AsciiConverter;
-};
} // namespace
-template <typename ConvertTraits>
+template <class Converter>
MUST_USE_RESULT static MaybeObject* ConvertCase(
Arguments args,
Isolate* isolate,
- unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) {
+ unibrow::Mapping<Converter, 128>* mapping) {
SealHandleScope shs(isolate);
CONVERT_ARG_CHECKED(String, s, 0);
s = s->TryFlattenGetString();
@@ -6454,7 +6451,7 @@ MUST_USE_RESULT static MaybeObject* ConvertCase(
}
SeqOneByteString* result = SeqOneByteString::cast(o);
bool has_changed_character;
- bool is_ascii = ConvertTraits::AsciiConverter::Convert(
+ bool is_ascii = FastAsciiConvert<Converter>(
reinterpret_cast<char*>(result->GetChars()),
reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()),
length,
@@ -6465,31 +6462,35 @@ MUST_USE_RESULT static MaybeObject* ConvertCase(
}
}
+ String::Encoding result_encoding = s->IsOneByteRepresentationUnderneath()
+ ? String::ONE_BYTE_ENCODING : String::TWO_BYTE_ENCODING;
Object* answer;
- { MaybeObject* maybe_answer =
- ConvertCaseHelper(isolate, s, length, length, mapping);
+ { MaybeObject* maybe_answer = ConvertCaseHelper(
+ isolate, s, result_encoding, length, length, mapping);
if (!maybe_answer->ToObject(&answer)) return maybe_answer;
}
if (answer->IsSmi()) {
- // Retry with correct length.
- { MaybeObject* maybe_answer =
- ConvertCaseHelper(isolate,
- s, Smi::cast(answer)->value(), length, mapping);
- if (!maybe_answer->ToObject(&answer)) return maybe_answer;
+ int new_length = Smi::cast(answer)->value();
+ if (new_length < 0) {
+ result_encoding = String::TWO_BYTE_ENCODING;
+ new_length = -new_length;
}
+ MaybeObject* maybe_answer = ConvertCaseHelper(
+ isolate, s, result_encoding, new_length, length, mapping);
+ if (!maybe_answer->ToObject(&answer)) return maybe_answer;
}
return answer;
}
RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToLowerCase) {
- return ConvertCase<ToLowerTraits>(
+ return ConvertCase(
args, isolate, isolate->runtime_state()->to_lower_mapping());
}
RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToUpperCase) {
- return ConvertCase<ToUpperTraits>(
+ return ConvertCase(
args, isolate, isolate->runtime_state()->to_upper_mapping());
}
« no previous file with comments | « src/heap.cc ('k') | src/unicode.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698