Index: src/objects.h |
=================================================================== |
--- src/objects.h (revision 10944) |
+++ src/objects.h (working copy) |
@@ -6509,13 +6509,18 @@ |
inline bool has_trivial_hash(); |
// Add a character to the hash and update the array index calculation. |
- inline void AddCharacter(uc32 c); |
+ inline void AddCharacter(uint32_t c); |
// Adds a character to the hash but does not update the array index |
// calculation. This can only be called when it has been verified |
// that the input is not an array index. |
- inline void AddCharacterNoIndex(uc32 c); |
+ inline void AddCharacterNoIndex(uint32_t c); |
+ // Add a character above 0xffff as a surrogate pair. These can get into |
+ // the hasher through the routines that take a UTF-8 string and make a symbol. |
+ void AddSurrogatePair(uc32 c); |
+ void AddSurrogatePairNoIndex(uc32 c); |
+ |
// Returns the value to store in the hash field of a string with |
// the given length and contents. |
uint32_t GetHashField(); |
@@ -6764,8 +6769,21 @@ |
RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL, |
int* length_output = 0); |
- inline int Utf8Length() { return Utf8Length(this, 0, length()); } |
- static int Utf8Length(String* input, int from, int to); |
+ inline int Utf8Length() { /* Whole-string form: forwards to the static overload for the range 0 to length() and returns its result, or -1 if that call sets its failure flag. */ |
rossberg
2012/03/07 13:32:47
Even with the inline definition here, I think a co
Erik Corry
2012/03/11 19:29:22
This issue disappears with the move to a handle ba
|
+ bool dummy; /* passed as ends_in_surrogate; never read in this function */ |
+ bool failure = false; |
+ int len = Utf8Length(this, 0, length(), false, 100, &failure, &dummy); /* false fills preceeded_by_surrogate, 100 fills max_recursion */ |
+ if (failure) return -1; /* the comment on the static overload says to call again on a flatter string when failure is set */ |
+ return len; |
+ } |
+ // Call again on a flatter string if the failure variable gets set. |
+ static int Utf8Length(String* input, |
rossberg
2012/03/07 13:32:47
Please specify the result of these functions in th
Erik Corry
2012/03/11 19:29:22
Done. (They are coded as 'illegal' UTF-8 sequence
|
+ int from, /* the no-argument Utf8Length() passes 0 */ |
+ int to, /* the no-argument Utf8Length() passes length() */ |
+ bool preceeded_by_surrogate, /* the no-argument Utf8Length() passes false; NOTE(review): presumably describes the code unit just before `from` -- confirm against the definition */ |
+ int max_recursion, |
rossberg
2012/03/07 13:32:47
Is this a useful interface? How is a caller suppos
Erik Corry
2012/03/11 19:29:22
Yes, this is the correct solution. This means it
|
+ bool* failure, /* the no-argument Utf8Length() initialises this to false and returns -1 if it is set after the call */ |
+ bool* ends_in_surrogate); /* the no-argument Utf8Length() passes a dummy and ignores it; NOTE(review): presumably an output describing the last code unit of the range -- confirm against the definition */ |
// Return a 16 bit Unicode representation of the string. |
// The string should be nearly flat, otherwise the performance of |
@@ -6832,7 +6850,7 @@ |
// Max ASCII char code. |
static const int kMaxAsciiCharCode = unibrow::Utf8::kMaxOneByteChar; |
static const unsigned kMaxAsciiCharCodeU = unibrow::Utf8::kMaxOneByteChar; |
- static const int kMaxUC16CharCode = 0xffff; |
+ static const int kMaxUtf16CodeUnit = 0xffff; |
// Mask constant for checking if a string has a computed hash code |
// and if it is an array index. The least significant bit indicates |