Chromium Code Reviews| Index: include/v8.h | 
| diff --git a/include/v8.h b/include/v8.h | 
| index 480cbaa9553f295f6ad844f789e54fa128059402..9bc3e4423b9e3380d8c00974dc589491f5dcfa0f 100644 | 
| --- a/include/v8.h | 
| +++ b/include/v8.h | 
| @@ -1069,19 +1069,43 @@ class String : public Primitive { | 
| PRESERVE_ASCII_NULL = 4 | 
| }; | 
| - // 16-bit character codes. | 
| + | 
| + enum StringEncoding { | 
| + INVALID_ENCODING = 0, | 
| + UTF8_ENCODING = 1, | 
| 
 
Erik Corry
2012/08/21 12:21:34
The names are not UTF8 or UTF16, but rather UTF-8
 
Yang
2012/08/21 13:06:20
Done.
 
 | 
| + LATIN1_ENCODING = 2, | 
| + UTF16_ENCODING = 3, | 
| + | 
| + STRICT_ASCII_HINT = 1 << 16, | 
| 
 
Erik Corry
2012/08/21 12:21:34
I think STRICT_ASCII_HINT should just be called AS
 
Yang
2012/08/21 13:06:20
Done.
 
 | 
| + NOT_ASCII_HINT = 1 << 17 | 
| + }; | 
| + | 
| + static const int kStringEncodingMask = 3; | 
| + static const int kAsciiHintMask = | 
| + String::STRICT_ASCII_HINT | String::NOT_ASCII_HINT; | 
| + | 
| + static const int kUndefinedLength = -1; | 
| + | 
| + | 
| + // 16-bit UTF16 code units. | 
| V8EXPORT int Write(uint16_t* buffer, | 
| int start = 0, | 
| - int length = -1, | 
| + int length = kUndefinedLength, | 
| int options = NO_OPTIONS) const; | 
| // ASCII characters. | 
| V8EXPORT int WriteAscii(char* buffer, | 
| int start = 0, | 
| - int length = -1, | 
| + int length = kUndefinedLength, | 
| int options = NO_OPTIONS) const; | 
| + // Latin1 characters. | 
| 
 
Erik Corry
2012/08/21 12:21:34
This one doesn't support PRESERVE_ASCII_NULL, or r
 
Yang
2012/08/21 13:06:20
Done.
 
 | 
| + V8EXPORT int WriteLatin1(char* buffer, | 
| + int start = 0, | 
| + int length = kUndefinedLength, | 
| + int options = NO_OPTIONS) const; | 
| + | 
| // UTF-8 encoded characters. | 
| V8EXPORT int WriteUtf8(char* buffer, | 
| - int length = -1, | 
| + int length = kUndefinedLength, | 
| int* nchars_ref = NULL, | 
| int options = NO_OPTIONS) const; | 
| @@ -1122,6 +1146,7 @@ class String : public Primitive { | 
| void operator=(const ExternalStringResourceBase&); | 
| friend class v8::internal::Heap; | 
| + friend class v8::String; | 
| }; | 
| /** | 
| @@ -1181,6 +1206,16 @@ class String : public Primitive { | 
| }; | 
| /** | 
| + * An ExternalLatin1StringResource is a wrapper around a Latin1-encoded | 
| + * string buffer that resides outside V8's heap. For usage in V8, Latin1 | 
| + * strings are converted to ASCII or two-byte string depending on whether | 
| 
 
Erik Corry
2012/08/21 12:21:34
string -> strings,
 
Yang
2012/08/21 13:06:20
Done.
 
 | 
| + * the string contains non-ASCII characters. | 
| + */ | 
| + class V8EXPORT ExternalLatin1StringResource | 
| + : public ExternalAsciiStringResource { | 
| + }; | 
| + | 
| + /** | 
| * Get the ExternalStringResource for an external string. Returns | 
| * NULL if IsExternal() doesn't return true. | 
| */ | 
| @@ -1193,24 +1228,44 @@ class String : public Primitive { | 
| V8EXPORT const ExternalAsciiStringResource* GetExternalAsciiStringResource() | 
| const; | 
| + /** | 
| + * If the string is external, return the its encoding (Latin1 or UTF16) | 
| 
 
Erik Corry
2012/08/21 12:21:34
the its -> its
 
Yang
2012/08/21 13:06:20
Done.
 
 | 
| + * and possibly a hint on whether the content is ASCII. | 
| + * Return String::INVALID_ENCODING otherwise. | 
| + */ | 
| + inline int GetExternalStringEncoding() const; | 
| + | 
| + | 
| + /** | 
| + * Return the resource of the external string regardless of encoding. | 
| + * Call this only after having made sure that the string is indeed external! | 
| + */ | 
| + inline ExternalStringResourceBase* GetExternalStringResourceBase() const; | 
| + | 
| static inline String* Cast(v8::Value* obj); | 
| /** | 
| - * Allocates a new string from either UTF-8 encoded or ASCII data. | 
| - * The second parameter 'length' gives the buffer length. | 
| - * If the data is UTF-8 encoded, the caller must | 
| - * be careful to supply the length parameter. | 
| - * If it is not given, the function calls | 
| - * 'strlen' to determine the buffer length, it might be | 
| - * wrong if 'data' contains a null character. | 
| + * Allocates a new string from either UTF-8-, Latin1-encoded data. | 
| 
 
Erik Corry
2012/08/21 12:21:34
"-," should be "or "
 
Yang
2012/08/21 13:06:20
Done.
 
 | 
| + * The second parameter 'length' gives the buffer length. If the data is | 
| 
 
Erik Corry
2012/08/21 12:21:34
is UTF-8 encoded -> may contain zero bytes
 
Yang
2012/08/21 13:06:20
Done.
 
 | 
| + * UTF-8 encoded, the caller must be careful to supply the length parameter. | 
| + * If it is not given, the function calls 'strlen' to determine the buffer | 
| + * length, it might be wrong if 'data' contains a null character. | 
| + * The third parameter specifies the encoding, which may include a hint | 
| + * whether the string contains ASCII characters. In case of Latin1, the | 
| 
 
Erik Corry
2012/08/21 12:21:34
In case of -> In the case of
 
Yang
2012/08/21 13:06:20
Done.
 
 | 
| + * appropriate internal representation (UTF16 or ASCII) is chosen. | 
| */ | 
| - V8EXPORT static Local<String> New(const char* data, int length = -1); | 
| + V8EXPORT static Local<String> New(const char* data, | 
| + int length = kUndefinedLength, | 
| + int encoding = UTF8_ENCODING); | 
| 
 
Erik Corry
2012/08/21 12:21:34
Surely this should be a StringEncoding and not an
 
Yang
2012/08/21 13:06:20
The encoding here may additionally contain NOT_ASC
 
 | 
| - /** Allocates a new string from 16-bit character codes.*/ | 
| - V8EXPORT static Local<String> New(const uint16_t* data, int length = -1); | 
| + /** Allocates a new string from 16-bit UTF16 code units.*/ | 
| + V8EXPORT static Local<String> New(const uint16_t* data, | 
| + int length = kUndefinedLength); | 
| /** Creates a symbol. Returns one if it exists already.*/ | 
| - V8EXPORT static Local<String> NewSymbol(const char* data, int length = -1); | 
| + V8EXPORT static Local<String> NewSymbol(const char* data, | 
| + int length = kUndefinedLength, | 
| + int encoding = UTF8_ENCODING); | 
| /** | 
| * Creates a new string by concatenating the left and the right strings | 
| @@ -1247,7 +1302,8 @@ class String : public Primitive { | 
| * this function should not otherwise delete or modify the resource. Neither | 
| * should the underlying buffer be deallocated or modified except through the | 
| * destructor of the external string resource. | 
| - */ V8EXPORT static Local<String> NewExternal( | 
| + */ | 
| + V8EXPORT static Local<String> NewExternal( | 
| ExternalAsciiStringResource* resource); | 
| /** | 
| @@ -1261,6 +1317,24 @@ class String : public Primitive { | 
| */ | 
| V8EXPORT bool MakeExternal(ExternalAsciiStringResource* resource); | 
| + | 
| + /** | 
| + * Creates a new external string using the Latin1-encoded data defined in the | 
| + * given resource. When the external string is no longer live on V8's heap | 
| + * the resource will be disposed by calling its Dispose method. The caller of | 
| + * this function should not otherwise delete or modify the resource. Neither | 
| + * should the underlying buffer be deallocated or modified except through the | 
| + * destructor of the external string resource. | 
| + * If the data contains non-ASCII character, the string is created as new | 
| 
 
Erik Corry
2012/08/21 12:21:34
contains -> contains a
as new -> as a new
 
Yang
2012/08/21 13:06:20
Done.
 
 | 
| + * string object on the V8 heap and the Dispose method is called on the | 
| + * resource immediately. This is because V8 is currently unable to handle | 
| 
 
Erik Corry
2012/08/21 12:21:34
is currently unable -> is unable
 
Yang
2012/08/21 13:06:20
Done.
 
 | 
| + * non-ASCII Latin1-encoded strings internally. | 
| + */ | 
| + V8EXPORT static Local<String> NewExternal( | 
| + ExternalLatin1StringResource* resource, | 
| + int encoding = String::LATIN1_ENCODING); | 
| + | 
| + | 
| /** | 
| * Returns true if this string can be made external. | 
| */ | 
| @@ -1268,11 +1342,13 @@ class String : public Primitive { | 
| /** Creates an undetectable string from the supplied ASCII or UTF-8 data.*/ | 
| V8EXPORT static Local<String> NewUndetectable(const char* data, | 
| - int length = -1); | 
| + int length = kUndefinedLength, | 
| + int encoding = UTF8_ENCODING); | 
| - /** Creates an undetectable string from the supplied 16-bit character codes.*/ | 
| + /** Creates an undetectable string from the supplied 16-bit UTF16 code units. | 
| + */ | 
| V8EXPORT static Local<String> NewUndetectable(const uint16_t* data, | 
| - int length = -1); | 
| + int length = kUndefinedLength); | 
| /** | 
| * Converts an object to a UTF-8-encoded character array. Useful if | 
| @@ -1343,7 +1419,9 @@ class String : public Primitive { | 
| }; | 
| private: | 
| - V8EXPORT void VerifyExternalStringResource(ExternalStringResource* val) const; | 
| + V8EXPORT void VerifyExternalStringEncoding(int encoding) const; | 
| + V8EXPORT void VerifyExternalStringResourceBase( | 
| + ExternalStringResourceBase* val) const; | 
| V8EXPORT static void CheckCast(v8::Value* obj); | 
| }; | 
| @@ -3960,6 +4038,9 @@ class Internals { | 
| static const int kJSObjectHeaderSize = 3 * kApiPointerSize; | 
| static const int kFullStringRepresentationMask = 0x07; | 
| static const int kExternalTwoByteRepresentationTag = 0x02; | 
| + static const int kExternalAsciiRepresentationTag = 0x06; | 
| + static const int kExternalAsciiDataHintMask = 0x08; | 
| + static const int kExternalAsciiDataHintTag = 0x08; | 
| static const int kIsolateStateOffset = 0; | 
| static const int kIsolateEmbedderDataOffset = 1 * kApiPointerSize; | 
| @@ -4017,11 +4098,6 @@ class Internals { | 
| } | 
| } | 
| - static inline bool IsExternalTwoByteString(int instance_type) { | 
| - int representation = (instance_type & kFullStringRepresentationMask); | 
| - return representation == kExternalTwoByteRepresentationTag; | 
| - } | 
| - | 
| static inline bool IsInitialized(v8::Isolate* isolate) { | 
| uint8_t* addr = reinterpret_cast<uint8_t*>(isolate) + kIsolateStateOffset; | 
| return *reinterpret_cast<int*>(addr) == 1; | 
| @@ -4299,16 +4375,56 @@ Local<String> String::Empty(Isolate* isolate) { | 
| String::ExternalStringResource* String::GetExternalStringResource() const { | 
| typedef internal::Object O; | 
| typedef internal::Internals I; | 
| + String::ExternalStringResource* result = NULL; | 
| O* obj = *reinterpret_cast<O**>(const_cast<String*>(this)); | 
| - String::ExternalStringResource* result; | 
| - if (I::IsExternalTwoByteString(I::GetInstanceType(obj))) { | 
| - void* value = I::ReadField<void*>(obj, I::kStringResourceOffset); | 
| - result = reinterpret_cast<String::ExternalStringResource*>(value); | 
| - } else { | 
| - result = NULL; | 
| + if ((I::GetInstanceType(obj) & I::kFullStringRepresentationMask) == | 
| + I::kExternalTwoByteRepresentationTag) { | 
| + result = reinterpret_cast<String::ExternalStringResource*>( | 
| + GetExternalStringResourceBase()); | 
| + } | 
| + return result; | 
| +} | 
| + | 
| + | 
| +int String::GetExternalStringEncoding() const { | 
| + typedef internal::Object O; | 
| + typedef internal::Internals I; | 
| + O* obj = *reinterpret_cast<O**>(const_cast<String*>(this)); | 
| + static const int kRepresentationAndHintMask = | 
| + I::kFullStringRepresentationMask | I::kExternalAsciiDataHintMask; | 
| + | 
| + int encoding; | 
| + switch (I::GetInstanceType(obj) & kRepresentationAndHintMask) { | 
| + case I::kExternalTwoByteRepresentationTag | I::kExternalAsciiDataHintTag: | 
| + encoding = UTF16_ENCODING | STRICT_ASCII_HINT; | 
| + break; | 
| + case I::kExternalTwoByteRepresentationTag: | 
| + encoding = UTF16_ENCODING | NOT_ASCII_HINT; | 
| + break; | 
| + case I::kExternalAsciiRepresentationTag: | 
| + encoding = LATIN1_ENCODING | STRICT_ASCII_HINT; | 
| + break; | 
| + default: | 
| + encoding = INVALID_ENCODING; | 
| + break; | 
| } | 
| #ifdef V8_ENABLE_CHECKS | 
| - VerifyExternalStringResource(result); | 
| + VerifyExternalStringEncoding(encoding); | 
| +#endif | 
| + return encoding; | 
| +} | 
| + | 
| + | 
| +String::ExternalStringResourceBase* String::GetExternalStringResourceBase() | 
| + const { | 
| + typedef internal::Object O; | 
| + typedef internal::Internals I; | 
| + O* obj = *reinterpret_cast<O**>(const_cast<String*>(this)); | 
| + void* value = I::ReadField<void*>(obj, I::kStringResourceOffset); | 
| + ExternalStringResourceBase* result = | 
| + reinterpret_cast<String::ExternalStringResourceBase*>(value); | 
| +#ifdef V8_ENABLE_CHECKS | 
| + VerifyExternalStringResourceBase(result); | 
| #endif | 
| return result; | 
| } |