Index: include/v8.h |
diff --git a/include/v8.h b/include/v8.h |
index 480cbaa9553f295f6ad844f789e54fa128059402..3cb37b1b9b6099c9a8cb87239cdd00f994f4cc32 100644 |
--- a/include/v8.h |
+++ b/include/v8.h |
@@ -1069,19 +1069,48 @@ class String : public Primitive { |
PRESERVE_ASCII_NULL = 4 |
}; |
- // 16-bit character codes. |
+ |
+ enum StringEncoding { |
+ INVALID_ENCODING = 0, |
+ UTF_8_ENCODING = 1, |
+ LATIN1_ENCODING = 2, |
+ UTF_16_ENCODING = 3, |
+ |
+ ASCII_HINT = 1 << 16, |
+ NOT_ASCII_HINT = 1 << 17 |
+ }; |
+ |
+ static const int kStringEncodingMask = 3; |
+ static const int kAsciiHintMask = String::ASCII_HINT | String::NOT_ASCII_HINT; |
+ |
+ static const int kUndefinedLength = -1; |
+ |
+ |
+ // 16-bit UTF16 code units. PRESERVE_ASCII_NULL is not supported as an option, |
+ // null-characters are never converted to spaces. |
V8EXPORT int Write(uint16_t* buffer, |
int start = 0, |
- int length = -1, |
+ int length = kUndefinedLength, |
int options = NO_OPTIONS) const; |
- // ASCII characters. |
+ |
+ // ASCII characters. Null-characters are converted to spaces unless |
+ // PRESERVE_ASCII_NULL is set as an option. |
V8EXPORT int WriteAscii(char* buffer, |
int start = 0, |
- int length = -1, |
+ int length = kUndefinedLength, |
int options = NO_OPTIONS) const; |
- // UTF-8 encoded characters. |
+ |
+ // Latin1 characters. PRESERVE_ASCII_NULL is not supported as an option, |
+ // null-characters are never converted to spaces. |
+ V8EXPORT int WriteLatin1(char* buffer, |
+ int start = 0, |
+ int length = kUndefinedLength, |
+ int options = NO_OPTIONS) const; |
+ |
+ // UTF-8 encoded characters. PRESERVE_ASCII_NULL is not supported as an |
+ // option, null-characters are never converted to spaces. |
V8EXPORT int WriteUtf8(char* buffer, |
- int length = -1, |
+ int length = kUndefinedLength, |
int* nchars_ref = NULL, |
int options = NO_OPTIONS) const; |
@@ -1122,6 +1151,7 @@ class String : public Primitive { |
void operator=(const ExternalStringResourceBase&); |
friend class v8::internal::Heap; |
+ friend class v8::String; |
}; |
/** |
@@ -1181,6 +1211,16 @@ class String : public Primitive { |
}; |
/** |
+ * An ExternalLatin1StringResource is a wrapper around a Latin1-encoded |
+ * string buffer that resides outside V8's heap. For usage in V8, a Latin1 |
+ * string is converted to ASCII or two-byte string depending on whether |
+ * it contains non-ASCII characters. |
+ */ |
+ class V8EXPORT ExternalLatin1StringResource |
+ : public ExternalAsciiStringResource { |
+ }; |
+ |
+ /** |
* Get the ExternalStringResource for an external string. Returns |
* NULL if IsExternal() doesn't return true. |
*/ |
@@ -1193,24 +1233,44 @@ class String : public Primitive { |
V8EXPORT const ExternalAsciiStringResource* GetExternalAsciiStringResource() |
const; |
+ /** |
+ * If the string is external, return its encoding (Latin1 or UTF16) |
+ * and possibly a hint on whether the content is ASCII. |
+ * Return String::INVALID_ENCODING otherwise. |
+ */ |
+ inline int GetExternalStringEncoding() const; |
+ |
+ |
+ /** |
+ * Return the resource of the external string regardless of encoding. |
+ * Call this only after having made sure that the string is indeed external! |
+ */ |
+ inline ExternalStringResourceBase* GetExternalStringResourceBase() const; |
+ |
static inline String* Cast(v8::Value* obj); |
/** |
- * Allocates a new string from either UTF-8 encoded or ASCII data. |
- * The second parameter 'length' gives the buffer length. |
- * If the data is UTF-8 encoded, the caller must |
- * be careful to supply the length parameter. |
- * If it is not given, the function calls |
- * 'strlen' to determine the buffer length, it might be |
- * wrong if 'data' contains a null character. |
+ * Allocates a new string from either UTF-8 or Latin1-encoded data. |
+ * The second parameter 'length' gives the buffer length. If the data may |
+ * contain zero bytes, the caller must be careful to supply the length |
+ * parameter. If it is not given, the function calls 'strlen' to determine |
+ * the buffer length, it might be wrong if 'data' contains a null character. |
+ * The third parameter specifies the encoding, which may include a hint |
+ * whether the string contains ASCII characters. In the case of Latin1, the |
+ * appropriate internal representation (UTF16 or ASCII) is chosen. |
*/ |
- V8EXPORT static Local<String> New(const char* data, int length = -1); |
+ V8EXPORT static Local<String> New(const char* data, |
+ int length = kUndefinedLength, |
+ int encoding = UTF_8_ENCODING); |
- /** Allocates a new string from 16-bit character codes.*/ |
- V8EXPORT static Local<String> New(const uint16_t* data, int length = -1); |
+ /** Allocates a new string from 16-bit UTF-16 code units.*/ |
+ V8EXPORT static Local<String> New(const uint16_t* data, |
+ int length = kUndefinedLength); |
/** Creates a symbol. Returns one if it exists already.*/ |
- V8EXPORT static Local<String> NewSymbol(const char* data, int length = -1); |
+ V8EXPORT static Local<String> NewSymbol(const char* data, |
+ int length = kUndefinedLength, |
+ int encoding = UTF_8_ENCODING); |
/** |
* Creates a new string by concatenating the left and the right strings |
@@ -1247,7 +1307,8 @@ class String : public Primitive { |
* this function should not otherwise delete or modify the resource. Neither |
* should the underlying buffer be deallocated or modified except through the |
* destructor of the external string resource. |
- */ V8EXPORT static Local<String> NewExternal( |
+ */ |
+ V8EXPORT static Local<String> NewExternal( |
ExternalAsciiStringResource* resource); |
/** |
@@ -1261,6 +1322,24 @@ class String : public Primitive { |
*/ |
V8EXPORT bool MakeExternal(ExternalAsciiStringResource* resource); |
+ |
+ /** |
+ * Creates a new external string using the Latin1-encoded data defined in the |
+ * given resource. When the external string is no longer live on V8's heap |
+ * the resource will be disposed by calling its Dispose method. The caller of |
+ * this function should not otherwise delete or modify the resource. Neither |
+ * should the underlying buffer be deallocated or modified except through the |
+ * destructor of the external string resource. |
+ * If the data contains a non-ASCII character, the string is created as a new |
+ * string object on the V8 heap and the Dispose method is called on the |
+ * resource immediately. This is because V8 is unable to handle non-ASCII |
+ * Latin1-encoded strings internally. |
+ */ |
+ V8EXPORT static Local<String> NewExternal( |
+ ExternalLatin1StringResource* resource, |
+ int encoding = String::LATIN1_ENCODING); |
+ |
+ |
/** |
* Returns true if this string can be made external. |
*/ |
@@ -1268,11 +1347,13 @@ class String : public Primitive { |
/** Creates an undetectable string from the supplied ASCII or UTF-8 data.*/ |
V8EXPORT static Local<String> NewUndetectable(const char* data, |
- int length = -1); |
+ int length = kUndefinedLength, |
+ int encoding = UTF_8_ENCODING); |
- /** Creates an undetectable string from the supplied 16-bit character codes.*/ |
+ /** Creates an undetectable string from the supplied 16-bit UTF16 code units. |
+ */ |
V8EXPORT static Local<String> NewUndetectable(const uint16_t* data, |
- int length = -1); |
+ int length = kUndefinedLength); |
/** |
* Converts an object to a UTF-8-encoded character array. Useful if |
@@ -1343,7 +1424,9 @@ class String : public Primitive { |
}; |
private: |
- V8EXPORT void VerifyExternalStringResource(ExternalStringResource* val) const; |
+ V8EXPORT void VerifyExternalStringEncoding(int encoding) const; |
+ V8EXPORT void VerifyExternalStringResourceBase( |
+ ExternalStringResourceBase* val) const; |
V8EXPORT static void CheckCast(v8::Value* obj); |
}; |
@@ -3960,6 +4043,9 @@ class Internals { |
static const int kJSObjectHeaderSize = 3 * kApiPointerSize; |
static const int kFullStringRepresentationMask = 0x07; |
static const int kExternalTwoByteRepresentationTag = 0x02; |
+ static const int kExternalAsciiRepresentationTag = 0x06; |
+ static const int kExternalAsciiDataHintMask = 0x08; |
+ static const int kExternalAsciiDataHintTag = 0x08; |
static const int kIsolateStateOffset = 0; |
static const int kIsolateEmbedderDataOffset = 1 * kApiPointerSize; |
@@ -4017,11 +4103,6 @@ class Internals { |
} |
} |
- static inline bool IsExternalTwoByteString(int instance_type) { |
- int representation = (instance_type & kFullStringRepresentationMask); |
- return representation == kExternalTwoByteRepresentationTag; |
- } |
- |
static inline bool IsInitialized(v8::Isolate* isolate) { |
uint8_t* addr = reinterpret_cast<uint8_t*>(isolate) + kIsolateStateOffset; |
return *reinterpret_cast<int*>(addr) == 1; |
@@ -4299,16 +4380,56 @@ Local<String> String::Empty(Isolate* isolate) { |
String::ExternalStringResource* String::GetExternalStringResource() const { |
typedef internal::Object O; |
typedef internal::Internals I; |
+ String::ExternalStringResource* result = NULL; |
O* obj = *reinterpret_cast<O**>(const_cast<String*>(this)); |
- String::ExternalStringResource* result; |
- if (I::IsExternalTwoByteString(I::GetInstanceType(obj))) { |
- void* value = I::ReadField<void*>(obj, I::kStringResourceOffset); |
- result = reinterpret_cast<String::ExternalStringResource*>(value); |
- } else { |
- result = NULL; |
+ if ((I::GetInstanceType(obj) & I::kFullStringRepresentationMask) == |
+ I::kExternalTwoByteRepresentationTag) { |
+ result = reinterpret_cast<String::ExternalStringResource*>( |
+ GetExternalStringResourceBase()); |
} |
+ return result; |
+} |
+ |
+ |
+int String::GetExternalStringEncoding() const { |
+ typedef internal::Object O; |
+ typedef internal::Internals I; |
+ O* obj = *reinterpret_cast<O**>(const_cast<String*>(this)); |
+ static const int kRepresentationAndHintMask = |
+ I::kFullStringRepresentationMask | I::kExternalAsciiDataHintMask; |
+ |
+ int encoding; |
+ switch (I::GetInstanceType(obj) & kRepresentationAndHintMask) { |
+ case I::kExternalTwoByteRepresentationTag | I::kExternalAsciiDataHintTag: |
+ encoding = UTF_16_ENCODING | ASCII_HINT; |
+ break; |
+ case I::kExternalTwoByteRepresentationTag: |
+ encoding = UTF_16_ENCODING | NOT_ASCII_HINT; |
+ break; |
+ case I::kExternalAsciiRepresentationTag: |
+ encoding = LATIN1_ENCODING | ASCII_HINT; |
+ break; |
+ default: |
+ encoding = INVALID_ENCODING; |
+ break; |
+ } |
+#ifdef V8_ENABLE_CHECKS |
+ VerifyExternalStringEncoding(encoding); |
+#endif |
+ return encoding; |
+} |
+ |
+ |
+String::ExternalStringResourceBase* String::GetExternalStringResourceBase() |
+ const { |
+ typedef internal::Object O; |
+ typedef internal::Internals I; |
+ O* obj = *reinterpret_cast<O**>(const_cast<String*>(this)); |
+ void* value = I::ReadField<void*>(obj, I::kStringResourceOffset); |
+ ExternalStringResourceBase* result = |
+ reinterpret_cast<String::ExternalStringResourceBase*>(value); |
#ifdef V8_ENABLE_CHECKS |
- VerifyExternalStringResource(result); |
+ VerifyExternalStringResourceBase(result); |
#endif |
return result; |
} |