Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(521)

Unified Diff: include/v8.h

Issue 10828229: Add basic support for Latin1 strings to the API. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | src/api.cc » ('j') | src/api.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: include/v8.h
diff --git a/include/v8.h b/include/v8.h
index 480cbaa9553f295f6ad844f789e54fa128059402..ef27d5d3f9f546ebabf669326d6e3eee205af5e8 100644
--- a/include/v8.h
+++ b/include/v8.h
@@ -1069,19 +1069,43 @@ class String : public Primitive {
PRESERVE_ASCII_NULL = 4
};
+
+ enum StringEncoding {
+ INVALID_ENCODING = 0,
+ UTF8_ENCODING = 1,
+ LATIN1_ENCODING = 2,
+ UTF16_ENCODING = 3,
+
+ STRICT_ASCII_HINT = 1 << 16,
+ NOT_ASCII_HINT = 1 << 17
+ };
+
+ static const int kStringEncodingMask = 3;
+ static const int kAsciiHintMask =
+ String::STRICT_ASCII_HINT | String::NOT_ASCII_HINT;
+
+ static const int kUndefinedLength = -1;
+
+
// 16-bit character codes.
V8EXPORT int Write(uint16_t* buffer,
int start = 0,
- int length = -1,
+ int length = kUndefinedLength,
int options = NO_OPTIONS) const;
// ASCII characters.
V8EXPORT int WriteAscii(char* buffer,
int start = 0,
- int length = -1,
+ int length = kUndefinedLength,
int options = NO_OPTIONS) const;
+ // Latin1 characters.
+ V8EXPORT int WriteLatin1(char* buffer,
+ int start = 0,
+ int length = kUndefinedLength,
+ int options = NO_OPTIONS) const;
+
// UTF-8 encoded characters.
V8EXPORT int WriteUtf8(char* buffer,
- int length = -1,
+ int length = kUndefinedLength,
int* nchars_ref = NULL,
int options = NO_OPTIONS) const;
@@ -1122,6 +1146,7 @@ class String : public Primitive {
void operator=(const ExternalStringResourceBase&);
friend class v8::internal::Heap;
+ friend class v8::String;
};
/**
@@ -1181,6 +1206,16 @@ class String : public Primitive {
};
/**
+ * An ExternalLatin1StringResource is a wrapper around a Latin1-encoded
+ * string buffer that resides outside V8's heap. For usage in V8, Latin1
+ * strings are converted to an ASCII or two-byte string depending on whether
+ * the string contains non-ASCII characters.
+ */
+ class V8EXPORT ExternalLatin1StringResource
+ : public ExternalAsciiStringResource {
+ };
+
+ /**
* Get the ExternalStringResource for an external string. Returns
* NULL if IsExternal() doesn't return true.
*/
@@ -1193,24 +1228,44 @@ class String : public Primitive {
V8EXPORT const ExternalAsciiStringResource* GetExternalAsciiStringResource()
const;
+ /**
+ * If the string is external, return its encoding (Latin1, UTF8 or UTF16)
+ * and possibly a hint on whether the content is ASCII.
Erik Corry 2012/08/10 11:28:51 Comment seems wrong as it can't return a UTF8 enco
Yang 2012/08/10 13:14:45 Done.
+ * Return String::INVALID_ENCODING otherwise.
+ */
+ inline int GetExternalStringEncoding() const;
+
+
+ /**
+ * Return the resource of the external string regardless of encoding.
+ * Call this only after having made sure that the string is indeed external!
+ */
+ inline ExternalStringResourceBase* GetExternalStringResourceBase() const;
+
static inline String* Cast(v8::Value* obj);
/**
- * Allocates a new string from either UTF-8 encoded or ASCII data.
- * The second parameter 'length' gives the buffer length.
- * If the data is UTF-8 encoded, the caller must
- * be careful to supply the length parameter.
- * If it is not given, the function calls
- * 'strlen' to determine the buffer length, it might be
- * wrong if 'data' contains a null character.
+ * Allocates a new string from either UTF-8- or Latin1-encoded data.
+ * The second parameter 'length' gives the buffer length. If the data is
+ * UTF-8 encoded, the caller must be careful to supply the length parameter.
+ * If it is not given, the function calls 'strlen' to determine the buffer
+ * length, which might be wrong if 'data' contains a null character.
+ * The third parameter specifies the encoding, which may include a hint on
+ * whether the string contains ASCII characters. In case of Latin1, the
+ * appropriate internal representation (UTF16 or ASCII) is chosen.
*/
- V8EXPORT static Local<String> New(const char* data, int length = -1);
+ V8EXPORT static Local<String> New(const char* data,
+ int length = kUndefinedLength,
+ int encoding = UTF8_ENCODING);
Erik Corry 2012/08/10 11:28:51 No point in making the second argument optional.
Yang 2012/08/10 13:14:45 I see your point here. But then I also feel a cer
/** Allocates a new string from 16-bit character codes.*/
Erik Corry 2012/08/10 11:28:51 Not your error, but: 16-bit character codes -> 16-
Yang 2012/08/10 13:14:45 Done.
- V8EXPORT static Local<String> New(const uint16_t* data, int length = -1);
+ V8EXPORT static Local<String> New(const uint16_t* data,
+ int length = kUndefinedLength);
/** Creates a symbol. Returns one if it exists already.*/
- V8EXPORT static Local<String> NewSymbol(const char* data, int length = -1);
+ V8EXPORT static Local<String> NewSymbol(const char* data,
+ int length = kUndefinedLength,
+ int encoding = UTF8_ENCODING);
/**
* Creates a new string by concatenating the left and the right strings
@@ -1247,7 +1302,8 @@ class String : public Primitive {
* this function should not otherwise delete or modify the resource. Neither
* should the underlying buffer be deallocated or modified except through the
* destructor of the external string resource.
- */ V8EXPORT static Local<String> NewExternal(
+ */
+ V8EXPORT static Local<String> NewExternal(
ExternalAsciiStringResource* resource);
/**
@@ -1261,6 +1317,37 @@ class String : public Primitive {
*/
V8EXPORT bool MakeExternal(ExternalAsciiStringResource* resource);
+
+ /**
+ * Creates a new external string using the Latin1-encoded data defined in the
+ * given resource. When the external string is no longer live on V8's heap
+ * the resource will be disposed by calling its Dispose method. The caller of
+ * this function should not otherwise delete or modify the resource. Neither
+ * should the underlying buffer be deallocated or modified except through the
+ * destructor of the external string resource.
+ * If the data contains non-ASCII characters, the string is created as a new
+ * string object on the V8 heap and the Dispose method is called on the
Erik Corry 2012/08/10 11:28:51 is called ->is called immediately ?
Yang 2012/08/10 13:14:45 Done.
+ * resource. This is because V8 is unable to handle non-ASCII Latin1-encoded
+ * strings internally at this time.
+ */
+ V8EXPORT static Local<String> NewExternal(
+ ExternalLatin1StringResource* resource,
+ int encoding = String::LATIN1_ENCODING);
+
+ /**
+ * Associate an external string resource with this string by transforming it
+ * in place so that existing references to this string in the JavaScript heap
+ * will use the external string resource. The external string resource's
+ * character contents need to be equivalent to this string.
+ * Returns true if the string has been changed to be an external string.
+ * The string is not modified if the operation fails. See NewExternal for
+ * information on the lifetime of the resource.
+ * If the string to be externalized contains non-ASCII characters, use
+ * the two-byte ExternalStringResource instead.
+ */
+ V8EXPORT bool MakeExternal(ExternalLatin1StringResource* resource);
Erik Corry 2012/08/10 11:28:51 Seems like this should be an ExternalASCIIStringRe
Yang 2012/08/10 13:14:45 It's actually intended to be like this. I'm torn
+
+
/**
* Returns true if this string can be made external.
*/
@@ -1268,11 +1355,12 @@ class String : public Primitive {
/** Creates an undetectable string from the supplied ASCII or UTF-8 data.*/
V8EXPORT static Local<String> NewUndetectable(const char* data,
- int length = -1);
+ int length = kUndefinedLength,
+ int encoding = UTF8_ENCODING);
/** Creates an undetectable string from the supplied 16-bit character codes.*/
V8EXPORT static Local<String> NewUndetectable(const uint16_t* data,
- int length = -1);
+ int length = kUndefinedLength);
/**
* Converts an object to a UTF-8-encoded character array. Useful if
@@ -1343,7 +1431,9 @@ class String : public Primitive {
};
private:
- V8EXPORT void VerifyExternalStringResource(ExternalStringResource* val) const;
+ V8EXPORT void VerifyExternalStringEncoding(int encoding) const;
+ V8EXPORT void VerifyExternalStringResourceBase(
+ ExternalStringResourceBase* val) const;
V8EXPORT static void CheckCast(v8::Value* obj);
};
@@ -3960,6 +4050,9 @@ class Internals {
static const int kJSObjectHeaderSize = 3 * kApiPointerSize;
static const int kFullStringRepresentationMask = 0x07;
static const int kExternalTwoByteRepresentationTag = 0x02;
+ static const int kExternalAsciiRepresentationTag = 0x06;
+ static const int kExternalAsciiDataHintMask = 0x08;
+ static const int kExternalAsciiDataHintTag = 0x08;
static const int kIsolateStateOffset = 0;
static const int kIsolateEmbedderDataOffset = 1 * kApiPointerSize;
@@ -4017,11 +4110,6 @@ class Internals {
}
}
- static inline bool IsExternalTwoByteString(int instance_type) {
- int representation = (instance_type & kFullStringRepresentationMask);
- return representation == kExternalTwoByteRepresentationTag;
- }
-
static inline bool IsInitialized(v8::Isolate* isolate) {
uint8_t* addr = reinterpret_cast<uint8_t*>(isolate) + kIsolateStateOffset;
return *reinterpret_cast<int*>(addr) == 1;
@@ -4297,18 +4385,55 @@ Local<String> String::Empty(Isolate* isolate) {
String::ExternalStringResource* String::GetExternalStringResource() const {
+ String::ExternalStringResource* result = NULL;
+ if ((GetExternalStringEncoding() & kStringEncodingMask) ==
Erik Corry 2012/08/10 11:28:51 This calls GetExternalStringEncoding which gets on
Yang 2012/08/10 13:14:45 Done.
+ UTF16_ENCODING) {
+ result = reinterpret_cast<String::ExternalStringResource*>(
+ GetExternalStringResourceBase());
+ }
+ return result;
+}
+
+
+int String::GetExternalStringEncoding() const {
typedef internal::Object O;
typedef internal::Internals I;
O* obj = *reinterpret_cast<O**>(const_cast<String*>(this));
- String::ExternalStringResource* result;
- if (I::IsExternalTwoByteString(I::GetInstanceType(obj))) {
- void* value = I::ReadField<void*>(obj, I::kStringResourceOffset);
- result = reinterpret_cast<String::ExternalStringResource*>(value);
- } else {
- result = NULL;
+ static const int kRepresentationAndHintMask =
+ I::kFullStringRepresentationMask | I::kExternalAsciiDataHintMask;
+
+ int encoding;
+ switch (I::GetInstanceType(obj) & kRepresentationAndHintMask) {
+ case I::kExternalTwoByteRepresentationTag | I::kExternalAsciiDataHintTag:
Erik Corry 2012/08/10 11:28:51 Indentation looks wrong here.
Yang 2012/08/10 13:14:45 Done.
+ encoding = UTF16_ENCODING | STRICT_ASCII_HINT;
+ break;
+ case I::kExternalTwoByteRepresentationTag:
+ encoding = UTF16_ENCODING | NOT_ASCII_HINT;
+ break;
+ case I::kExternalAsciiRepresentationTag:
+ encoding = LATIN1_ENCODING | STRICT_ASCII_HINT;
+ break;
+ default:
+ encoding = INVALID_ENCODING;
+ break;
}
#ifdef V8_ENABLE_CHECKS
- VerifyExternalStringResource(result);
+ VerifyExternalStringEncoding(encoding);
+#endif
+ return encoding;
+}
+
+
+String::ExternalStringResourceBase* String::GetExternalStringResourceBase()
+ const {
+ typedef internal::Object O;
+ typedef internal::Internals I;
+ O* obj = *reinterpret_cast<O**>(const_cast<String*>(this));
+ void* value = I::ReadField<void*>(obj, I::kStringResourceOffset);
+ ExternalStringResourceBase* result =
+ reinterpret_cast<String::ExternalStringResourceBase*>(value);
+#ifdef V8_ENABLE_CHECKS
+ VerifyExternalStringResourceBase(result);
#endif
return result;
}
« no previous file with comments | « no previous file | src/api.cc » ('j') | src/api.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698