| Index: src/objects.h
|
| diff --git a/src/objects.h b/src/objects.h
|
| index 13edfd78ba7bc46f569600fb2b2c7d8a841734cf..d8d2da7748a63a0ea24b31820da50d0f2c09118f 100644
|
| --- a/src/objects.h
|
| +++ b/src/objects.h
|
| @@ -86,6 +86,7 @@
|
| // - SeqString
|
| // - SeqAsciiString
|
| // - SeqTwoByteString
|
| +// - SlicedString
|
| // - ConsString
|
| // - ExternalString
|
| // - ExternalAsciiString
|
| @@ -280,6 +281,7 @@ static const int kVariableSizeSentinel = 0;
|
| V(ASCII_STRING_TYPE) \
|
| V(CONS_STRING_TYPE) \
|
| V(CONS_ASCII_STRING_TYPE) \
|
| + V(SLICED_STRING_TYPE) \
|
| V(EXTERNAL_STRING_TYPE) \
|
| V(EXTERNAL_STRING_WITH_ASCII_DATA_TYPE) \
|
| V(EXTERNAL_ASCII_STRING_TYPE) \
|
| @@ -396,6 +398,14 @@ static const int kVariableSizeSentinel = 0;
|
| ConsString::kSize, \
|
| cons_ascii_string, \
|
| ConsAsciiString) \
|
| + V(SLICED_STRING_TYPE, \
|
| + SlicedString::kSize, \
|
| + sliced_string, \
|
| + SlicedString) \
|
| + V(SLICED_ASCII_STRING_TYPE, \
|
| + SlicedString::kSize, \
|
| + sliced_ascii_string, \
|
| + SlicedAsciiString) \
|
| V(EXTERNAL_STRING_TYPE, \
|
| ExternalTwoByteString::kSize, \
|
| external_string, \
|
| @@ -469,9 +479,17 @@ const uint32_t kStringRepresentationMask = 0x03;
|
| enum StringRepresentationTag {
|
| kSeqStringTag = 0x0,
|
| kConsStringTag = 0x1,
|
| - kExternalStringTag = 0x2
|
| + kExternalStringTag = 0x2,
|
| + kSlicedStringTag = 0x3
|
| };
|
| -const uint32_t kIsConsStringMask = 0x1;
|
| +const uint32_t kIsIndirectStringMask = 0x1;
|
| +const uint32_t kIsIndirectStringTag = 0x1;
|
| +STATIC_ASSERT((kSeqStringTag & kIsIndirectStringMask) == 0);
|
| +STATIC_ASSERT((kExternalStringTag & kIsIndirectStringMask) == 0);
|
| +STATIC_ASSERT(
|
| + (kConsStringTag & kIsIndirectStringMask) == kIsIndirectStringTag);
|
| +STATIC_ASSERT(
|
| + (kSlicedStringTag & kIsIndirectStringMask) == kIsIndirectStringTag);
|
|
|
| // If bit 7 is clear, then bit 3 indicates whether this two-byte
|
| // string actually contains ascii data.
|
| @@ -506,6 +524,8 @@ enum InstanceType {
|
| ASCII_STRING_TYPE = kAsciiStringTag | kSeqStringTag,
|
| CONS_STRING_TYPE = kTwoByteStringTag | kConsStringTag,
|
| CONS_ASCII_STRING_TYPE = kAsciiStringTag | kConsStringTag,
|
| + SLICED_STRING_TYPE = kTwoByteStringTag | kSlicedStringTag,
|
| + SLICED_ASCII_STRING_TYPE = kAsciiStringTag | kSlicedStringTag,
|
| EXTERNAL_STRING_TYPE = kTwoByteStringTag | kExternalStringTag,
|
| EXTERNAL_STRING_WITH_ASCII_DATA_TYPE =
|
| kTwoByteStringTag | kExternalStringTag | kAsciiDataHintTag,
|
| @@ -709,6 +729,7 @@ class MaybeObject BASE_EMBEDDED {
|
| V(SeqString) \
|
| V(ExternalString) \
|
| V(ConsString) \
|
| + V(SlicedString) \
|
| V(ExternalTwoByteString) \
|
| V(ExternalAsciiString) \
|
| V(SeqTwoByteString) \
|
| @@ -5689,6 +5710,8 @@ class StringShape BASE_EMBEDDED {
|
| inline bool IsSequential();
|
| inline bool IsExternal();
|
| inline bool IsCons();
|
| + inline bool IsSliced();
|
| + inline bool IsIndirect();
|
| inline bool IsExternalAscii();
|
| inline bool IsExternalTwoByte();
|
| inline bool IsSequentialAscii();
|
| @@ -5737,6 +5760,12 @@ class String: public HeapObject {
|
| inline bool IsAsciiRepresentation();
|
| inline bool IsTwoByteRepresentation();
|
|
|
| + // Cons and slices have an encoding flag that may not represent the actual
|
| + // encoding of the underlying string. This is taken into account here.
|
| + // Requires: this->IsFlat()
|
| + inline bool IsAsciiRepresentationUnderneath();
|
| + inline bool IsTwoByteRepresentationUnderneath();
|
| +
|
| // Returns whether this string has ascii chars, i.e. all of them can
|
| // be ascii encoded. This might be the case even if the string is
|
| // two-byte. Such strings may appear when the embedder prefers
|
| @@ -5774,6 +5803,10 @@ class String: public HeapObject {
|
| // string.
|
| inline String* TryFlattenGetString(PretenureFlag pretenure = NOT_TENURED);
|
|
|
| + // Returns the parent of a sliced string or first part of a flat cons string.
|
| + // Requires: StringShape(this).IsIndirect() && this->IsFlat()
|
| + inline String* GetUnderlying();
|
| +
|
| Vector<const char> ToAsciiVector();
|
| Vector<const uc16> ToUC16Vector();
|
|
|
| @@ -6203,11 +6236,69 @@ class ConsString: public String {
|
| typedef FixedBodyDescriptor<kFirstOffset, kSecondOffset + kPointerSize, kSize>
|
| BodyDescriptor;
|
|
|
| +#ifdef DEBUG
|
| + void ConsStringVerify();
|
| +#endif
|
| +
|
| private:
|
| DISALLOW_IMPLICIT_CONSTRUCTORS(ConsString);
|
| };
|
|
|
|
|
| +// The Sliced String class describes strings that are substrings of another
|
| +// sequential string. The motivation is to save time and memory when creating
|
| +// a substring. A Sliced String is described as a pointer to the parent,
|
| +// the offset from the start of the parent string and the length. Using
|
| +// a Sliced String therefore requires unpacking of the parent string and
|
| +// adding the offset to the start address. Substrings of a Sliced String
|
| +// are not nested since the double indirection is simplified when creating
|
| +// such a substring.
|
| +// Currently missing features are:
|
| +// - handling externalized parent strings
|
| +// - external strings as parent
|
| +// - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
|
| +class SlicedString: public String {
|
| + public:
|
| +
|
| + inline String* parent();
|
| + inline void set_parent(String* parent);
|
| + inline int offset();
|
| + inline void set_offset(int offset);
|
| +
|
| + // Dispatched behavior.
|
| + uint16_t SlicedStringGet(int index);
|
| +
|
| + // Casting.
|
| + static inline SlicedString* cast(Object* obj);
|
| +
|
| + // Layout description.
|
| + static const int kParentOffset = POINTER_SIZE_ALIGN(String::kSize);
|
| + static const int kOffsetOffset = kParentOffset + kPointerSize;
|
| + static const int kSize = kOffsetOffset + kPointerSize;
|
| +
|
| + // Support for StringInputBuffer.
|
| + inline const unibrow::byte* SlicedStringReadBlock(ReadBlockBuffer* buffer,
|
| + unsigned* offset_ptr,
|
| + unsigned chars);
|
| + inline void SlicedStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
|
| + unsigned* offset_ptr,
|
| + unsigned chars);
|
| + // Minimum length for a sliced string.
|
| + static const int kMinLength = 13;
|
| +
|
| + typedef FixedBodyDescriptor<kParentOffset,
|
| + kOffsetOffset + kPointerSize, kSize>
|
| + BodyDescriptor;
|
| +
|
| +#ifdef DEBUG
|
| + void SlicedStringVerify();
|
| +#endif
|
| +
|
| + private:
|
| + DISALLOW_IMPLICIT_CONSTRUCTORS(SlicedString);
|
| +};
|
| +
|
| +
|
| // The ExternalString class describes string values that are backed by
|
| // a string resource that lies outside the V8 heap. ExternalStrings
|
| // consist of the length field common to all strings, a pointer to the
|
|
|