OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 4486 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4497 int String::Utf8Length() const { | 4497 int String::Utf8Length() const { |
4498 i::Handle<i::String> str = Utils::OpenHandle(this); | 4498 i::Handle<i::String> str = Utils::OpenHandle(this); |
4499 i::Isolate* isolate = str->GetIsolate(); | 4499 i::Isolate* isolate = str->GetIsolate(); |
4500 return v8::Utf8Length(*str, isolate); | 4500 return v8::Utf8Length(*str, isolate); |
4501 } | 4501 } |
4502 | 4502 |
4503 | 4503 |
4504 class Utf8WriterVisitor { | 4504 class Utf8WriterVisitor { |
4505 public: | 4505 public: |
4506 Utf8WriterVisitor( | 4506 Utf8WriterVisitor( |
4507 char* buffer, int capacity, bool skip_capacity_check) | 4507 char* buffer, |
4508 int capacity, | |
4509 bool skip_capacity_check, | |
4510 bool allow_invalid_utf8) | |
4508 : early_termination_(false), | 4511 : early_termination_(false), |
4509 last_character_(unibrow::Utf16::kNoPreviousCharacter), | 4512 last_character_(unibrow::Utf16::kNoPreviousCharacter), |
4510 buffer_(buffer), | 4513 buffer_(buffer), |
4511 start_(buffer), | 4514 start_(buffer), |
4512 capacity_(capacity), | 4515 capacity_(capacity), |
4513 skip_capacity_check_(capacity == -1 || skip_capacity_check), | 4516 skip_capacity_check_(capacity == -1 || skip_capacity_check), |
4517 allow_invalid_utf8_(allow_invalid_utf8), | |
4514 utf16_chars_read_(0) { | 4518 utf16_chars_read_(0) { |
4515 } | 4519 } |
4516 | 4520 |
4517 static int WriteEndCharacter(uint16_t character, | 4521 // WritePair writes the current UTF-16 code unit to the given buffer. The |
4518 int last_character, | 4522 // function will go back inside the buffer to combine surrogate pairs. |
4523 static int WritePair(uint16_t current, | |
dcarney
2014/01/07 10:12:16
WritePair is a bad name here, since it may or may not
| |
4524 int previous, | |
4525 char* buffer, | |
4526 bool allow_invalid_utf8) { | |
4527 using namespace unibrow; | |
4528 int code_point = current; | |
4529 int written = 0; | |
4530 if (Utf16::IsSurrogatePair(previous, current)) { | |
4531 code_point = Utf16::CombineSurrogatePair(previous, current); | |
4532 buffer -= Utf8::kSizeOfUnmatchedSurrogate; | |
4533 written -= Utf8::kSizeOfUnmatchedSurrogate; | |
4534 } | |
4535 return written + Utf8::Encode(buffer, code_point, allow_invalid_utf8); | |
4536 } | |
4537 | |
4538 static int WriteEndCharacter(uint16_t current, | |
4539 int previous, | |
4519 int remaining, | 4540 int remaining, |
4520 char* const buffer) { | 4541 char* const buffer, |
4542 bool allow_invalid_utf8) { | |
4521 using namespace unibrow; | 4543 using namespace unibrow; |
4522 ASSERT(remaining > 0); | 4544 ASSERT(remaining > 0); |
4523 // We can't use a local buffer here because Encode needs to modify | 4545 // We can't use a local buffer here because WritePair needs to modify |
4524 // previous characters in the stream. We know, however, that | 4546 // previous characters in the stream. We know, however, that exactly one |
4525 // exactly one character will be advanced. | 4547 // character will be advanced. |
4526 if (Utf16::IsTrailSurrogate(character) && | 4548 if (Utf16::IsSurrogatePair(previous, current)) { |
4527 Utf16::IsLeadSurrogate(last_character)) { | 4549 int written = WritePair(current, previous, buffer, allow_invalid_utf8); |
4528 int written = Utf8::Encode(buffer, character, last_character); | |
4529 ASSERT(written == 1); | 4550 ASSERT(written == 1); |
4530 return written; | 4551 return written; |
4531 } | 4552 } |
4532 // Use a scratch buffer to check the required characters. | 4553 // Use a scratch buffer to check the required characters. |
4533 char temp_buffer[Utf8::kMaxEncodedSize]; | 4554 char temp_buffer[Utf8::kMaxEncodedSize]; |
4534 // Can't encode using last_character as gcc has array bounds issues. | 4555 // Can't encode using last_character as gcc has array bounds issues. |
4535 int written = Utf8::Encode(temp_buffer, | 4556 int written = WritePair(current, |
dcarney
2014/01/07 10:12:16
this is not a surrogate pair, could use Encode dir
| |
4536 character, | 4557 Utf16::kNoPreviousCharacter, |
4537 Utf16::kNoPreviousCharacter); | 4558 temp_buffer, |
4559 allow_invalid_utf8); | |
4538 // Won't fit. | 4560 // Won't fit. |
4539 if (written > remaining) return 0; | 4561 if (written > remaining) return 0; |
4540 // Copy over the character from temp_buffer. | 4562 // Copy over the character from temp_buffer. |
4541 for (int j = 0; j < written; j++) { | 4563 for (int j = 0; j < written; j++) { |
4542 buffer[j] = temp_buffer[j]; | 4564 buffer[j] = temp_buffer[j]; |
4543 } | 4565 } |
4544 return written; | 4566 return written; |
4545 } | 4567 } |
4546 | 4568 |
4547 template<typename Char> | 4569 template<typename Char> |
(...skipping 26 matching lines...) Expand all Loading... | |
4574 // Write the characters to the stream. | 4596 // Write the characters to the stream. |
4575 if (sizeof(Char) == 1) { | 4597 if (sizeof(Char) == 1) { |
4576 for (; i < fast_length; i++) { | 4598 for (; i < fast_length; i++) { |
4577 buffer += | 4599 buffer += |
4578 Utf8::EncodeOneByte(buffer, static_cast<uint8_t>(*chars++)); | 4600 Utf8::EncodeOneByte(buffer, static_cast<uint8_t>(*chars++)); |
4579 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); | 4601 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); |
4580 } | 4602 } |
4581 } else { | 4603 } else { |
4582 for (; i < fast_length; i++) { | 4604 for (; i < fast_length; i++) { |
4583 uint16_t character = *chars++; | 4605 uint16_t character = *chars++; |
4584 buffer += Utf8::Encode(buffer, character, last_character); | 4606 buffer += WritePair(character, |
4607 last_character, | |
4608 buffer, | |
4609 allow_invalid_utf8_); | |
4585 last_character = character; | 4610 last_character = character; |
4586 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); | 4611 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); |
4587 } | 4612 } |
4588 } | 4613 } |
4589 // Array is fully written. Exit. | 4614 // Array is fully written. Exit. |
4590 if (fast_length == length) { | 4615 if (fast_length == length) { |
4591 // Write state back out to object. | 4616 // Write state back out to object. |
4592 last_character_ = last_character; | 4617 last_character_ = last_character; |
4593 buffer_ = buffer; | 4618 buffer_ = buffer; |
4594 utf16_chars_read_ += length; | 4619 utf16_chars_read_ += length; |
4595 return; | 4620 return; |
4596 } | 4621 } |
4597 } | 4622 } |
4598 ASSERT(!skip_capacity_check_); | 4623 ASSERT(!skip_capacity_check_); |
4599 // Slow loop. Must check capacity on each iteration. | 4624 // Slow loop. Must check capacity on each iteration. |
4600 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); | 4625 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); |
4601 ASSERT(remaining_capacity >= 0); | 4626 ASSERT(remaining_capacity >= 0); |
4602 for (; i < length && remaining_capacity > 0; i++) { | 4627 for (; i < length && remaining_capacity > 0; i++) { |
4603 uint16_t character = *chars++; | 4628 uint16_t character = *chars++; |
4604 int written = WriteEndCharacter(character, | 4629 int written = WriteEndCharacter(character, |
4605 last_character, | 4630 last_character, |
4606 remaining_capacity, | 4631 remaining_capacity, |
4607 buffer); | 4632 buffer, |
4633 allow_invalid_utf8_); | |
4608 if (written == 0) { | 4634 if (written == 0) { |
4609 early_termination_ = true; | 4635 early_termination_ = true; |
4610 break; | 4636 break; |
4611 } | 4637 } |
4612 buffer += written; | 4638 buffer += written; |
4613 remaining_capacity -= written; | 4639 remaining_capacity -= written; |
4614 last_character = character; | 4640 last_character = character; |
4615 } | 4641 } |
4616 // Write state back out to object. | 4642 // Write state back out to object. |
4617 last_character_ = last_character; | 4643 last_character_ = last_character; |
(...skipping 27 matching lines...) Expand all Loading... | |
4645 return static_cast<int>(buffer_ - start_); | 4671 return static_cast<int>(buffer_ - start_); |
4646 } | 4672 } |
4647 | 4673 |
4648 private: | 4674 private: |
4649 bool early_termination_; | 4675 bool early_termination_; |
4650 int last_character_; | 4676 int last_character_; |
4651 char* buffer_; | 4677 char* buffer_; |
4652 char* const start_; | 4678 char* const start_; |
4653 int capacity_; | 4679 int capacity_; |
4654 bool const skip_capacity_check_; | 4680 bool const skip_capacity_check_; |
4681 bool const allow_invalid_utf8_; | |
4655 int utf16_chars_read_; | 4682 int utf16_chars_read_; |
4656 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8WriterVisitor); | 4683 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8WriterVisitor); |
4657 }; | 4684 }; |
4658 | 4685 |
4659 | 4686 |
4660 static bool RecursivelySerializeToUtf8(i::String* current, | 4687 static bool RecursivelySerializeToUtf8(i::String* current, |
4661 Utf8WriterVisitor* writer, | 4688 Utf8WriterVisitor* writer, |
4662 int recursion_budget) { | 4689 int recursion_budget) { |
4663 while (!writer->IsDone()) { | 4690 while (!writer->IsDone()) { |
4664 i::ConsString* cons_string = i::String::VisitFlat(writer, current); | 4691 i::ConsString* cons_string = i::String::VisitFlat(writer, current); |
(...skipping 18 matching lines...) Expand all Loading... | |
4683 int options) const { | 4710 int options) const { |
4684 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); | 4711 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); |
4685 LOG_API(isolate, "String::WriteUtf8"); | 4712 LOG_API(isolate, "String::WriteUtf8"); |
4686 ENTER_V8(isolate); | 4713 ENTER_V8(isolate); |
4687 i::Handle<i::String> str = Utils::OpenHandle(this); | 4714 i::Handle<i::String> str = Utils::OpenHandle(this); |
4688 if (options & HINT_MANY_WRITES_EXPECTED) { | 4715 if (options & HINT_MANY_WRITES_EXPECTED) { |
4689 FlattenString(str); // Flatten the string for efficiency. | 4716 FlattenString(str); // Flatten the string for efficiency. |
4690 } | 4717 } |
4691 const int string_length = str->length(); | 4718 const int string_length = str->length(); |
4692 bool write_null = !(options & NO_NULL_TERMINATION); | 4719 bool write_null = !(options & NO_NULL_TERMINATION); |
4720 bool allow_invalid_utf8 = !(options & DISALLOW_INVALID_UTF8); | |
4693 // First check if we can just write the string without checking capacity. | 4721 // First check if we can just write the string without checking capacity. |
4722 // @TODO Replace magic number 3 with something more descriptive. E.g. | |
4723 // Utf8::kMaxTwoByteSize (as in the maximum size an unsigned 2 byte code unit | |
4724 // value will take up when encoded to UTF-8)? When I first read this code I | |
4725 // thought there might be an overflow bug here since UTF-8 may take up to 4 | |
4726 // bytes per code point. Then I realized that a surrogate pair has a | |
4727 // str.length of 2, making the code correct. | |
4694 if (capacity == -1 || capacity / 3 >= string_length) { | 4728 if (capacity == -1 || capacity / 3 >= string_length) { |
4695 Utf8WriterVisitor writer(buffer, capacity, true); | 4729 Utf8WriterVisitor writer(buffer, capacity, true, allow_invalid_utf8); |
4696 const int kMaxRecursion = 100; | 4730 const int kMaxRecursion = 100; |
4697 bool success = RecursivelySerializeToUtf8(*str, &writer, kMaxRecursion); | 4731 bool success = RecursivelySerializeToUtf8(*str, &writer, kMaxRecursion); |
4698 if (success) return writer.CompleteWrite(write_null, nchars_ref); | 4732 if (success) return writer.CompleteWrite(write_null, nchars_ref); |
4699 } else if (capacity >= string_length) { | 4733 } else if (capacity >= string_length) { |
4700 // First check that the buffer is large enough. | 4734 // First check that the buffer is large enough. |
4701 int utf8_bytes = v8::Utf8Length(*str, str->GetIsolate()); | 4735 int utf8_bytes = v8::Utf8Length(*str, str->GetIsolate()); |
4702 if (utf8_bytes <= capacity) { | 4736 if (utf8_bytes <= capacity) { |
4703 // ASCII fast path. | 4737 // ASCII fast path. |
4704 if (utf8_bytes == string_length) { | 4738 if (utf8_bytes == string_length) { |
4705 WriteOneByte(reinterpret_cast<uint8_t*>(buffer), 0, capacity, options); | 4739 WriteOneByte(reinterpret_cast<uint8_t*>(buffer), 0, capacity, options); |
4706 if (nchars_ref != NULL) *nchars_ref = string_length; | 4740 if (nchars_ref != NULL) *nchars_ref = string_length; |
4707 if (write_null && (utf8_bytes+1 <= capacity)) { | 4741 if (write_null && (utf8_bytes+1 <= capacity)) { |
4708 return string_length + 1; | 4742 return string_length + 1; |
4709 } | 4743 } |
4710 return string_length; | 4744 return string_length; |
4711 } | 4745 } |
4712 if (write_null && (utf8_bytes+1 > capacity)) { | 4746 if (write_null && (utf8_bytes+1 > capacity)) { |
4713 options |= NO_NULL_TERMINATION; | 4747 options |= NO_NULL_TERMINATION; |
4714 } | 4748 } |
4715 // Recurse once without a capacity limit. | 4749 // Recurse once without a capacity limit. |
4716 // This will get into the first branch above. | 4750 // This will get into the first branch above. |
4717 // TODO(dcarney) Check max left rec. in Utf8Length and fall through. | 4751 // TODO(dcarney) Check max left rec. in Utf8Length and fall through. |
4718 return WriteUtf8(buffer, -1, nchars_ref, options); | 4752 return WriteUtf8(buffer, -1, nchars_ref, options); |
4719 } | 4753 } |
4720 } | 4754 } |
4721 // Recursive slow path can potentially be unreasonably slow. Flatten. | 4755 // Recursive slow path can potentially be unreasonably slow. Flatten. |
4722 str = FlattenGetString(str); | 4756 str = FlattenGetString(str); |
4723 Utf8WriterVisitor writer(buffer, capacity, false); | 4757 Utf8WriterVisitor writer(buffer, capacity, false, allow_invalid_utf8); |
4724 i::String::VisitFlat(&writer, *str); | 4758 i::String::VisitFlat(&writer, *str); |
4725 return writer.CompleteWrite(write_null, nchars_ref); | 4759 return writer.CompleteWrite(write_null, nchars_ref); |
4726 } | 4760 } |
4727 | 4761 |
4728 | 4762 |
4729 template<typename CharType> | 4763 template<typename CharType> |
4730 static inline int WriteHelper(const String* string, | 4764 static inline int WriteHelper(const String* string, |
4731 CharType* buffer, | 4765 CharType* buffer, |
4732 int start, | 4766 int start, |
4733 int length, | 4767 int length, |
(...skipping 2781 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
7515 Isolate* isolate = reinterpret_cast<Isolate*>(info.GetIsolate()); | 7549 Isolate* isolate = reinterpret_cast<Isolate*>(info.GetIsolate()); |
7516 Address callback_address = | 7550 Address callback_address = |
7517 reinterpret_cast<Address>(reinterpret_cast<intptr_t>(callback)); | 7551 reinterpret_cast<Address>(reinterpret_cast<intptr_t>(callback)); |
7518 VMState<EXTERNAL> state(isolate); | 7552 VMState<EXTERNAL> state(isolate); |
7519 ExternalCallbackScope call_scope(isolate, callback_address); | 7553 ExternalCallbackScope call_scope(isolate, callback_address); |
7520 callback(info); | 7554 callback(info); |
7521 } | 7555 } |
7522 | 7556 |
7523 | 7557 |
7524 } } // namespace v8::internal | 7558 } } // namespace v8::internal |
OLD | NEW |