OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 4486 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4497 int String::Utf8Length() const { | 4497 int String::Utf8Length() const { |
4498 i::Handle<i::String> str = Utils::OpenHandle(this); | 4498 i::Handle<i::String> str = Utils::OpenHandle(this); |
4499 i::Isolate* isolate = str->GetIsolate(); | 4499 i::Isolate* isolate = str->GetIsolate(); |
4500 return v8::Utf8Length(*str, isolate); | 4500 return v8::Utf8Length(*str, isolate); |
4501 } | 4501 } |
4502 | 4502 |
4503 | 4503 |
4504 class Utf8WriterVisitor { | 4504 class Utf8WriterVisitor { |
4505 public: | 4505 public: |
4506 Utf8WriterVisitor( | 4506 Utf8WriterVisitor( |
4507 char* buffer, int capacity, bool skip_capacity_check) | 4507 char* buffer, |
4508 int capacity, | |
4509 bool skip_capacity_check, | |
4510 bool allow_invalid_utf8) | |
4508 : early_termination_(false), | 4511 : early_termination_(false), |
4509 last_character_(unibrow::Utf16::kNoPreviousCharacter), | 4512 last_character_(unibrow::Utf16::kNoPreviousCharacter), |
4510 buffer_(buffer), | 4513 buffer_(buffer), |
4511 start_(buffer), | 4514 start_(buffer), |
4512 capacity_(capacity), | 4515 capacity_(capacity), |
4513 skip_capacity_check_(capacity == -1 || skip_capacity_check), | 4516 skip_capacity_check_(capacity == -1 || skip_capacity_check), |
4517 allow_invalid_utf8_(allow_invalid_utf8), | |
4514 utf16_chars_read_(0) { | 4518 utf16_chars_read_(0) { |
4515 } | 4519 } |
4516 | 4520 |
4517 static int WriteEndCharacter(uint16_t character, | 4521 static int WriteEndCharacter(uint16_t character, |
4518 int last_character, | 4522 int last_character, |
4519 int remaining, | 4523 int remaining, |
4520 char* const buffer) { | 4524 char* const buffer, |
4525 bool allow_invalid_utf8) { | |
4521 using namespace unibrow; | 4526 using namespace unibrow; |
4522 ASSERT(remaining > 0); | 4527 ASSERT(remaining > 0); |
4523 // We can't use a local buffer here because Encode needs to modify | 4528 // We can't use a local buffer here because Encode needs to modify |
4524 // previous characters in the stream. We know, however, that | 4529 // previous characters in the stream. We know, however, that |
4525 // exactly one character will be advanced. | 4530 // exactly one character will be advanced. |
4526 if (Utf16::IsTrailSurrogate(character) && | 4531 if (Utf16::IsSurrogatePair(last_character, character)) { |
4527 Utf16::IsLeadSurrogate(last_character)) { | 4532 int written = Utf8::Encode(buffer, |
4528 int written = Utf8::Encode(buffer, character, last_character); | 4533 character, |
4534 last_character, | |
4535 allow_invalid_utf8); | |
4529 ASSERT(written == 1); | 4536 ASSERT(written == 1); |
4530 return written; | 4537 return written; |
4531 } | 4538 } |
4532 // Use a scratch buffer to check the required characters. | 4539 // Use a scratch buffer to check the required characters. |
4533 char temp_buffer[Utf8::kMaxEncodedSize]; | 4540 char temp_buffer[Utf8::kMaxEncodedSize]; |
4534 // Can't encode using last_character as gcc has array bounds issues. | 4541 // Can't encode using last_character as gcc has array bounds issues. |
4535 int written = Utf8::Encode(temp_buffer, | 4542 int written = Utf8::Encode(temp_buffer, |
4536 character, | 4543 character, |
4537 Utf16::kNoPreviousCharacter); | 4544 Utf16::kNoPreviousCharacter, |
4545 allow_invalid_utf8); | |
4538 // Won't fit. | 4546 // Won't fit. |
4539 if (written > remaining) return 0; | 4547 if (written > remaining) return 0; |
4540 // Copy over the character from temp_buffer. | 4548 // Copy over the character from temp_buffer. |
4541 for (int j = 0; j < written; j++) { | 4549 for (int j = 0; j < written; j++) { |
4542 buffer[j] = temp_buffer[j]; | 4550 buffer[j] = temp_buffer[j]; |
4543 } | 4551 } |
4544 return written; | 4552 return written; |
4545 } | 4553 } |
4546 | 4554 |
4547 template<typename Char> | 4555 template<typename Char> |
(...skipping 26 matching lines...) Expand all Loading... | |
4574 // Write the characters to the stream. | 4582 // Write the characters to the stream. |
4575 if (sizeof(Char) == 1) { | 4583 if (sizeof(Char) == 1) { |
4576 for (; i < fast_length; i++) { | 4584 for (; i < fast_length; i++) { |
4577 buffer += | 4585 buffer += |
4578 Utf8::EncodeOneByte(buffer, static_cast<uint8_t>(*chars++)); | 4586 Utf8::EncodeOneByte(buffer, static_cast<uint8_t>(*chars++)); |
4579 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); | 4587 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); |
4580 } | 4588 } |
4581 } else { | 4589 } else { |
4582 for (; i < fast_length; i++) { | 4590 for (; i < fast_length; i++) { |
4583 uint16_t character = *chars++; | 4591 uint16_t character = *chars++; |
4584 buffer += Utf8::Encode(buffer, character, last_character); | 4592 buffer += Utf8::Encode(buffer, |
4593 character, | |
4594 last_character, | |
4595 allow_invalid_utf8_); | |
4585 last_character = character; | 4596 last_character = character; |
4586 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); | 4597 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); |
4587 } | 4598 } |
4588 } | 4599 } |
4589 // Array is fully written. Exit. | 4600 // Array is fully written. Exit. |
4590 if (fast_length == length) { | 4601 if (fast_length == length) { |
4591 // Write state back out to object. | 4602 // Write state back out to object. |
4592 last_character_ = last_character; | 4603 last_character_ = last_character; |
4593 buffer_ = buffer; | 4604 buffer_ = buffer; |
4594 utf16_chars_read_ += length; | 4605 utf16_chars_read_ += length; |
4595 return; | 4606 return; |
4596 } | 4607 } |
4597 } | 4608 } |
4598 ASSERT(!skip_capacity_check_); | 4609 ASSERT(!skip_capacity_check_); |
4599 // Slow loop. Must check capacity on each iteration. | 4610 // Slow loop. Must check capacity on each iteration. |
4600 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); | 4611 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); |
4601 ASSERT(remaining_capacity >= 0); | 4612 ASSERT(remaining_capacity >= 0); |
4602 for (; i < length && remaining_capacity > 0; i++) { | 4613 for (; i < length && remaining_capacity > 0; i++) { |
4603 uint16_t character = *chars++; | 4614 uint16_t character = *chars++; |
4604 int written = WriteEndCharacter(character, | 4615 int written = WriteEndCharacter(character, |
4605 last_character, | 4616 last_character, |
4606 remaining_capacity, | 4617 remaining_capacity, |
4607 buffer); | 4618 buffer, |
4619 allow_invalid_utf8_); | |
4608 if (written == 0) { | 4620 if (written == 0) { |
4609 early_termination_ = true; | 4621 early_termination_ = true; |
4610 break; | 4622 break; |
4611 } | 4623 } |
4612 buffer += written; | 4624 buffer += written; |
4613 remaining_capacity -= written; | 4625 remaining_capacity -= written; |
4614 last_character = character; | 4626 last_character = character; |
4615 } | 4627 } |
4616 // Write state back out to object. | 4628 // Write state back out to object. |
4617 last_character_ = last_character; | 4629 last_character_ = last_character; |
(...skipping 27 matching lines...) Expand all Loading... | |
4645 return static_cast<int>(buffer_ - start_); | 4657 return static_cast<int>(buffer_ - start_); |
4646 } | 4658 } |
4647 | 4659 |
4648 private: | 4660 private: |
4649 bool early_termination_; | 4661 bool early_termination_; |
4650 int last_character_; | 4662 int last_character_; |
4651 char* buffer_; | 4663 char* buffer_; |
4652 char* const start_; | 4664 char* const start_; |
4653 int capacity_; | 4665 int capacity_; |
4654 bool const skip_capacity_check_; | 4666 bool const skip_capacity_check_; |
4667 bool const allow_invalid_utf8_; | |
4655 int utf16_chars_read_; | 4668 int utf16_chars_read_; |
4656 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8WriterVisitor); | 4669 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8WriterVisitor); |
4657 }; | 4670 }; |
4658 | 4671 |
4659 | 4672 |
4660 static bool RecursivelySerializeToUtf8(i::String* current, | 4673 static bool RecursivelySerializeToUtf8(i::String* current, |
4661 Utf8WriterVisitor* writer, | 4674 Utf8WriterVisitor* writer, |
4662 int recursion_budget) { | 4675 int recursion_budget) { |
4663 while (!writer->IsDone()) { | 4676 while (!writer->IsDone()) { |
4664 i::ConsString* cons_string = i::String::VisitFlat(writer, current); | 4677 i::ConsString* cons_string = i::String::VisitFlat(writer, current); |
(...skipping 18 matching lines...) Expand all Loading... | |
4683 int options) const { | 4696 int options) const { |
4684 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); | 4697 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); |
4685 LOG_API(isolate, "String::WriteUtf8"); | 4698 LOG_API(isolate, "String::WriteUtf8"); |
4686 ENTER_V8(isolate); | 4699 ENTER_V8(isolate); |
4687 i::Handle<i::String> str = Utils::OpenHandle(this); | 4700 i::Handle<i::String> str = Utils::OpenHandle(this); |
4688 if (options & HINT_MANY_WRITES_EXPECTED) { | 4701 if (options & HINT_MANY_WRITES_EXPECTED) { |
4689 FlattenString(str); // Flatten the string for efficiency. | 4702 FlattenString(str); // Flatten the string for efficiency. |
4690 } | 4703 } |
4691 const int string_length = str->length(); | 4704 const int string_length = str->length(); |
4692 bool write_null = !(options & NO_NULL_TERMINATION); | 4705 bool write_null = !(options & NO_NULL_TERMINATION); |
4706 bool allow_invalid_utf8 = !(options & DISALLOW_INVALID_UTF8); | |
4693 // First check if we can just write the string without checking capacity. | 4707 // First check if we can just write the string without checking capacity. |
4708 // @TODO Replace magic number 3 with something more descriptive. E.g. | |
dcarney
2014/01/10 16:54:33
the syntax we use is:
// TODO(username) Comment.
haimuiba
2014/01/13 07:48:21
Done.
| |
4709 // Utf8::kMaxTwoByteSize (as in the maximum size an unsigned 2-byte code unit | |
4710 // value will take up when encoded to UTF-8)? When I first read this code I | |
4711 // thought there might be an overflow bug here since UTF-8 may take up to 4 | |
4712 // bytes per code point. Then I realized that a surrogate pair has a | |
4713 // str.length of 2, making the code correct. | |
4694 if (capacity == -1 || capacity / 3 >= string_length) { | 4714 if (capacity == -1 || capacity / 3 >= string_length) { |
4695 Utf8WriterVisitor writer(buffer, capacity, true); | 4715 Utf8WriterVisitor writer(buffer, capacity, true, allow_invalid_utf8); |
4696 const int kMaxRecursion = 100; | 4716 const int kMaxRecursion = 100; |
4697 bool success = RecursivelySerializeToUtf8(*str, &writer, kMaxRecursion); | 4717 bool success = RecursivelySerializeToUtf8(*str, &writer, kMaxRecursion); |
4698 if (success) return writer.CompleteWrite(write_null, nchars_ref); | 4718 if (success) return writer.CompleteWrite(write_null, nchars_ref); |
4699 } else if (capacity >= string_length) { | 4719 } else if (capacity >= string_length) { |
4700 // First check that the buffer is large enough. | 4720 // First check that the buffer is large enough. |
4701 int utf8_bytes = v8::Utf8Length(*str, str->GetIsolate()); | 4721 int utf8_bytes = v8::Utf8Length(*str, str->GetIsolate()); |
4702 if (utf8_bytes <= capacity) { | 4722 if (utf8_bytes <= capacity) { |
4703 // ASCII fast path. | 4723 // ASCII fast path. |
4704 if (utf8_bytes == string_length) { | 4724 if (utf8_bytes == string_length) { |
4705 WriteOneByte(reinterpret_cast<uint8_t*>(buffer), 0, capacity, options); | 4725 WriteOneByte(reinterpret_cast<uint8_t*>(buffer), 0, capacity, options); |
4706 if (nchars_ref != NULL) *nchars_ref = string_length; | 4726 if (nchars_ref != NULL) *nchars_ref = string_length; |
4707 if (write_null && (utf8_bytes+1 <= capacity)) { | 4727 if (write_null && (utf8_bytes+1 <= capacity)) { |
4708 return string_length + 1; | 4728 return string_length + 1; |
4709 } | 4729 } |
4710 return string_length; | 4730 return string_length; |
4711 } | 4731 } |
4712 if (write_null && (utf8_bytes+1 > capacity)) { | 4732 if (write_null && (utf8_bytes+1 > capacity)) { |
4713 options |= NO_NULL_TERMINATION; | 4733 options |= NO_NULL_TERMINATION; |
4714 } | 4734 } |
4715 // Recurse once without a capacity limit. | 4735 // Recurse once without a capacity limit. |
4716 // This will get into the first branch above. | 4736 // This will get into the first branch above. |
4717 // TODO(dcarney) Check max left rec. in Utf8Length and fall through. | 4737 // TODO(dcarney) Check max left rec. in Utf8Length and fall through. |
4718 return WriteUtf8(buffer, -1, nchars_ref, options); | 4738 return WriteUtf8(buffer, -1, nchars_ref, options); |
4719 } | 4739 } |
4720 } | 4740 } |
4721 // Recursive slow path can potentially be unreasonably slow. Flatten. | 4741 // Recursive slow path can potentially be unreasonably slow. Flatten. |
4722 str = FlattenGetString(str); | 4742 str = FlattenGetString(str); |
4723 Utf8WriterVisitor writer(buffer, capacity, false); | 4743 Utf8WriterVisitor writer(buffer, capacity, false, allow_invalid_utf8); |
4724 i::String::VisitFlat(&writer, *str); | 4744 i::String::VisitFlat(&writer, *str); |
4725 return writer.CompleteWrite(write_null, nchars_ref); | 4745 return writer.CompleteWrite(write_null, nchars_ref); |
dcarney
2014/01/10 16:49:55
think you need to ensure that all return points fr
haimuiba
2014/01/13 07:48:21
Not sure I understand. If allow_invalid_utf8=false
| |
4726 } | 4746 } |
4727 | 4747 |
4728 | 4748 |
4729 template<typename CharType> | 4749 template<typename CharType> |
4730 static inline int WriteHelper(const String* string, | 4750 static inline int WriteHelper(const String* string, |
4731 CharType* buffer, | 4751 CharType* buffer, |
4732 int start, | 4752 int start, |
4733 int length, | 4753 int length, |
4734 int options) { | 4754 int options) { |
4735 i::Isolate* isolate = Utils::OpenHandle(string)->GetIsolate(); | 4755 i::Isolate* isolate = Utils::OpenHandle(string)->GetIsolate(); |
(...skipping 2779 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
7515 Isolate* isolate = reinterpret_cast<Isolate*>(info.GetIsolate()); | 7535 Isolate* isolate = reinterpret_cast<Isolate*>(info.GetIsolate()); |
7516 Address callback_address = | 7536 Address callback_address = |
7517 reinterpret_cast<Address>(reinterpret_cast<intptr_t>(callback)); | 7537 reinterpret_cast<Address>(reinterpret_cast<intptr_t>(callback)); |
7518 VMState<EXTERNAL> state(isolate); | 7538 VMState<EXTERNAL> state(isolate); |
7519 ExternalCallbackScope call_scope(isolate, callback_address); | 7539 ExternalCallbackScope call_scope(isolate, callback_address); |
7520 callback(info); | 7540 callback(info); |
7521 } | 7541 } |
7522 | 7542 |
7523 | 7543 |
7524 } } // namespace v8::internal | 7544 } } // namespace v8::internal |
OLD | NEW |