OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 4486 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4497 int String::Utf8Length() const { | 4497 int String::Utf8Length() const { |
4498 i::Handle<i::String> str = Utils::OpenHandle(this); | 4498 i::Handle<i::String> str = Utils::OpenHandle(this); |
4499 i::Isolate* isolate = str->GetIsolate(); | 4499 i::Isolate* isolate = str->GetIsolate(); |
4500 return v8::Utf8Length(*str, isolate); | 4500 return v8::Utf8Length(*str, isolate); |
4501 } | 4501 } |
4502 | 4502 |
4503 | 4503 |
4504 class Utf8WriterVisitor { | 4504 class Utf8WriterVisitor { |
4505 public: | 4505 public: |
4506 Utf8WriterVisitor( | 4506 Utf8WriterVisitor( |
4507 char* buffer, int capacity, bool skip_capacity_check) | 4507 char* buffer, |
4508 int capacity, | |
4509 bool skip_capacity_check, | |
4510 bool replace_invalid_utf8) | |
4508 : early_termination_(false), | 4511 : early_termination_(false), |
4509 last_character_(unibrow::Utf16::kNoPreviousCharacter), | 4512 last_character_(unibrow::Utf16::kNoPreviousCharacter), |
4510 buffer_(buffer), | 4513 buffer_(buffer), |
4511 start_(buffer), | 4514 start_(buffer), |
4512 capacity_(capacity), | 4515 capacity_(capacity), |
4513 skip_capacity_check_(capacity == -1 || skip_capacity_check), | 4516 skip_capacity_check_(capacity == -1 || skip_capacity_check), |
4517 replace_invalid_utf8_(replace_invalid_utf8), | |
4514 utf16_chars_read_(0) { | 4518 utf16_chars_read_(0) { |
4515 } | 4519 } |
4516 | 4520 |
4517 static int WriteEndCharacter(uint16_t character, | 4521 static int WriteEndCharacter(uint16_t character, |
4518 int last_character, | 4522 int last_character, |
4519 int remaining, | 4523 int remaining, |
4520 char* const buffer) { | 4524 char* const buffer, |
4525 bool replace_invalid_utf8) { | |
4521 using namespace unibrow; | 4526 using namespace unibrow; |
4522 ASSERT(remaining > 0); | 4527 ASSERT(remaining > 0); |
4523 // We can't use a local buffer here because Encode needs to modify | 4528 // We can't use a local buffer here because Encode needs to modify |
4524 // previous characters in the stream. We know, however, that | 4529 // previous characters in the stream. We know, however, that |
4525 // exactly one character will be advanced. | 4530 // exactly one character will be advanced. |
4526 if (Utf16::IsTrailSurrogate(character) && | 4531 if (Utf16::IsSurrogatePair(last_character, character)) { |
4527 Utf16::IsLeadSurrogate(last_character)) { | 4532 int written = Utf8::Encode(buffer, |
4528 int written = Utf8::Encode(buffer, character, last_character); | 4533 character, |
4534 last_character, | |
4535 replace_invalid_utf8); | |
4529 ASSERT(written == 1); | 4536 ASSERT(written == 1); |
4530 return written; | 4537 return written; |
4531 } | 4538 } |
4532 // Use a scratch buffer to check the required characters. | 4539 // Use a scratch buffer to check the required characters. |
4533 char temp_buffer[Utf8::kMaxEncodedSize]; | 4540 char temp_buffer[Utf8::kMaxEncodedSize]; |
4534 // Can't encode using last_character as gcc has array bounds issues. | 4541 // Can't encode using last_character as gcc has array bounds issues. |
4535 int written = Utf8::Encode(temp_buffer, | 4542 int written = Utf8::Encode(temp_buffer, |
4536 character, | 4543 character, |
4537 Utf16::kNoPreviousCharacter); | 4544 Utf16::kNoPreviousCharacter, |
4545 replace_invalid_utf8); | |
4538 // Won't fit. | 4546 // Won't fit. |
4539 if (written > remaining) return 0; | 4547 if (written > remaining) return 0; |
4540 // Copy over the character from temp_buffer. | 4548 // Copy over the character from temp_buffer. |
4541 for (int j = 0; j < written; j++) { | 4549 for (int j = 0; j < written; j++) { |
4542 buffer[j] = temp_buffer[j]; | 4550 buffer[j] = temp_buffer[j]; |
4543 } | 4551 } |
4544 return written; | 4552 return written; |
4545 } | 4553 } |
4546 | 4554 |
4555 // Visit writes out a group of code units (chars) of a v8::String to the | |
4556 // internal buffer_. This is done in two phases. The first phase calculates a | |
4557 // pessimistic estimate (writable_length) on how many code units can be safely | |
4558 // written without exceeding the buffer capacity and without writing the last | |
4559 // code unit (it could be a lead surrogate). The estimated number of code | |
4560 // units is then written out in one go, and the reported byte usage is used | |
4561 // to correct the estimate. This is repeated until the estimate becomes <= 0 | |
4562 // or all code units have been written out. The second phase writes out code | |
4563 // units until the buffer capacity is reached, would be exceeded by the next | |
4564 // unit, or all units have been written out. | |
4565 // TODO(felixge) This function is rather complex and could benefit from | |
dcarney
2014/01/17 09:10:12
drop the todo
haimuiba
2014/01/20 08:10:27
Done.
| |
4566 // better variable naming and/or splitting up. | |
4547 template<typename Char> | 4567 template<typename Char> |
4548 void Visit(const Char* chars, const int length) { | 4568 void Visit(const Char* chars, const int length) { |
4549 using namespace unibrow; | 4569 using namespace unibrow; |
4550 ASSERT(!early_termination_); | 4570 ASSERT(!early_termination_); |
4551 if (length == 0) return; | 4571 if (length == 0) return; |
4552 // Copy state to stack. | 4572 // Copy state to stack. |
4553 char* buffer = buffer_; | 4573 char* buffer = buffer_; |
4554 int last_character = | 4574 int last_character = |
4555 sizeof(Char) == 1 ? Utf16::kNoPreviousCharacter : last_character_; | 4575 sizeof(Char) == 1 ? Utf16::kNoPreviousCharacter : last_character_; |
4556 int i = 0; | 4576 int i = 0; |
(...skipping 17 matching lines...) Expand all Loading... | |
4574 // Write the characters to the stream. | 4594 // Write the characters to the stream. |
4575 if (sizeof(Char) == 1) { | 4595 if (sizeof(Char) == 1) { |
4576 for (; i < fast_length; i++) { | 4596 for (; i < fast_length; i++) { |
4577 buffer += | 4597 buffer += |
4578 Utf8::EncodeOneByte(buffer, static_cast<uint8_t>(*chars++)); | 4598 Utf8::EncodeOneByte(buffer, static_cast<uint8_t>(*chars++)); |
4579 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); | 4599 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); |
4580 } | 4600 } |
4581 } else { | 4601 } else { |
4582 for (; i < fast_length; i++) { | 4602 for (; i < fast_length; i++) { |
4583 uint16_t character = *chars++; | 4603 uint16_t character = *chars++; |
4584 buffer += Utf8::Encode(buffer, character, last_character); | 4604 buffer += Utf8::Encode(buffer, |
4605 character, | |
4606 last_character, | |
4607 replace_invalid_utf8_); | |
4585 last_character = character; | 4608 last_character = character; |
4586 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); | 4609 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); |
4587 } | 4610 } |
4588 } | 4611 } |
4589 // Array is fully written. Exit. | 4612 // Array is fully written. Exit. |
4590 if (fast_length == length) { | 4613 if (fast_length == length) { |
4591 // Write state back out to object. | 4614 // Write state back out to object. |
4592 last_character_ = last_character; | 4615 last_character_ = last_character; |
4593 buffer_ = buffer; | 4616 buffer_ = buffer; |
4594 utf16_chars_read_ += length; | 4617 utf16_chars_read_ += length; |
4595 return; | 4618 return; |
4596 } | 4619 } |
4597 } | 4620 } |
4598 ASSERT(!skip_capacity_check_); | 4621 ASSERT(!skip_capacity_check_); |
4599 // Slow loop. Must check capacity on each iteration. | 4622 // Slow loop. Must check capacity on each iteration. |
4600 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); | 4623 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); |
4601 ASSERT(remaining_capacity >= 0); | 4624 ASSERT(remaining_capacity >= 0); |
4602 for (; i < length && remaining_capacity > 0; i++) { | 4625 for (; i < length && remaining_capacity > 0; i++) { |
4603 uint16_t character = *chars++; | 4626 uint16_t character = *chars++; |
4627 if (replace_invalid_utf8_ && Utf16::IsLeadSurrogate(character)) { | |
dcarney
2014/01/17 09:10:12
this line is in the correct place, but it's only t
haimuiba
2014/01/20 08:10:27
Done.
| |
4628 early_termination_ = true; | |
4629 break; | |
4630 } | |
4631 | |
dcarney
2014/01/17 09:10:12
no space
haimuiba
2014/01/20 08:10:27
Done.
| |
4604 int written = WriteEndCharacter(character, | 4632 int written = WriteEndCharacter(character, |
4605 last_character, | 4633 last_character, |
4606 remaining_capacity, | 4634 remaining_capacity, |
4607 buffer); | 4635 buffer, |
4636 replace_invalid_utf8_); | |
4608 if (written == 0) { | 4637 if (written == 0) { |
4609 early_termination_ = true; | 4638 early_termination_ = true; |
4610 break; | 4639 break; |
4611 } | 4640 } |
4612 buffer += written; | 4641 buffer += written; |
4613 remaining_capacity -= written; | 4642 remaining_capacity -= written; |
4614 last_character = character; | 4643 last_character = character; |
4615 } | 4644 } |
4616 // Write state back out to object. | 4645 // Write state back out to object. |
4617 last_character_ = last_character; | 4646 last_character_ = last_character; |
(...skipping 27 matching lines...) Expand all Loading... | |
4645 return static_cast<int>(buffer_ - start_); | 4674 return static_cast<int>(buffer_ - start_); |
4646 } | 4675 } |
4647 | 4676 |
4648 private: | 4677 private: |
4649 bool early_termination_; | 4678 bool early_termination_; |
4650 int last_character_; | 4679 int last_character_; |
4651 char* buffer_; | 4680 char* buffer_; |
4652 char* const start_; | 4681 char* const start_; |
4653 int capacity_; | 4682 int capacity_; |
4654 bool const skip_capacity_check_; | 4683 bool const skip_capacity_check_; |
4684 bool const replace_invalid_utf8_; | |
4655 int utf16_chars_read_; | 4685 int utf16_chars_read_; |
4656 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8WriterVisitor); | 4686 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8WriterVisitor); |
4657 }; | 4687 }; |
4658 | 4688 |
4659 | 4689 |
4660 static bool RecursivelySerializeToUtf8(i::String* current, | 4690 static bool RecursivelySerializeToUtf8(i::String* current, |
4661 Utf8WriterVisitor* writer, | 4691 Utf8WriterVisitor* writer, |
4662 int recursion_budget) { | 4692 int recursion_budget) { |
4663 while (!writer->IsDone()) { | 4693 while (!writer->IsDone()) { |
4664 i::ConsString* cons_string = i::String::VisitFlat(writer, current); | 4694 i::ConsString* cons_string = i::String::VisitFlat(writer, current); |
(...skipping 18 matching lines...) Expand all Loading... | |
4683 int options) const { | 4713 int options) const { |
4684 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); | 4714 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); |
4685 LOG_API(isolate, "String::WriteUtf8"); | 4715 LOG_API(isolate, "String::WriteUtf8"); |
4686 ENTER_V8(isolate); | 4716 ENTER_V8(isolate); |
4687 i::Handle<i::String> str = Utils::OpenHandle(this); | 4717 i::Handle<i::String> str = Utils::OpenHandle(this); |
4688 if (options & HINT_MANY_WRITES_EXPECTED) { | 4718 if (options & HINT_MANY_WRITES_EXPECTED) { |
4689 FlattenString(str); // Flatten the string for efficiency. | 4719 FlattenString(str); // Flatten the string for efficiency. |
4690 } | 4720 } |
4691 const int string_length = str->length(); | 4721 const int string_length = str->length(); |
4692 bool write_null = !(options & NO_NULL_TERMINATION); | 4722 bool write_null = !(options & NO_NULL_TERMINATION); |
4723 bool replace_invalid_utf8 = (options & REPLACE_INVALID_UTF8); | |
4724 int max16BitCodeUnitSize = unibrow::Utf8::kMax16BitCodeUnitSize; | |
4693 // First check if we can just write the string without checking capacity. | 4725 // First check if we can just write the string without checking capacity. |
4694 if (capacity == -1 || capacity / 3 >= string_length) { | 4726 if (capacity == -1 || capacity / max16BitCodeUnitSize >= string_length) { |
4695 Utf8WriterVisitor writer(buffer, capacity, true); | 4727 Utf8WriterVisitor writer(buffer, capacity, true, replace_invalid_utf8); |
4696 const int kMaxRecursion = 100; | 4728 const int kMaxRecursion = 100; |
4697 bool success = RecursivelySerializeToUtf8(*str, &writer, kMaxRecursion); | 4729 bool success = RecursivelySerializeToUtf8(*str, &writer, kMaxRecursion); |
4698 if (success) return writer.CompleteWrite(write_null, nchars_ref); | 4730 if (success) return writer.CompleteWrite(write_null, nchars_ref); |
4699 } else if (capacity >= string_length) { | 4731 } else if (capacity >= string_length) { |
4700 // First check that the buffer is large enough. | 4732 // First check that the buffer is large enough. |
4701 int utf8_bytes = v8::Utf8Length(*str, str->GetIsolate()); | 4733 int utf8_bytes = v8::Utf8Length(*str, str->GetIsolate()); |
4702 if (utf8_bytes <= capacity) { | 4734 if (utf8_bytes <= capacity) { |
4703 // ASCII fast path. | 4735 // ASCII fast path. |
4704 if (utf8_bytes == string_length) { | 4736 if (utf8_bytes == string_length) { |
4705 WriteOneByte(reinterpret_cast<uint8_t*>(buffer), 0, capacity, options); | 4737 WriteOneByte(reinterpret_cast<uint8_t*>(buffer), 0, capacity, options); |
4706 if (nchars_ref != NULL) *nchars_ref = string_length; | 4738 if (nchars_ref != NULL) *nchars_ref = string_length; |
4707 if (write_null && (utf8_bytes+1 <= capacity)) { | 4739 if (write_null && (utf8_bytes+1 <= capacity)) { |
4708 return string_length + 1; | 4740 return string_length + 1; |
4709 } | 4741 } |
4710 return string_length; | 4742 return string_length; |
4711 } | 4743 } |
4712 if (write_null && (utf8_bytes+1 > capacity)) { | 4744 if (write_null && (utf8_bytes+1 > capacity)) { |
4713 options |= NO_NULL_TERMINATION; | 4745 options |= NO_NULL_TERMINATION; |
4714 } | 4746 } |
4715 // Recurse once without a capacity limit. | 4747 // Recurse once without a capacity limit. |
4716 // This will get into the first branch above. | 4748 // This will get into the first branch above. |
4717 // TODO(dcarney) Check max left rec. in Utf8Length and fall through. | 4749 // TODO(dcarney) Check max left rec. in Utf8Length and fall through. |
4718 return WriteUtf8(buffer, -1, nchars_ref, options); | 4750 return WriteUtf8(buffer, -1, nchars_ref, options); |
4719 } | 4751 } |
4720 } | 4752 } |
4721 // Recursive slow path can potentially be unreasonably slow. Flatten. | 4753 // Recursive slow path can potentially be unreasonably slow. Flatten. |
4722 str = FlattenGetString(str); | 4754 str = FlattenGetString(str); |
4723 Utf8WriterVisitor writer(buffer, capacity, false); | 4755 Utf8WriterVisitor writer(buffer, capacity, false, replace_invalid_utf8); |
4724 i::String::VisitFlat(&writer, *str); | 4756 i::String::VisitFlat(&writer, *str); |
4725 return writer.CompleteWrite(write_null, nchars_ref); | 4757 return writer.CompleteWrite(write_null, nchars_ref); |
4726 } | 4758 } |
4727 | 4759 |
4728 | 4760 |
4729 template<typename CharType> | 4761 template<typename CharType> |
4730 static inline int WriteHelper(const String* string, | 4762 static inline int WriteHelper(const String* string, |
4731 CharType* buffer, | 4763 CharType* buffer, |
4732 int start, | 4764 int start, |
4733 int length, | 4765 int length, |
(...skipping 2781 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
7515 Isolate* isolate = reinterpret_cast<Isolate*>(info.GetIsolate()); | 7547 Isolate* isolate = reinterpret_cast<Isolate*>(info.GetIsolate()); |
7516 Address callback_address = | 7548 Address callback_address = |
7517 reinterpret_cast<Address>(reinterpret_cast<intptr_t>(callback)); | 7549 reinterpret_cast<Address>(reinterpret_cast<intptr_t>(callback)); |
7518 VMState<EXTERNAL> state(isolate); | 7550 VMState<EXTERNAL> state(isolate); |
7519 ExternalCallbackScope call_scope(isolate, callback_address); | 7551 ExternalCallbackScope call_scope(isolate, callback_address); |
7520 callback(info); | 7552 callback(info); |
7521 } | 7553 } |
7522 | 7554 |
7523 | 7555 |
7524 } } // namespace v8::internal | 7556 } } // namespace v8::internal |
OLD | NEW |