OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 4436 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4447 int String::Utf8Length() const { | 4447 int String::Utf8Length() const { |
4448 i::Handle<i::String> str = Utils::OpenHandle(this); | 4448 i::Handle<i::String> str = Utils::OpenHandle(this); |
4449 i::Isolate* isolate = str->GetIsolate(); | 4449 i::Isolate* isolate = str->GetIsolate(); |
4450 return v8::Utf8Length(*str, isolate); | 4450 return v8::Utf8Length(*str, isolate); |
4451 } | 4451 } |
4452 | 4452 |
4453 | 4453 |
4454 class Utf8WriterVisitor { | 4454 class Utf8WriterVisitor { |
4455 public: | 4455 public: |
4456 Utf8WriterVisitor( | 4456 Utf8WriterVisitor( |
4457 char* buffer, int capacity, bool skip_capacity_check) | 4457 char* buffer, |
4458 : early_termination_(false), | 4458 int capacity, |
4459 last_character_(unibrow::Utf16::kNoPreviousCharacter), | 4459 bool skip_capacity_check, |
4460 buffer_(buffer), | 4460 bool replace_invalid_utf8) |
4461 start_(buffer), | 4461 : early_termination_(false), |
4462 capacity_(capacity), | 4462 last_character_(unibrow::Utf16::kNoPreviousCharacter), |
4463 skip_capacity_check_(capacity == -1 || skip_capacity_check), | 4463 buffer_(buffer), |
4464 utf16_chars_read_(0) { | 4464 start_(buffer), |
| 4465 capacity_(capacity), |
| 4466 skip_capacity_check_(capacity == -1 || skip_capacity_check), |
| 4467 replace_invalid_utf8_(replace_invalid_utf8), |
| 4468 utf16_chars_read_(0) { |
4465 } | 4469 } |
4466 | 4470 |
4467 static int WriteEndCharacter(uint16_t character, | 4471 static int WriteEndCharacter(uint16_t character, |
4468 int last_character, | 4472 int last_character, |
4469 int remaining, | 4473 int remaining, |
4470 char* const buffer) { | 4474 char* const buffer, |
| 4475 bool replace_invalid_utf8) { |
4471 using namespace unibrow; | 4476 using namespace unibrow; |
4472 ASSERT(remaining > 0); | 4477 ASSERT(remaining > 0); |
4473 // We can't use a local buffer here because Encode needs to modify | 4478 // We can't use a local buffer here because Encode needs to modify |
4474 // previous characters in the stream. We know, however, that | 4479 // previous characters in the stream. We know, however, that |
4475 // exactly one character will be advanced. | 4480 // exactly one character will be advanced. |
4476 if (Utf16::IsTrailSurrogate(character) && | 4481 if (Utf16::IsSurrogatePair(last_character, character)) { |
4477 Utf16::IsLeadSurrogate(last_character)) { | 4482 int written = Utf8::Encode(buffer, |
4478 int written = Utf8::Encode(buffer, character, last_character); | 4483 character, |
| 4484 last_character, |
| 4485 replace_invalid_utf8); |
4479 ASSERT(written == 1); | 4486 ASSERT(written == 1); |
4480 return written; | 4487 return written; |
4481 } | 4488 } |
4482 // Use a scratch buffer to check the required characters. | 4489 // Use a scratch buffer to check the required characters. |
4483 char temp_buffer[Utf8::kMaxEncodedSize]; | 4490 char temp_buffer[Utf8::kMaxEncodedSize]; |
4484 // Can't encode using last_character as gcc has array bounds issues. | 4491 // Can't encode using last_character as gcc has array bounds issues. |
4485 int written = Utf8::Encode(temp_buffer, | 4492 int written = Utf8::Encode(temp_buffer, |
4486 character, | 4493 character, |
4487 Utf16::kNoPreviousCharacter); | 4494 Utf16::kNoPreviousCharacter, |
| 4495 replace_invalid_utf8); |
4488 // Won't fit. | 4496 // Won't fit. |
4489 if (written > remaining) return 0; | 4497 if (written > remaining) return 0; |
4490 // Copy over the character from temp_buffer. | 4498 // Copy over the character from temp_buffer. |
4491 for (int j = 0; j < written; j++) { | 4499 for (int j = 0; j < written; j++) { |
4492 buffer[j] = temp_buffer[j]; | 4500 buffer[j] = temp_buffer[j]; |
4493 } | 4501 } |
4494 return written; | 4502 return written; |
4495 } | 4503 } |
4496 | 4504 |
| 4505 // Visit writes out a group of code units (chars) of a v8::String to the |
| 4506 // internal buffer_. This is done in two phases. The first phase calculates a |
| 4507 // pessimistic estimate (writable_length) on how many code units can be safely |
| 4508 // written without exceeding the buffer capacity and without writing the last |
| 4509 // code unit (it could be a lead surrogate). The estimated number of code |
| 4510 // units is then written out in one go, and the reported byte usage is used |
| 4511 // to correct the estimate. This is repeated until the estimate becomes <= 0 |
| 4512 // or all code units have been written out. The second phase writes out code |
| 4513 // units until the buffer capacity is reached, would be exceeded by the next |
| 4514 // unit, or all units have been written out. |
4497 template<typename Char> | 4515 template<typename Char> |
4498 void Visit(const Char* chars, const int length) { | 4516 void Visit(const Char* chars, const int length) { |
4499 using namespace unibrow; | 4517 using namespace unibrow; |
4500 ASSERT(!early_termination_); | 4518 ASSERT(!early_termination_); |
4501 if (length == 0) return; | 4519 if (length == 0) return; |
4502 // Copy state to stack. | 4520 // Copy state to stack. |
4503 char* buffer = buffer_; | 4521 char* buffer = buffer_; |
4504 int last_character = | 4522 int last_character = |
4505 sizeof(Char) == 1 ? Utf16::kNoPreviousCharacter : last_character_; | 4523 sizeof(Char) == 1 ? Utf16::kNoPreviousCharacter : last_character_; |
4506 int i = 0; | 4524 int i = 0; |
(...skipping 17 matching lines...) Expand all Loading... |
4524 // Write the characters to the stream. | 4542 // Write the characters to the stream. |
4525 if (sizeof(Char) == 1) { | 4543 if (sizeof(Char) == 1) { |
4526 for (; i < fast_length; i++) { | 4544 for (; i < fast_length; i++) { |
4527 buffer += | 4545 buffer += |
4528 Utf8::EncodeOneByte(buffer, static_cast<uint8_t>(*chars++)); | 4546 Utf8::EncodeOneByte(buffer, static_cast<uint8_t>(*chars++)); |
4529 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); | 4547 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); |
4530 } | 4548 } |
4531 } else { | 4549 } else { |
4532 for (; i < fast_length; i++) { | 4550 for (; i < fast_length; i++) { |
4533 uint16_t character = *chars++; | 4551 uint16_t character = *chars++; |
4534 buffer += Utf8::Encode(buffer, character, last_character); | 4552 buffer += Utf8::Encode(buffer, |
| 4553 character, |
| 4554 last_character, |
| 4555 replace_invalid_utf8_); |
4535 last_character = character; | 4556 last_character = character; |
4536 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); | 4557 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); |
4537 } | 4558 } |
4538 } | 4559 } |
4539 // Array is fully written. Exit. | 4560 // Array is fully written. Exit. |
4540 if (fast_length == length) { | 4561 if (fast_length == length) { |
4541 // Write state back out to object. | 4562 // Write state back out to object. |
4542 last_character_ = last_character; | 4563 last_character_ = last_character; |
4543 buffer_ = buffer; | 4564 buffer_ = buffer; |
4544 utf16_chars_read_ += length; | 4565 utf16_chars_read_ += length; |
4545 return; | 4566 return; |
4546 } | 4567 } |
4547 } | 4568 } |
4548 ASSERT(!skip_capacity_check_); | 4569 ASSERT(!skip_capacity_check_); |
4549 // Slow loop. Must check capacity on each iteration. | 4570 // Slow loop. Must check capacity on each iteration. |
4550 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); | 4571 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); |
4551 ASSERT(remaining_capacity >= 0); | 4572 ASSERT(remaining_capacity >= 0); |
4552 for (; i < length && remaining_capacity > 0; i++) { | 4573 for (; i < length && remaining_capacity > 0; i++) { |
4553 uint16_t character = *chars++; | 4574 uint16_t character = *chars++; |
| 4575 // remaining_capacity is <= 3 bytes at this point, so we do not write out |
| 4576 // an unmatched lead surrogate. |
| 4577 if (replace_invalid_utf8_ && Utf16::IsLeadSurrogate(character)) { |
| 4578 early_termination_ = true; |
| 4579 break; |
| 4580 } |
4554 int written = WriteEndCharacter(character, | 4581 int written = WriteEndCharacter(character, |
4555 last_character, | 4582 last_character, |
4556 remaining_capacity, | 4583 remaining_capacity, |
4557 buffer); | 4584 buffer, |
| 4585 replace_invalid_utf8_); |
4558 if (written == 0) { | 4586 if (written == 0) { |
4559 early_termination_ = true; | 4587 early_termination_ = true; |
4560 break; | 4588 break; |
4561 } | 4589 } |
4562 buffer += written; | 4590 buffer += written; |
4563 remaining_capacity -= written; | 4591 remaining_capacity -= written; |
4564 last_character = character; | 4592 last_character = character; |
4565 } | 4593 } |
4566 // Write state back out to object. | 4594 // Write state back out to object. |
4567 last_character_ = last_character; | 4595 last_character_ = last_character; |
(...skipping 27 matching lines...) Expand all Loading... |
4595 return static_cast<int>(buffer_ - start_); | 4623 return static_cast<int>(buffer_ - start_); |
4596 } | 4624 } |
4597 | 4625 |
4598 private: | 4626 private: |
4599 bool early_termination_; | 4627 bool early_termination_; |
4600 int last_character_; | 4628 int last_character_; |
4601 char* buffer_; | 4629 char* buffer_; |
4602 char* const start_; | 4630 char* const start_; |
4603 int capacity_; | 4631 int capacity_; |
4604 bool const skip_capacity_check_; | 4632 bool const skip_capacity_check_; |
| 4633 bool const replace_invalid_utf8_; |
4605 int utf16_chars_read_; | 4634 int utf16_chars_read_; |
4606 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8WriterVisitor); | 4635 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8WriterVisitor); |
4607 }; | 4636 }; |
4608 | 4637 |
4609 | 4638 |
4610 static bool RecursivelySerializeToUtf8(i::String* current, | 4639 static bool RecursivelySerializeToUtf8(i::String* current, |
4611 Utf8WriterVisitor* writer, | 4640 Utf8WriterVisitor* writer, |
4612 int recursion_budget) { | 4641 int recursion_budget) { |
4613 while (!writer->IsDone()) { | 4642 while (!writer->IsDone()) { |
4614 i::ConsString* cons_string = i::String::VisitFlat(writer, current); | 4643 i::ConsString* cons_string = i::String::VisitFlat(writer, current); |
(...skipping 18 matching lines...) Expand all Loading... |
4633 int options) const { | 4662 int options) const { |
4634 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); | 4663 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); |
4635 LOG_API(isolate, "String::WriteUtf8"); | 4664 LOG_API(isolate, "String::WriteUtf8"); |
4636 ENTER_V8(isolate); | 4665 ENTER_V8(isolate); |
4637 i::Handle<i::String> str = Utils::OpenHandle(this); | 4666 i::Handle<i::String> str = Utils::OpenHandle(this); |
4638 if (options & HINT_MANY_WRITES_EXPECTED) { | 4667 if (options & HINT_MANY_WRITES_EXPECTED) { |
4639 FlattenString(str); // Flatten the string for efficiency. | 4668 FlattenString(str); // Flatten the string for efficiency. |
4640 } | 4669 } |
4641 const int string_length = str->length(); | 4670 const int string_length = str->length(); |
4642 bool write_null = !(options & NO_NULL_TERMINATION); | 4671 bool write_null = !(options & NO_NULL_TERMINATION); |
| 4672 bool replace_invalid_utf8 = (options & REPLACE_INVALID_UTF8); |
| 4673 int max16BitCodeUnitSize = unibrow::Utf8::kMax16BitCodeUnitSize; |
4643 // First check if we can just write the string without checking capacity. | 4674 // First check if we can just write the string without checking capacity. |
4644 if (capacity == -1 || capacity / 3 >= string_length) { | 4675 if (capacity == -1 || capacity / max16BitCodeUnitSize >= string_length) { |
4645 Utf8WriterVisitor writer(buffer, capacity, true); | 4676 Utf8WriterVisitor writer(buffer, capacity, true, replace_invalid_utf8); |
4646 const int kMaxRecursion = 100; | 4677 const int kMaxRecursion = 100; |
4647 bool success = RecursivelySerializeToUtf8(*str, &writer, kMaxRecursion); | 4678 bool success = RecursivelySerializeToUtf8(*str, &writer, kMaxRecursion); |
4648 if (success) return writer.CompleteWrite(write_null, nchars_ref); | 4679 if (success) return writer.CompleteWrite(write_null, nchars_ref); |
4649 } else if (capacity >= string_length) { | 4680 } else if (capacity >= string_length) { |
4650 // First check that the buffer is large enough. | 4681 // First check that the buffer is large enough. |
4651 int utf8_bytes = v8::Utf8Length(*str, str->GetIsolate()); | 4682 int utf8_bytes = v8::Utf8Length(*str, str->GetIsolate()); |
4652 if (utf8_bytes <= capacity) { | 4683 if (utf8_bytes <= capacity) { |
4653 // ASCII fast path. | 4684 // ASCII fast path. |
4654 if (utf8_bytes == string_length) { | 4685 if (utf8_bytes == string_length) { |
4655 WriteOneByte(reinterpret_cast<uint8_t*>(buffer), 0, capacity, options); | 4686 WriteOneByte(reinterpret_cast<uint8_t*>(buffer), 0, capacity, options); |
4656 if (nchars_ref != NULL) *nchars_ref = string_length; | 4687 if (nchars_ref != NULL) *nchars_ref = string_length; |
4657 if (write_null && (utf8_bytes+1 <= capacity)) { | 4688 if (write_null && (utf8_bytes+1 <= capacity)) { |
4658 return string_length + 1; | 4689 return string_length + 1; |
4659 } | 4690 } |
4660 return string_length; | 4691 return string_length; |
4661 } | 4692 } |
4662 if (write_null && (utf8_bytes+1 > capacity)) { | 4693 if (write_null && (utf8_bytes+1 > capacity)) { |
4663 options |= NO_NULL_TERMINATION; | 4694 options |= NO_NULL_TERMINATION; |
4664 } | 4695 } |
4665 // Recurse once without a capacity limit. | 4696 // Recurse once without a capacity limit. |
4666 // This will get into the first branch above. | 4697 // This will get into the first branch above. |
4667 // TODO(dcarney) Check max left rec. in Utf8Length and fall through. | 4698 // TODO(dcarney) Check max left rec. in Utf8Length and fall through. |
4668 return WriteUtf8(buffer, -1, nchars_ref, options); | 4699 return WriteUtf8(buffer, -1, nchars_ref, options); |
4669 } | 4700 } |
4670 } | 4701 } |
4671 // Recursive slow path can potentially be unreasonably slow. Flatten. | 4702 // Recursive slow path can potentially be unreasonably slow. Flatten. |
4672 str = FlattenGetString(str); | 4703 str = FlattenGetString(str); |
4673 Utf8WriterVisitor writer(buffer, capacity, false); | 4704 Utf8WriterVisitor writer(buffer, capacity, false, replace_invalid_utf8); |
4674 i::String::VisitFlat(&writer, *str); | 4705 i::String::VisitFlat(&writer, *str); |
4675 return writer.CompleteWrite(write_null, nchars_ref); | 4706 return writer.CompleteWrite(write_null, nchars_ref); |
4676 } | 4707 } |
4677 | 4708 |
4678 | 4709 |
4679 template<typename CharType> | 4710 template<typename CharType> |
4680 static inline int WriteHelper(const String* string, | 4711 static inline int WriteHelper(const String* string, |
4681 CharType* buffer, | 4712 CharType* buffer, |
4682 int start, | 4713 int start, |
4683 int length, | 4714 int length, |
(...skipping 2739 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7423 Isolate* isolate = reinterpret_cast<Isolate*>(info.GetIsolate()); | 7454 Isolate* isolate = reinterpret_cast<Isolate*>(info.GetIsolate()); |
7424 Address callback_address = | 7455 Address callback_address = |
7425 reinterpret_cast<Address>(reinterpret_cast<intptr_t>(callback)); | 7456 reinterpret_cast<Address>(reinterpret_cast<intptr_t>(callback)); |
7426 VMState<EXTERNAL> state(isolate); | 7457 VMState<EXTERNAL> state(isolate); |
7427 ExternalCallbackScope call_scope(isolate, callback_address); | 7458 ExternalCallbackScope call_scope(isolate, callback_address); |
7428 callback(info); | 7459 callback(info); |
7429 } | 7460 } |
7430 | 7461 |
7431 | 7462 |
7432 } } // namespace v8::internal | 7463 } } // namespace v8::internal |
OLD | NEW |