Index: chromeos/process_proxy/process_output_watcher.cc |
diff --git a/chromeos/process_proxy/process_output_watcher.cc b/chromeos/process_proxy/process_output_watcher.cc |
index f0639846424f6b10225d5a698830f9491f36a036..906fe63440a3b84877fc65bb50553ced8720a723 100644 |
--- a/chromeos/process_proxy/process_output_watcher.cc |
+++ b/chromeos/process_proxy/process_output_watcher.cc |
@@ -4,16 +4,17 @@ |
#include "chromeos/process_proxy/process_output_watcher.h" |
-#include <algorithm> |
-#include <cstdio> |
-#include <cstring> |
- |
#include <sys/ioctl.h> |
#include <sys/select.h> |
#include <unistd.h> |
+#include <algorithm> |
+#include <cstdio> |
+#include <cstring> |
+ |
#include "base/logging.h" |
#include "base/posix/eintr_wrapper.h" |
+#include "base/third_party/icu/icu_utf.h" |
namespace { |
@@ -32,20 +33,41 @@ void CloseFd(int* fd) { |
*fd = -1; |
} |
+// Gets byte size for a UTF8 character given it's leading byte. The character |
+// size is encoded as number of leading '1' bits in the character's leading |
+// byte. If the most significant bit is '0', the character is a valid ASCII |
+// and it's byte size is 1. |
+// The method returns 1 if the provided byte is invalid leading byte. |
+size_t UTF8SizeFromLeadingByte(uint8 leading_byte) { |
+ size_t byte_count = 0; |
+ uint8 mask = 1 << 7; |
+ uint8 error_mask = 1 << (7 - CBU8_MAX_LENGTH); |
+ while (leading_byte & mask) { |
+ if (mask & error_mask) |
+ return 1; |
+ mask >>= 1; |
+ ++byte_count; |
+ } |
+ return byte_count ? byte_count : 1; |
+} |
+ |
} // namespace |
namespace chromeos { |
-ProcessOutputWatcher::ProcessOutputWatcher(int out_fd, int stop_fd, |
+ProcessOutputWatcher::ProcessOutputWatcher( |
+ int out_fd, |
+ int stop_fd, |
const ProcessOutputCallback& callback) |
- : out_fd_(out_fd), |
+ : read_buffer_size_(0), |
+ out_fd_(out_fd), |
stop_fd_(stop_fd), |
- on_read_callback_(callback) { |
+ on_read_callback_(callback) { |
VerifyFileDescriptor(out_fd_); |
VerifyFileDescriptor(stop_fd_); |
max_fd_ = std::max(out_fd_, stop_fd_); |
// We want to be sure we will be able to add 0 at the end of the input, so -1. |
- read_buffer_size_ = arraysize(read_buffer_) - 1; |
+ read_buffer_capacity_ = arraysize(read_buffer_) - 1; |
} |
void ProcessOutputWatcher::Start() { |
@@ -62,7 +84,7 @@ void ProcessOutputWatcher::WatchProcessOutput() { |
while (true) { |
// This has to be reset with every watch cycle. |
fd_set rfds; |
- DCHECK(stop_fd_ >= 0); |
+ DCHECK_GE(stop_fd_, 0); |
InitReadFdSet(out_fd_, stop_fd_, &rfds); |
int select_result = |
@@ -94,13 +116,16 @@ void ProcessOutputWatcher::ReadFromFd(ProcessOutputType type, int* fd) { |
// other streams in case data is written faster than we read it. If there is |
// more than read_buffer_size_ bytes in pipe, it will be read in the next |
// iteration. |
- ssize_t bytes_read = HANDLE_EINTR(read(*fd, read_buffer_, read_buffer_size_)); |
+ DCHECK_GT(read_buffer_capacity_, read_buffer_size_); |
+ ssize_t bytes_read = |
+ HANDLE_EINTR(read(*fd, |
+ &read_buffer_[read_buffer_size_], |
+ read_buffer_capacity_ - read_buffer_size_)); |
if (bytes_read < 0) |
DPLOG(WARNING) << "read from buffer failed"; |
- if (bytes_read > 0) { |
- on_read_callback_.Run(type, std::string(read_buffer_, bytes_read)); |
- } |
+ if (bytes_read > 0) |
+ ReportOutput(type, bytes_read); |
// If there is nothing on the output the watched process has exited (slave end |
// of pty is closed). |
@@ -113,6 +138,59 @@ void ProcessOutputWatcher::ReadFromFd(ProcessOutputType type, int* fd) { |
} |
} |
+size_t ProcessOutputWatcher::OutputSizeWithoutIncompleteUTF8() { |
+ // Find the last non-trailing character byte. This byte should be used to |
+ // infer the last UTF8 character length. |
+ int last_lead_byte = read_buffer_size_ - 1; |
+ while (true) { |
+ // If the series of trailing bytes is too long, something's not right. |
+ // Report the whole output, without waiting for further character bytes. |
+ if (read_buffer_size_ - last_lead_byte > CBU8_MAX_LENGTH) |
+ return read_buffer_size_; |
+ |
+ // If there are trailing characters, there must be a leading one in the |
+ // buffer for a valid UTF8 character. Getting past the buffer begining |
+ // signals something's wrong, or the buffer is empty. In both cases return |
+ // the whole current buffer. |
+ if (last_lead_byte < 0) |
+ return read_buffer_size_; |
+ |
+ // Found the starting character byte; stop searching. |
+ if (!CBU8_IS_TRAIL(read_buffer_[last_lead_byte])) |
+ break; |
+ |
+ --last_lead_byte; |
+ } |
+ |
+ size_t last_length = UTF8SizeFromLeadingByte(read_buffer_[last_lead_byte]); |
+ |
+ // Note that if |last_length| == 0 or |
+ // |last_length| + |last_read_byte| < |read_buffer_size_|, the string is |
+ // invalid UTF8. In that case, send the whole read buffer to the observer |
+ // immediately, just as if there is no trailing incomplete UTF8 bytes. |
+ if (!last_length || last_length + last_lead_byte <= read_buffer_size_) |
+ return read_buffer_size_; |
+ |
+ return last_lead_byte; |
+} |
+ |
+void ProcessOutputWatcher::ReportOutput(ProcessOutputType type, |
+ size_t new_bytes_count) { |
+ read_buffer_size_ += new_bytes_count; |
+ size_t output_to_report = OutputSizeWithoutIncompleteUTF8(); |
+ |
+ on_read_callback_.Run(type, std::string(read_buffer_, output_to_report)); |
+ |
+ // Move the bytes that were left behind to the beginning of the buffer and |
+ // update the buffer size accordingly. |
+ if (output_to_report < read_buffer_size_) { |
+ for (size_t i = output_to_report; i < read_buffer_size_; ++i) { |
+ read_buffer_[i - output_to_report] = read_buffer_[i]; |
+ } |
+ } |
+ read_buffer_size_ -= output_to_report; |
+} |
+ |
void ProcessOutputWatcher::OnStop() { |
delete this; |
} |