// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/chromeos/file_system_provider/fileapi/buffering_file_stream_reader.h"

#include <algorithm>

#include "base/message_loop/message_loop.h"
#include "net/base/io_buffer.h"
#include "net/base/net_errors.h"

namespace chromeos {
namespace file_system_provider {

BufferingFileStreamReader::BufferingFileStreamReader(
    scoped_ptr<webkit_blob::FileStreamReader> file_stream_reader,
    int buffer_size)
    : file_stream_reader_(file_stream_reader.Pass()),
      buffer_size_(buffer_size),
      preloading_buffer_(new net::IOBuffer(buffer_size_)),
      preloading_buffer_offset_(0),
      buffered_bytes_(0),
      weak_ptr_factory_(this) {
}

BufferingFileStreamReader::~BufferingFileStreamReader() {
}

int BufferingFileStreamReader::Read(net::IOBuffer* buffer,
                                    int buffer_length,
                                    const net::CompletionCallback& callback) {
  // Return as much as is available in the internal buffer. It may be less
  // than |buffer_length|, which is valid.
  const int bytes_read =
      CopyFromBuffer(make_scoped_refptr(buffer), buffer_length);
  if (bytes_read)
    return bytes_read;
hashimoto
2014/06/06 03:36:52
How about adding here something like:

  if (buffer_length >= buffer_size_)
    return file_stream_reader_->Read(buffer, buffer_length, callback);

I'm worrying that, in the future, the user code may pass a big value for |buffer_length| and this place may become a hidden bottleneck.
mtomasz
2014/06/06 04:22:56
On 2014/06/06 03:36:52, hashimoto wrote:
> How about adding here something like:
>
>   if (buffer_length >= buffer_size_)
>     return file_stream_reader_->Read(buffer, buffer_length, callback);
>
> I'm worrying that, in the future, the user code may pass a big value for
> |buffer_length| and this place may become a hidden bottleneck.

The buffer length is not user controlled, but it is currently hard-coded to 32KB. However, if it happened to be larger than |buffer_size_| (512KB) and we had this extra if, then we could crash the renderer process, since the maximum IPC message is 1MB.

https://code.google.com/p/chromium/codesearch#chromium/src/ipc/ipc_channel.h&...

Hence, the current code has two purposes: to increase the chunk size from 32KB to 512KB, and to cap it at 512KB, so we won't crash the renderer.
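
To make the capping concrete, here is a minimal standalone sketch of the idea (plain C++; FakeInnerReader and all names below are hypothetical stand-ins for illustration, not the actual Chromium classes): however large a read the caller requests, the inner reader is asked for at most one internal buffer's worth of data per request.

#include <algorithm>
#include <cstring>
#include <iostream>
#include <string>

// Hypothetical stand-in for the inner file stream reader: serves bytes
// from an in-memory string, at most |length| bytes per call.
class FakeInnerReader {
 public:
  explicit FakeInnerReader(const std::string& data)
      : data_(data), offset_(0) {}

  int Read(char* out, int length) {
    const int available = static_cast<int>(data_.size()) - offset_;
    const int chunk = std::min(length, available);
    std::memcpy(out, data_.data() + offset_, chunk);
    offset_ += chunk;
    return chunk;
  }

 private:
  std::string data_;
  int offset_;
};

int main() {
  FakeInnerReader inner(std::string(2048, 'x'));
  const int kBufferSize = 512;  // Stands in for the 512KB cap.
  char out[4096];

  // The caller asks for 4096 bytes, but the inner reader is only ever
  // asked for at most kBufferSize bytes per request.
  const int requested = 4096;
  const int chunk = std::min(requested, kBufferSize);
  const int read = inner.Read(out, chunk);
  std::cout << "asked inner reader for " << chunk << ", got " << read << "\n";
  return 0;
}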
hashimoto
2014/06/09 11:12:04
On 2014/06/06 04:22:56, mtomasz wrote:
> However, if it happened to be larger than |buffer_size_| (512KB) and we had
> this extra if, then we could crash the renderer process, since the maximum
> IPC message is 1MB.

Isn't "static const size_t kMaximumMessageSize = 128 * 1024 * 1024;" 128MB?

> Hence, the current code has two purposes: to increase the chunk size from
> 32KB to 512KB, and to cap it at 512KB, so we won't crash the renderer.

If that was the purpose, why don't you add a static assert against it?
mtomasz
2014/06/09 13:51:16
On 2014/06/09 11:12:04, hashimoto wrote:
> Isn't "static const size_t kMaximumMessageSize = 128 * 1024 * 1024;" 128MB?

You're right, of course it is 128MB. In that case, the extra if of course makes sense. But how do you want to use a static assert here? Could you clarify?
mtomasz
2014/06/10 08:38:42
I thought about it again, and IMHO we should keep the cap on the buffer size as it is.

We can't simply add that if. If the inner buffer already has some data, we have to copy it out first, and only then, if |buffer_length - bytes_read| > buffer_size_, read the rest directly.

That is non-trivial, and it will require a test case. But we won't use this code path, because of the current buffer length size restriction. And if the buffer length grows to a large size in the future, then fetching a lot of data directly from providing extensions will cause request timeouts in the case of remote providing extensions.

So basically, there is IMHO no reason to add this code path.

Hence, I added a comment to make it clear that this class fetches data from the underlying file stream reader in chunks of up to |buffer_size_| bytes.
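
For reference, the variant being debated (drain the internal buffer first, then forward an oversized remainder directly) might look roughly like the sketch below. This is a hedged illustration in plain, synchronous C++ with hypothetical names, not code from this CL, and the internal-buffer offset bookkeeping is simplified.

#include <algorithm>
#include <cstring>

// Hypothetical synchronous stand-in for the inner file stream reader.
struct InnerReader {
  virtual ~InnerReader() {}
  virtual int Read(char* out, int length) = 0;
};

// Step 1: copy out whatever is already buffered; it must never be
// skipped, or bytes would be lost. Step 2: only if the remainder still
// exceeds the internal buffer size, read it directly instead of going
// through another preload round trip.
int ReadWithDirectPath(InnerReader* inner,
                       const char* internal_buffer,
                       int* buffered_bytes,
                       char* out,
                       int buffer_length,
                       int buffer_size) {
  const int from_buffer = std::min(*buffered_bytes, buffer_length);
  std::memcpy(out, internal_buffer, from_buffer);
  *buffered_bytes -= from_buffer;

  const int remaining = buffer_length - from_buffer;
  if (remaining > buffer_size)
    return from_buffer + inner->Read(out + from_buffer, remaining);

  // Otherwise, fall back to the usual preload-then-copy path (omitted).
  return from_buffer;
}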
hashimoto
2014/06/10 10:43:24
On 2014/06/09 13:51:16, mtomasz wrote:
> But how do you want to use a static assert here? Could you clarify?

If it was really a purpose of this class, having a static assert to check that the chunk size is less than the IPC size limit would make your purpose clearer and prevent a mysterious crash from happening when someone tries to increase the chunk size in the future.

On 2014/06/10 08:38:42, mtomasz wrote:
> We can't simply add that if. If the inner buffer already has some data, we
> have to copy it out first, and only then, if |buffer_length - bytes_read| >
> buffer_size_, read the rest directly.

The code here returns the result without performing an actual read when any data is left in the buffer. Why do you think we need to change this behavior for the direct read case?

> That is non-trivial, and it will require a test case. But we won't use this
> code path,

Sorry, I don't understand why you are OK with supporting synchronous readers and having test cases for them, which we will never need, while you are feeling nervous about preparing for a future chunk size increase, which may possibly happen.

> because of the current buffer length size restriction. And if the buffer
> length grows to a large size in the future, then fetching a lot of data
> directly from providing extensions will cause request timeouts in the case
> of remote providing extensions.

If that is the case, shouldn't we allow providing extensions to decide how to behave? (e.g. depending on their servers' capability, capping the chunk size and returning a smaller amount of data for a read request.) Also, isn't 512KB already large enough to cause a timeout with a poor network connection?
mtomasz
2014/06/10 12:23:55
On 2014/06/10 10:43:24, hashimoto wrote:
> If it was really a purpose of this class, having a static assert to check
> that the chunk size is less than the IPC size limit would make your purpose
> clearer and prevent a mysterious crash from happening when someone tries to
> increase the chunk size in the future.

Do you want to crash with a static assert in such situations? Why? We want to let providers work, even if |buffer_length| is too large.

> The code here returns the result without performing an actual read when any
> data is left in the buffer.

We always try to read |buffer_length| from the internal buffer. If there is not enough, then we return partial results. If nothing is read, then we prefetch and return as much as we can.

> Why do you think we need to change this behavior for the direct read case?

If we skip contents in the buffer, then we will lose data. We always *must* return all data from the internal buffer before calling the internal file stream reader for more data.

> Sorry, I don't understand why you are OK with supporting synchronous readers
> and having test cases for them, which we will never need, while you are
> feeling nervous about preparing for a future chunk size increase, which may
> possibly happen.

Support for synchronous readers:
+ Lets us remove the dependency on the internal file stream reader implementation.
+ Makes the code simpler, and DCHECK free. Think of GetLength.
- Requires TEST_P. Test cases are the same.
- We don't need it now.

As for preparing for a future chunk size increase:
- Requires *new* test cases.
- We don't need it now.
- We will not need it in the future. (See below.)
- May cause a lot of problems. (See below.)
- We can easily add it later if needed.

I honestly can't see any advantages.

> If that is the case, shouldn't we allow providing extensions to decide how to
> behave? (e.g. depending on their servers' capability, capping the chunk size
> and returning a smaller amount of data for a read request.)

1. They already can return data in chunks, so e.g. that 512KB they can split into e.g. 10x51KB.
2. If we add this "if", then the buffer size may be anything in the future. Every developer would have to add logic for splitting it, but I predict that most of them will not. So once we increase the buffer size in Chromium, all remote extensions will start timing out, because a lot of developers assumed (by observation) that the buffer size is always 512KB.
3. If the buffer is too large, then it will affect copying time between remote providing extensions, and between them and Drive. We can't start uploading before the first chunk is fully downloaded. This means an up to 2x slower copy if the file size is around the buffer size.
4. Similarly, for a too big buffer size, videos will load with a significant delay.
5. For a big buffer size, copying progress bars will be very unreliable.

That said, I'm still not convinced how having a large buffer, like a couple of megabytes, could be a good thing in the general case. I mentioned several serious problems above, and there are probably many more.

> Also, isn't 512KB already large enough to cause a timeout with a poor network
> connection?

It may be too big. I'm thinking of a solution for this, e.g. a dialog like "This operation takes longer than expected. Do you want to wait for it or abort?".
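
A minimal standalone sketch of the copy step with the semantics just described (hypothetical; the real CopyFromBuffer operates on net::IOBuffer and the class members rather than raw pointers passed in):

#include <algorithm>
#include <cstring>

// Return up to |buffer_length| bytes from the preloading buffer,
// advancing the offset. A partial result is valid; zero means the buffer
// is empty and a preload round trip is needed before any more data can
// be returned.
int CopyFromBuffer(const char* preloading_buffer,
                   int* preloading_buffer_offset,
                   int* buffered_bytes,
                   char* out,
                   int buffer_length) {
  const int bytes_to_copy = std::min(*buffered_bytes, buffer_length);
  std::memcpy(out, preloading_buffer + *preloading_buffer_offset,
              bytes_to_copy);
  *preloading_buffer_offset += bytes_to_copy;
  *buffered_bytes -= bytes_to_copy;
  return bytes_to_copy;
}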
hashimoto
2014/06/11 06:01:03
On 2014/06/10 12:23:55, mtomasz wrote:
> Do you want to crash with a static assert in such situations? Why?

Static assert failure doesn't cause a crash. It causes a compile error.

> We want to let providers work, even if |buffer_length| is too large.

Didn't you say that the browser crashes when the buffer length is larger than the maximum IPC message size?

> If we skip contents in the buffer, then we will lose data. We always *must*
> return all data from the internal buffer before calling the internal file
> stream reader for more data.

I'm not saying we should leave data unread in the buffer. What I'm saying is that we should just put an if before the Preload() and let the inner reader directly return the result when appropriate.

> Support for synchronous readers:
> + Lets us remove the dependency on the internal file stream reader
>   implementation.
> + Makes the code simpler, and DCHECK free. Think of GetLength.

A DCHECK cannot cause a malfunction in the product, while an "if" can. GetLength() is nothing here. All the complexity of this class is dedicated to Read().

> - Requires TEST_P. Test cases are the same.

Not exactly the same. There are a number of "mode_ == SYNCHRONOUS" checks in the tests and the fake classes.

> As for preparing for a future chunk size increase:
> - We can easily add it later if needed.

The problem here is that it's hard to know when it's needed. When the file systems provided by extensions are slow, it's hard to know which part is the bottleneck. It can be the cloud service, the extension implementation, the IPC between the provider extension and the browser process, the data handling in the browser process, or Files.app.

> 2. If we add this "if", then the buffer size may be anything in the future.
> Every developer would have to add logic for splitting it, but I predict that
> most of them will not. So once we increase the buffer size in Chromium, all
> remote extensions will start timing out, because a lot of developers assumed
> (by observation) that the buffer size is always 512KB.

We shouldn't care about developers who depend on undocumented behavior.

> 3. If the buffer is too large, then it will affect copying time between
> remote providing extensions, and between them and Drive. We can't start
> uploading before the first chunk is fully downloaded. This means an up to 2x
> slower copy if the file size is around the buffer size.

Our current Drive implementation doesn't start uploading until the file gets closed.

> That said, I'm still not convinced how having a large buffer, like a couple
> of megabytes, could be a good thing in the general case. I mentioned several
> serious problems above, and there are probably many more.

It's not you who makes this decision. Anyone can modify the user code in the fileapi layer to change the buffer size, with a good rationale, without consulting you.

> It may be too big. I'm thinking of a solution for this, e.g. a dialog like
> "This operation takes longer than expected. Do you want to wait for it or
> abort?".

Why do you need to implement a new UI when the operation is aborted thanks to the timeout?
mtomasz
2014/06/16 04:12:18
On 2014/06/11 06:01:03, hashimoto wrote:
> Static assert failure doesn't cause a crash. It causes a compile error.

We can't use one then. We need to compile Chrome, even if the buffer is larger. The buffer size is the same for any file stream reader; BufferingFileStreamReader just adjusts it to our preferred size, which is now 512KB.

> Didn't you say that the browser crashes when the buffer length is larger than
> the maximum IPC message size?

These things are not related. BufferedFileStreamReader works even if BufferedFileStreamReader::Read's |buffer_length| is larger than the max IPC message size, since the *inner* file stream reader's Read would be called with a |buffer_length| of up to 512KB, which is smaller than the IPC message size. If we introduced the if-statement you suggested, then we could crash.

> Not exactly the same. There are a number of "mode_ == SYNCHRONOUS" checks in
> the tests and the fake classes.

Only one place in the test cases. For the fake class, yes, we have more, but they are pretty trivial.

> The problem here is that it's hard to know when it's needed. When the file
> systems provided by extensions are slow, it's hard to know which part is the
> bottleneck. It can be the cloud service, the extension implementation, the
> IPC between the provider extension and the browser process, the data handling
> in the browser process, or Files.app.

You are saying that the bottleneck can be anywhere, and I agree. Hence, just blindly allowing a larger buffer (just in case) is IMHO not the correct solution to the bottleneck problem. Especially since, currently, increasing the buffer size over 512KB doesn't help and would move more responsibility onto developers.

I'm working on a performance analysis tool in chrome://provided-file-systems which would show the time taken by each of the steps, and then we will be able to optimize the API well.

> We shouldn't care about developers who depend on undocumented behavior.

Shouldn't we? A lot of developers, if they see that the buffer is always 512KB, will not split it into smaller requests. If you don't want to care about such developers, then a lot of apps in the Chrome store will be super flaky. And IMHO we don't want that to happen.

I believe we should make the API as simple as possible, so developers do not have much chance to break things. And still, we can't expect developers to write perfect code. That's why we have timing-out logic in the RequestManager, in case developers do not handle an exception, or forget to call a callback.

> Our current Drive implementation doesn't start uploading until the file gets
> closed.

AFAIR we are going to fix that (crbug.com/126902). Also, file stream providers will support streamed uploading from the beginning.

> It's not you who makes this decision. Anyone can modify the user code in the
> fileapi layer to change the buffer size, with a good rationale, without
> consulting you.

I'm not sure I understand your point. BufferedFileStreamReader sets the buffer to our preferred one, which is 512KB, and we don't care about the buffer size set in the fileapi layer.

> Why do you need to implement a new UI when the operation is aborted thanks to
> the timeout?

In case the HTTP connection is very slow and the providing extension didn't finish fetching the chunk of data from the server. As you said, it may happen.
hashimoto
2014/06/19 07:27:45
On 2014/06/16 04:12:18, mtomasz wrote:
> These things are not related. BufferedFileStreamReader works even if
> BufferedFileStreamReader::Read's |buffer_length| is larger than the max IPC
> message size, since the *inner* file stream reader's Read would be called
> with a |buffer_length| of up to 512KB, which is smaller than the IPC message
> size. If we introduced the if-statement you suggested, then we could crash.

What I meant is that if you really care about the 128MB IPC message size limit, you should assert that the chunk size used by this class (512KB currently) is smaller than the IPC size limit (128MB currently), so that it fails in the compile stage rather than in browser_tests when someone tries to change the chunk size to a value larger than 128MB. I don't think we should care about the 128MB IPC message size limit, though.

> You are saying that the bottleneck can be anywhere, and I agree. Hence, just
> blindly allowing a larger buffer (just in case) is IMHO not the correct
> solution to the bottleneck problem. Especially since, currently, increasing
> the buffer size over 512KB doesn't help and would move more responsibility
> onto developers.
>
> I'm working on a performance analysis tool in chrome://provided-file-systems
> which would show the time taken by each of the steps, and then we will be
> able to optimize the API well.

IIUC the purpose of this class was just to reduce the number of IPCs. Why did you suddenly start arguing that reads should be capped at 512KB even when fileapi wants to read in a larger-sized chunk? I don't see any problems in moving responsibility to the extension when a larger size is requested.

> Shouldn't we? A lot of developers, if they see that the buffer is always
> 512KB, will not split it into smaller requests. If you don't want to care
> about such developers, then a lot of apps in the Chrome store will be super
> flaky. And IMHO we don't want that to happen.

The API documentation is the only contract made between Chromium and extension developers. If you think this should be 512KB forever (even after you stop maintaining this code and after technology advancement makes a 512KB chunk size ridiculously small), you should write it in the API documentation.

> I believe we should make the API as simple as possible, so developers do not
> have much chance to break things. And still, we can't expect developers to
> write perfect code. That's why we have timing-out logic in the
> RequestManager, in case developers do not handle an exception, or forget to
> call a callback.

The timeout should be there because, without it, wrongly implemented extensions can damage Chrome itself.

> AFAIR we are going to fix that (crbug.com/126902). Also, file stream
> providers will support streamed uploading from the beginning.

crbug.com/126902 is about upload for web sites like Gmail, not to Drive. Just mentioned to clarify that you shouldn't use our Drive client as an example here.

> I'm not sure I understand your point. BufferedFileStreamReader sets the
> buffer to our preferred one, which is 512KB, and we don't care about the
> buffer size set in the fileapi layer.

This class's purpose is to reduce the number of IPCs, right? If fileapi is changed to use a chunk size larger than 512KB, it means that we will end up doing the opposite thing, even after technology advancement or something makes a 512KB chunk size ridiculously small. Don't you want your code to work without problems for a decade or more?

> In case the HTTP connection is very slow and the providing extension didn't
> finish fetching the chunk of data from the server. As you said, it may
> happen.

When the timeout happens, the operation gets aborted automatically. You don't need to implement any new UI for it.
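
Concretely, the compile-time check being proposed might look like the sketch below (the constant names here are hypothetical local copies for illustration; the real kMaximumMessageSize lives in ipc/ipc_channel.h, and a 2014-era Chromium would spell this with COMPILE_ASSERT from base/macros.h rather than C++11 static_assert):

// Hypothetical local constants for illustration.
const int kPreloadingBufferSize = 512 * 1024;       // Chunk size of this class.
const int kMaximumMessageSize = 128 * 1024 * 1024;  // IPC message size limit.

// Fails at compile time, not at runtime in browser_tests, if someone
// later raises the chunk size past the IPC message size limit.
static_assert(kPreloadingBufferSize < kMaximumMessageSize,
              "Chunk size must be smaller than the maximum IPC message size");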
mtomasz
2014/06/20 04:20:23
On 2014/06/19 07:27:45, hashimoto wrote:
> IIUC the purpose of this class was just to reduce the number of IPCs. Why did
> you suddenly start arguing that reads should be capped at 512KB even when
> fileapi wants to read in a larger-sized chunk? I don't see any problems in
> moving responsibility to the extension when a larger size is requested.

I already answered this question:
> - Requires *new* test cases.
> - We don't need it now.
> - We will not need it in the future. (See below.)
> - May cause a lot of problems. (See below.)
> - We can easily add it later if needed.

> The API documentation is the only contract made between Chromium and
> extension developers. If you think this should be 512KB forever (even after
> you stop maintaining this code and after technology advancement makes a
> 512KB chunk size ridiculously small), you should write it in the API
> documentation.

I'm not planning to write it in the documentation. Developers should chop the requests into chunks, and they shouldn't assume that the buffer size is fixed to any value. But I know that a lot of developers will not chop. Don't you think so?

> crbug.com/126902 is about upload for web sites like Gmail, not to Drive.

Got it. Do we have any plans to support streamed uploading in Drive then?

> This class's purpose is to reduce the number of IPCs, right? If fileapi is
> changed to use a chunk size larger than 512KB, it means that we will end up
> doing the opposite thing, even after technology advancement or something
> makes a 512KB chunk size ridiculously small. Don't you want your code to
> work without problems for a decade or more?

The purpose of this class is to set the buffer size to our preferred size. You are suggesting writing non-trivial code which will most probably not be used for years and will not improve performance. That's why I'm not convinced. But if you insist, I can add it.

> When the timeout happens, the operation gets aborted automatically. You
> don't need to implement any new UI for it.

We can't blindly time out each request after 10 seconds. It may happen that a providing extension shows a dialog asking for credentials, and the operation would take minutes. We don't want to abort in such a case.

Please read the API proposals, as well as the "File System Provider API Request Policy" (slightly outdated) I shared with the team. The timing-out problem is described there in detail.
> >
> > > >
> > > > > >
> > > > > > So basically, there is IMHO no reason to add this code path.
> > > > > >
> > > > > > Hence, I added a comment to make it clear that this class fetches
data
> > > from
> > > > > the
> > > > > > underlying file stream reader in chunks of up to |buffer_length|
size.
> > > > >
> > > >
> > >
> >
>
hashimoto
2014/06/27 02:17:58
Adding a test case is better than being the bottleneck.
On 2014/06/20 04:20:23, mtomasz wrote:
> On 2014/06/19 07:27:45, hashimoto wrote:
> > On 2014/06/16 04:12:18, mtomasz wrote:
> > > On 2014/06/11 06:01:03, hashimoto wrote:
> > > > On 2014/06/10 12:23:55, mtomasz wrote:
> > > > > On 2014/06/10 10:43:24, hashimoto wrote:
> > > > > > On 2014/06/10 08:38:42, mtomasz wrote:
> > > > > > > On 2014/06/09 13:51:16, mtomasz wrote:
> > > > > > > > On 2014/06/09 11:12:04, hashimoto wrote:
> > > > > > > > > On 2014/06/06 04:22:56, mtomasz wrote:
> > > > > > > > > > On 2014/06/06 03:36:52, hashimoto wrote:
> > > > > > > > > > > How about adding here something like:
> > > > > > > > > > >
> > > > > > > > > > > if (buffer_length >= buffer_size_)
> > > > > > > > > > >   return file_stream_reader_->Read(buffer, buffer_length,
> > > > > > > > > > >                                    callback);
> > > > > > > > > > >
> > > > > > > > > > > I'm worrying, in future, the user code may pass a big
> > > > > > > > > > > value for |buffer_length| and this place may be a hidden
> > > > > > > > > > > bottleneck.
> > > > > > > > > >
> > > > > > > > > > The buffer length is not user controlled, but it is
> > > > > > > > > > hard-coded currently to 32KB. However, if it happened that
> > > > > > > > > > it is larger than |buffer_size_| (512KB) and we would have
> > > > > > > > > > this extra if, then we could crash the renderer process,
> > > > > > > > > > since the maximum IPC message is 1MB.
> > > > > > > > > >
> > > > > > > > > > https://code.google.com/p/chromium/codesearch#chromium/src/ipc/ipc_channel.h&...
> > > > > > > > > Isn't "static const size_t kMaximumMessageSize = 128 * 1024
> > > > > > > > > * 1024;" 128MB?
> > > > > > > > > >
> > > > > > > > > > Hence, the current code has two purposes - to increase the
> > > > > > > > > > chunk size from 32KB to 512KB, and to cap it at 512KB, so
> > > > > > > > > > we won't crash the renderer.
> > > > > > > > > If it was the purpose, why don't you add a static assert
> > > > > > > > > against it?
> > > > > > > >
> > > > > > > > You're right, of course 128MB. In such a case, the extra if of
> > > > > > > > course makes sense. But how do you want to use a static assert
> > > > > > > > here? Could you clarify?
> > > > > > If it was really a purpose of this class, having a static assert
> > > > > > to check that the chunk size is less than the IPC size limit will
> > > > > > make your purpose clearer and prevent a mysterious crash from
> > > > > > happening when someone tries to increase the chunk size in future.
> > > > >
> > > > > Do you want to crash with a static assert in such situations? Why?
> > > > > We want to let providers work, even if the |buffer_length| is too
> > > > > large.
> > > > Static assert failure doesn't cause a crash. It causes a compile
> > > > error.
> > >
> > > We can't use them then. We need to compile Chrome, even if the buffer
> > > is larger. The buffer size is the same for any file stream reader.
> > > BufferingFileStreamReader just adjusts it to our preferred size - which
> > > is now 512 KB.
> > >
> > > > Didn't you say that the browser crashes when the buffer length is
> > > > larger than the maximum IPC message size?
> > >
> > > These things are not related. BufferingFileStreamReader works even if
> > > BufferingFileStreamReader::Read's |buffer_length| is larger than the
> > > max IPC message size, since the *inner file stream reader's* Read would
> > > be called with |buffer_length| up to 512KB, which is smaller than the
> > > IPC message size. If we introduced the if-statement you suggested, then
> > > we could crash.
> > What I meant is that if you really care about the 128MB IPC message size
> > limit, you should assert that the chunk size used by this class (512KB
> > currently) is smaller than the IPC size limit (128MB currently), so that
> > it can fail in the compile stage rather than in browser_tests when
> > someone tries to change the chunk size to a value larger than 128MB.
> > I don't think we should care about the 128MB IPC message size limit
> > though.
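
For reference, the compile-time guard being discussed might look roughly like
this - a sketch only: COMPILE_ASSERT is Chromium's pre-C++11 assert macro from
base/basictypes.h, and |kPreloadingBufferSize| is an assumed name for the
512KB constant handed to this class, not an existing identifier.

#include "base/basictypes.h"  // For COMPILE_ASSERT.
#include "ipc/ipc_channel.h"  // For IPC::Channel::kMaximumMessageSize.

// Fails at compile time, not at runtime, if someone bumps the preloading
// chunk past what a single IPC message can carry.
COMPILE_ASSERT(static_cast<size_t>(kPreloadingBufferSize) <
                   IPC::Channel::kMaximumMessageSize,
               preload_chunk_must_fit_in_one_ipc_message);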
>
> > > > > >
> > > > > > > I thought about it again, and IMHO we should have a cap for the
> > > > > > > buffer size as it is.
> > > > > > >
> > > > > > > We can't simply add that if. If the inner buffer already has
> > > > > > > some data, we have to copy it first, and then, if
> > > > > > > |buffer_length - bytes_read| > buffer_size_, read the rest
> > > > > > > directly.
> > > > > > The code here returns the result without performing an actual
> > > > > > read when any data is left in the buffer.
> > > > >
> > > > > We always try to read |buffer_length| from the internal buffer. If
> > > > > there is not enough, then we return partial results. If nothing is
> > > > > read, then we prefetch and return as much as we can.
> > > > >
> > > > > > Why do you think we need to change this behavior for the direct
> > > > > > read case?
> > > > >
> > > > > If we skip contents in the buffer, then we will lose data. We
> > > > > always *must* return all data from the internal buffer before
> > > > > calling the internal file stream reader for more data.
> > > > I'm not saying we should leave data unread in the buffer.
> > > > What I'm saying is that we should just put an if before the Preload()
> > > > and let the inner reader directly return the result when appropriate.
> > > > >
> > > > > > >
> > > > > > > It is non-trivial, and it will require a test case. But we
> > > > > > > won't use this code,
> > > > > > Sorry, I don't understand why you are OK with supporting
> > > > > > synchronous readers and having test cases for it which we will
> > > > > > never need, while you are feeling nervous about preparing for a
> > > > > > future chunk size increase which may possibly happen.
> > > > >
> > > > > Support for synchronous readers:
> > > > > + Lets us remove the dependency on the internal file stream reader
> > > > >   implementation.
> > > > > + Makes code simpler, and DCHECK free. Think of GetLength.
> > > > DCHECK cannot cause a malfunction on the product while "if" can.
> > > > GetLength() is nothing here. All complexity of this class is
> > > > dedicated to Read().
> > > > > - Requires TEST_P. Test cases are the same.
> > > > Not exactly the same. There are a number of "mode_ == SYNCHRONOUS" in
> > > > tests and the fake classes.
> > >
> > > Only one place in test cases. For the fake class, yes, we have more,
> > > but they are pretty trivial.
> > >
> > > > > - We don't need it now.
> > > > >
> > > > > As for "preparing for future chunk size increase":
> > > > > - Requires *new* test cases.
> > > > > - We don't need it now.
> > > > > - We will not need it in the future. (See below)
> > > > > - May cause a lot of problems. (See below)
> > > > > - We can easily add it later if needed.
> > > > The problem here is that it's hard to know when it's needed.
> > > > When the file system provided by extensions is slow, it's hard to
> > > > know which part is the bottleneck.
> > > > It can be the cloud service, the extension implementation, IPC
> > > > between the provider extension and the browser process, data handling
> > > > in the browser process, or Files.app.
> > >
> > > You are saying that the bottleneck can be anywhere, and I agree.
> > IIUC the purpose of this class was just to reduce the number of IPCs.
>
> I already answered this question.
> > > > > - Requires *new* test cases.
Adding a test case is better than being the bottleneck.
> > > > > - We don't need it now.
We may need it in future.
> > > > > - We will not need it in the future. (See below)
?
> > > > > - May cause a lot of problems. (See below)
?
> > > > > - We can easily add it later if needed.
The problem is that there is no easy way to know when this place becomes the
bottleneck.
>
> I'm not planning to write it in the documentation. Developers should chop
> the requests into chunks, and they shouldn't assume that the buffer size is
> fixed to any value. But I know that a lot of developers will not chop.
> Don't you think so?
We shouldn't care about developers who write code which stops working when
the read chunk size is changed.
They are simply violating the contract.
>
> Got it. Do we have any plans to support streamed uploading in Drive then?
No.
>
> The purpose of this class is to set the buffer size to our preferred size.
> You are suggesting writing non-trivial code which will most probably not be
> used for years and will not improve performance. That's why I'm not
> convinced. But if you insist, I can add it.
This code may be used within years, while the code to handle synchronously
returned results won't be used until the end of the universe.
Why and how should we decide that the "preferred size" is 512 KB?
When the requested data size is larger than 512KB, this class should do
nothing.
>
> We can't blindly time out each request after 10 seconds. It may happen that
> a providing extension shows a dialog for credentials, and the operation
> would take minutes. We don't want to abort in such a case.
Does it mean we show a cancel dialog every time the user takes a long time to
finish the authentication?
Isn't it annoying?
Shouldn't we add a dedicated API mechanism to let the provider extension
finish the authentication step?
>
> Please read the API proposals as well as the "File System Provider API
> Request Policy" (slightly outdated) I shared with the team. The timing out
> problem is described there in detail.
mtomasz
2014/06/27 05:00:25
On 2014/06/27 02:17:58, hashimoto wrote:
On 2014/06/27 02:17:58, hashimoto wrote:
> On 2014/06/20 04:20:23, mtomasz wrote:
> > On 2014/06/19 07:27:45, hashimoto wrote:
> > > On 2014/06/16 04:12:18, mtomasz wrote:
> > > > On 2014/06/11 06:01:03, hashimoto wrote:
> > > > > On 2014/06/10 12:23:55, mtomasz wrote:
> > > > > > - Requires *new* test cases.
> Adding a test case is better than being the bottleneck.

Removing the 512KB limit may cause other bottlenecks (streamed uploading),
which I explained before. But I'll just add this code so we can move forward.

> > > > > > - We don't need it now.
> We may need it in future.
> > > > > > - We will not need it in the future. (See below)
> ?
> > > > > > - May cause a lot of problems. (See below)
> ?

What does this question mark mean here?

> > > > > > - We can easily add it later if needed.
> The problem is that there is no easy way to know when this place becomes
> the bottleneck.
>
> > Got it. Do we have any plans to support streamed uploading in Drive then?
> No.

Got it.

> This code may be used within years, while the code to handle synchronously
> returned results won't be used until the end of the universe.

I have plans to clean up file_system_provider/fileapi/file_stream_reader.cc,
which would return values synchronously, so not the end of the universe, but
a couple of weeks? :)

> Why and how should we decide that the "preferred size" is 512 KB?

Do you have a suggestion for a better value than 512 KB? I decided based on
experiments. I guess something similar was done for the 32 KB buffer size of
FileStreamReaders.

> When the requested data size is larger than 512KB, this class should do
> nothing.

In general I prefer arguments over "should". But anyway, I'm going to
implement it, to move forward.
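
A minimal sketch of how that pass-through could slot into Read(), on top of
the current code (untested; the exact condition and comments are a sketch of
the agreed-upon direction, not the final patch):

int BufferingFileStreamReader::Read(net::IOBuffer* buffer,
                                    int buffer_length,
                                    const net::CompletionCallback& callback) {
  // Return as much as is available in the internal buffer. It may be less
  // than |buffer_length|, which is valid.
  const int bytes_read =
      CopyFromBuffer(make_scoped_refptr(buffer), buffer_length);
  if (bytes_read)
    return bytes_read;

  // The internal buffer is drained. For requests at least as large as the
  // preloading buffer, buffering would not reduce the number of requests,
  // so pass them through to the inner reader directly.
  if (buffer_length >= buffer_size_)
    return file_stream_reader_->Read(buffer, buffer_length, callback);

  // Nothing copied, so contents have to be preloaded.
  Preload(base::Bind(&BufferingFileStreamReader::OnPreloadCompleted,
                     weak_ptr_factory_.GetWeakPtr(),
                     make_scoped_refptr(buffer),
                     buffer_length,
                     callback));

  return net::ERR_IO_PENDING;
}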

> Does it mean we show a cancel dialog every time the user takes a long time
> to finish the authentication?
> Isn't it annoying?
> Shouldn't we add a dedicated API mechanism to let the provider extension
> finish the authentication step?

Yes. If authentication takes long, a notification will pop up. A more
sophisticated solution may come in the future.

  // Nothing copied, so contents have to be preloaded.
  Preload(base::Bind(&BufferingFileStreamReader::OnPreloadCompleted,
                     weak_ptr_factory_.GetWeakPtr(),
                     make_scoped_refptr(buffer),
                     buffer_length,
                     callback));

  return net::ERR_IO_PENDING;
}

int64 BufferingFileStreamReader::GetLength(
    const net::Int64CompletionCallback& callback) {
  return file_stream_reader_->GetLength(callback);
}

int BufferingFileStreamReader::CopyFromBuffer(
    scoped_refptr<net::IOBuffer> buffer,
    int buffer_length) {
  const int read_bytes = std::min(buffer_length, buffered_bytes_);

  memcpy(buffer->data(),
         preloading_buffer_->data() + preloading_buffer_offset_,
         read_bytes);
  preloading_buffer_offset_ += read_bytes;
  buffered_bytes_ -= read_bytes;

  return read_bytes;
}

void BufferingFileStreamReader::Preload(
    const net::CompletionCallback& callback) {
  // TODO(mtomasz): Dynamically calculate the chunk size. Start from a small
  // one, then increase it for consecutive requests. That would improve
  // performance when reading just small chunks, instead of the entire file.
  const int preload_bytes = buffer_size_;

  const int result =
      file_stream_reader_->Read(preloading_buffer_, preload_bytes, callback);

  // Synchronous results are posted back, so the caller always observes the
  // completion asynchronously via |callback|.
  if (result != net::ERR_IO_PENDING) {
    base::MessageLoopProxy::current()->PostTask(FROM_HERE,
                                                base::Bind(callback, result));
  }
}

void BufferingFileStreamReader::OnPreloadCompleted(
    scoped_refptr<net::IOBuffer> buffer,
    int buffer_length,
    const net::CompletionCallback& callback,
    int result) {
  if (result < 0) {
    callback.Run(result);
    return;
  }

  preloading_buffer_offset_ = 0;
  buffered_bytes_ = result;

  callback.Run(CopyFromBuffer(buffer, buffer_length));
}

}  // namespace file_system_provider
}  // namespace chromeos
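
One possible shape for the TODO in Preload() above (a sketch only:
|next_preload_bytes_| is an assumed new member initialized to a small value,
e.g. 32KB, in the constructor, and the doubling policy is an assumption, not
a decision made in this review):

void BufferingFileStreamReader::Preload(
    const net::CompletionCallback& callback) {
  const int preload_bytes = next_preload_bytes_;

  // Grow the chunk for consecutive reads, so short one-off reads stay cheap,
  // while long sequential reads quickly reach the full |buffer_size_|.
  next_preload_bytes_ = std::min(next_preload_bytes_ * 2, buffer_size_);

  const int result =
      file_stream_reader_->Read(preloading_buffer_, preload_bytes, callback);

  if (result != net::ERR_IO_PENDING) {
    base::MessageLoopProxy::current()->PostTask(FROM_HERE,
                                                base::Bind(callback, result));
  }
}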