| Index: common/proto/logdog/svcconfig/config.proto
|
| diff --git a/common/proto/logdog/svcconfig/config.proto b/common/proto/logdog/svcconfig/config.proto
|
| index 0c5251b26ad259ec8fe590464d4e39cb305a1814..a79cbfdeac5e2d854c0829f52efcf811acc0be19 100644
|
| --- a/common/proto/logdog/svcconfig/config.proto
|
| +++ b/common/proto/logdog/svcconfig/config.proto
|
| @@ -41,30 +41,32 @@ message Coordinator {
|
|
|
| // The name of the archive task queue.
|
| string archive_task_queue = 30;
|
| - // The amount of time after a log has been terminated before it is candidate
|
| +
|
| +  // The grace period after a log has been terminated before it is a candidate
|
| // for archival.
|
| //
|
| - // Archival triggered by this delay will NOT succeed if any log entries are
|
| - // missing from intermediate storage.
|
| - //
|
| - // This should be based on a period of time where it's reasonable to expect
|
| - // that all log messages in the transport have arrived for a given log stream.
|
| - // Since the transport doesn't have to guarantee in-order delivery, this
|
| - // should allow for the case where the terminal log entry arrives before some
|
| - // of the intermediate log entries. This will help avoid triggering
|
| - // archive attempts that are doomed to fail because of standard transport lag.
|
| - google.protobuf.Duration archive_delay = 31;
|
| + // This grace period is purely an optimization, providing time for
|
| + // out-of-order logs to settle and be ingested into intermediate storage and
|
| + // avoiding wasted Archivist cycles.
|
| + google.protobuf.Duration archive_settle_delay = 31;
|
| +
|
| // The amount of time before a log stream is candidate for archival regardless
|
| // of whether or not it's been terminated or complete.
|
| //
|
| - // This endpoint is a failsafe designed to ensure that log streams with
|
| - // missing records or no terminal record (e.g., Butler crashed) are eventually
|
| - // moved out of intermediate storage.
|
| + // This is a failsafe designed to ensure that log streams with missing records
|
| + // or no terminal record (e.g., Butler crashed) are eventually archived.
|
| //
|
| - // This must be >= `archive_delay`, and should be fairly large (days) to allow
|
| - // for the log stream to complete and for all available log entries to be
|
| - // added to intermediate storage.
|
| + // This should be fairly large (days) to avoid prematurely archiving
|
| + // long-running streams, but should be considerably smaller than the
|
| + // intermediate storage data retention period.
|
| google.protobuf.Duration archive_delay_max = 32;
|
| +
|
| + // The maximum number of times to retry a failed stream archival before giving
|
| + // up and clearing the stream.
|
| + //
|
| + // This only affects fatal archival failures (e.g., data corruption), not
|
| + // partial log stream content or transient failures.
|
| + int32 archive_retries = 33;
|
| }
|
|
|
| // Collector is the set of configuration parameters for Collector instances.
|
|
|