Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(88)

Unified Diff: common/api/dm/service/v1/graph_data.proto

Issue 1537883002: Initial distributor implementation (Closed) Base URL: https://chromium.googlesource.com/external/github.com/luci/luci-go@master
Patch Set: self review Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: common/api/dm/service/v1/graph_data.proto
diff --git a/common/api/dm/service/v1/graph_data.proto b/common/api/dm/service/v1/graph_data.proto
index c0b7bb82d98d0400804e6294376098dd4b12ab2a..baff0bb974b8b01e8e5784d6ec9b9c71b4b923d7 100644
--- a/common/api/dm/service/v1/graph_data.proto
+++ b/common/api/dm/service/v1/graph_data.proto
@@ -5,6 +5,7 @@
syntax = "proto3";
import "google/protobuf/timestamp.proto";
+import "google/protobuf/duration.proto";
import "github.com/luci/luci-go/common/api/template/template.proto";
@@ -12,6 +13,81 @@ import "types.proto";
package dm;
+message AbnormalFinish {
iannucci 2016/06/08 02:54:24 Here are all the abnormal finish status types. The
+ enum Status {
+ // This entity has a failed result.
+ //
+ // Executions: the distributor reported that the task executed and failed, OR
+ // the distributor reports success while the Execution is in the RUNNING
+ // state.
+ //
+ // Attempts: the last Execution had a FAILED Status.
+ //
+ // Retryable.
+ FAILED = 0;
iannucci 2016/06/08 02:54:24 I'm not super sure how to deal with this one: this
dnj (Google) 2016/06/09 18:00:57 I think things pre-recipe-engine can fail (e.g., b
iannucci 2016/06/15 00:46:02 Yeah I think this makes sense. For recipe quests w
+
+ // This entity failed in a bad way.
+ //
+ // Executions: The distributor told us that the job died violently while in
+ // the SCHEDULING, RUNNING or STOPPING state.
+ //
+ // Attempts: the last Execution had a CRASHED Status.
+ //
+ // Retryable.
+ CRASHED = 1;
+
+ // Waited too long for the job to start.
+ //
+ // Executions: the distributor couldn't start the job in time, OR DM failed
+ // to get a status update from the distributor in time (e.g. the state was
+ // SCHEDULING for too long).
+ //
+ // Attempts: the last Execution had an EXPIRED Status.
+ //
+ // Retryable.
+ EXPIRED = 2;
+
+ // The job started, but took too long.
+ //
+ // Executions: the distributor started the job, but it couldn't complete in
+ // time, OR DM failed to get a status update from the distributor in time
+ // (e.g. the state was RUNNING for too long).
+ //
+ // Attempts: the last Execution had a TIMED_OUT Status.
+ //
+ // Retryable.
+ TIMED_OUT = 3;
+
+ // The job was cancelled by an external entity (human, automated system).
+ //
+ // Executions: the distributor informed DM that the job was preemptively
+ // cancelled.
+ //
+ // Attempts: the last Execution had a CANCELLED Status, or this Attempt
+ // was cancelled via DM.
+ CANCELLED = 4;
iannucci 2016/06/08 02:54:24 later when DM supports cancellation directly, this
dnj (Google) 2016/06/09 18:00:57 nit: In American English, the verb cancel is usual
iannucci 2016/06/15 00:46:01 Who said protos were American? THEY LIVE IN CYBERS
dnj (Google) 2016/06/16 16:57:22 I'ma tell Obama.
+
+ // The job was prevented from running by the distributor (quota, permissions,
+ // etc.)
+ //
+ // Executions: the distributor refused to run this job.
+ //
+ // Attempts: the last Execution had a REJECTED Status.
dnj (Google) 2016/06/09 18:00:57 Is this not retryable? If we're out of quota, we m
iannucci 2016/06/15 00:46:02 I think this should definitely be retried at a hig
+ REJECTED = 5;
+
+ // The job is unrecognized.
+ //
+ // Executions: the distributor doesn't know about this job, or has forgotten
+ // about it.
+ //
+ // Attempts: the last Execution had a MISSING Status.
dnj (Google) 2016/06/09 18:00:57 MISSING Status?
iannucci 2016/06/15 00:46:01 oops
+ MISSING = 6;
+ }
+
+ Status status = 1;
+ string reason = 2;
+}
+
message Quest {
message ID {
string id = 1;
@@ -23,8 +99,51 @@ message Quest {
bool DNE = 2;
message Desc {
+ // TODO(iannucci): have a 'simple_idempotent' quest mode which:
+ // * isn't allowed/expected to call any API methods (ActivateExecution,
+ // EnsureGraphData, or WalkGraph)
+ // * only provides data back through the distributor-specific 'state'
+ // field.
+ //
+ // Examples of use for this would be:
+ // * simple test binaries that run/output to an ISOLATED_OUTDIR
+ // * testing / ad-hoc bash scripts
+
string distributor_config_name = 1;
string json_payload = 2;
+
+ message Meta {
+ // This names the user/service account for all Attempts on this quest. You
+ // must have permission to use this account when creating the Quest and/or
+ // Attempts.
+ string as_account = 1;
+
+ message Retry {
+ // The number of times in a row to retry Executions which have an
+ // ABNORMAL_FINISHED status of FAILED.
+ uint32 failed = 1;
+
+ // The number of times in a row to retry Executions which have an
+ // ABNORMAL_FINISHED status of EXPIRED.
+ uint32 expired = 2;
+
+ // The number of times in a row to retry Executions which have an
+ // ABNORMAL_FINISHED status of TIMED_OUT.
+ uint32 timed_out = 3;
+
+ // The number of times in a row to retry Executions which have an
+ // ABNORMAL_FINISHED status of CRASHED.
+ uint32 crashed = 4;
+ }
+
+ // This affects how DM will retry the job payload in various exceptional
+ // circumstances.
+ Retry retry = 2;
+ }
+
+ // This is metadata which doesn't affect the functionality of the payload,
+ // but does affect how DM and/or the distributor run/schedule that payload.
+ Meta meta = 3;
}
message TemplateSpec {
@@ -61,12 +180,20 @@ message Attempt {
bool DNE = 2;
enum State {
- NEEDS_EXECUTION = 0;
+ // The Attempt is waiting to be Executed.
dnj (Google) 2016/06/09 18:00:57 nit: period at end.
iannucci 2016/06/15 00:46:02 Done.
+ SCHEDULING = 0;
+
+ // The Attempt is currently waiting for its current Execution to finish.
EXECUTING = 1;
dnj (Google) 2016/06/09 18:00:57 IMO: s/the current/its current/
iannucci 2016/06/15 00:46:02 Done.
- ADDING_DEPS = 2;
- BLOCKED = 3;
- AWAITING_EXECUTION_STATE = 4;
- FINISHED = 5;
+
+ // The Attempt is waiting for dependent Attempts to be resolved.
+ WAITING = 2;
+
+ // The Attempt is in its final state.
+ FINISHED = 3;
+
+ // The Attempt is in an abnormal final state.
dnj (Google) 2016/06/09 18:00:57 nit: period at end.
iannucci 2016/06/15 00:46:02 Done.
+ ABNORMAL_FINISHED = 4;
}
message Data {
@@ -74,35 +201,37 @@ message Attempt {
google.protobuf.Timestamp modified = 2;
uint32 num_executions = 3;
- message NeedsExecution {
- google.protobuf.Timestamp pending = 1;
- }
+ // This attempt is ready to be Executed, but hasn't been sent to the
+ // distributor yet.
+ message Scheduling {}
+ // This attempt has a live Execution (with the specified ID). Check the
+ // Execution state for more information.
message Executing {
uint32 cur_execution_id = 1;
}
- message AddingDeps {
- uint32 num_adding = 1;
- uint32 num_waiting = 2;
- }
-
- message Blocked {
+ // This attempt's last Execution stopped by adding dependencies.
+ message Waiting {
uint32 num_waiting = 1;
}
+ // This attempt is complete.
message Finished {
google.protobuf.Timestamp expiration = 1;
uint32 json_result_size = 2;
string json_result = 3;
+
+ // This is the distributor-specific state of the final Execution.
+ string persistent_state_result = 4;
dnj (Google) 2016/06/09 18:00:57 Is "bytes" more appropriate? "string" implies UTF8
iannucci 2016/06/15 00:46:02 but string is the only map-key/immutable thing in
dnj (Google) 2016/06/16 16:57:22 Yeah I hear 'ya. But I think you did the right thi
}
oneof attempt_type {
- NeedsExecution needs_execution = 4;
- Executing executing = 5;
- AddingDeps adding_deps = 6;
- Blocked blocked = 7;
+ Scheduling scheduling = 5;
+ Executing executing = 6;
+ Waiting waiting = 7;
Finished finished = 8;
+ AbnormalFinish abnormal_finish = 9;
}
}
Data data = 3;
@@ -170,39 +299,53 @@ message Execution {
ID id = 1;
enum State {
- // The execution has been accepted by the distributor, but is not running yet
- SCHEDULED = 0;
+ // The execution has been accepted by the distributor, but is not running
+ // yet.
+ SCHEDULING = 0;
- // The execution is running
+ // The execution is running (has activated with DM).
RUNNING = 1;
- // The execution was unable to be accepted by the distributor
- REJECTED = 2;
+ // The execution has been told to stop by DM, but we haven't heard from
+ // the distributor yet.
+ STOPPING = 2;
- // The execution was accepted by the distributor, but couldn't run in time.
- TIMED_OUT = 3;
+ // The execution is in its final state.
+ FINISHED = 3;
- // The execution ran and completed
- FINISHED = 4;
+ // The execution is in an abnormal final state.
+ ABNORMAL_FINISHED = 4;
+ }
+
+ message Data {
+ google.protobuf.Timestamp created = 1;
+ google.protobuf.Timestamp modified = 2;
- // The execution ran, but the distributor claims it did not complete
- FAILED = 5;
+ message DistributorInfo {
+ string config_name = 1;
+ string config_version = 2;
+ string token = 3;
+ string url = 4;
+ }
+ DistributorInfo distributor_info = 3;
- // The distributor claims to not know anything about this execution
- MISSING = 6;
+ message Scheduling {}
- // Some entity (DM, Human, Distributor) requested that this execution not run.
- CANCELLED = 7;
- }
+ message Running {}
- message Data {
- State state = 1;
- string state_reason = 2;
+ message Stopping {}
- google.protobuf.Timestamp created = 3;
+ message Finished {
+ string persistent_state = 1;
+ }
- string distributor_token = 4;
- string distributor_info_url = 5;
+ oneof execution_type {
+ Scheduling scheduling = 4;
+ Running running = 5;
+ Stopping stopping = 6;
+ Finished finished = 7;
+ AbnormalFinish abnormal_finish = 8;
+ }
}
Data data = 2;

Powered by Google App Engine
This is Rietveld 408576698