OLD | NEW |
1 { | 1 { |
| 2 "auth": { |
| 3 "oauth2": { |
| 4 "scopes": { |
| 5 "https://www.googleapis.com/auth/cloud-platform": { |
| 6 "description": "View and manage your data across Google Clou
d Platform services" |
| 7 }, |
| 8 "https://www.googleapis.com/auth/userinfo.email": { |
| 9 "description": "View your email address" |
| 10 } |
| 11 } |
| 12 } |
| 13 }, |
2 "basePath": "", | 14 "basePath": "", |
3 "baseUrl": "https://dataflow.googleapis.com/", | 15 "baseUrl": "https://dataflow.googleapis.com/", |
4 "batchPath": "batch", | 16 "batchPath": "batch", |
5 "description": "Google Dataflow API.", | 17 "description": "Google Dataflow API.", |
6 "discoveryVersion": "v1", | 18 "discoveryVersion": "v1", |
7 "documentationLink": "https://cloud.google.com/dataflow", | 19 "documentationLink": "https://cloud.google.com/dataflow", |
8 "etag": "\"ye6orv2F-1npMW3u9suM3a7C5Bo/OtVzMRRklw1RlkR7L_fUUSGrQuE\"", | 20 "etag": "\"ye6orv2F-1npMW3u9suM3a7C5Bo/tjir0NSBUJPbjWGXQgblQTQtnAA\"", |
9 "icons": { | 21 "icons": { |
10 "x16": "http://www.google.com/images/icons/product/search-16.gif", | 22 "x16": "http://www.google.com/images/icons/product/search-16.gif", |
11 "x32": "http://www.google.com/images/icons/product/search-32.gif" | 23 "x32": "http://www.google.com/images/icons/product/search-32.gif" |
12 }, | 24 }, |
13 "id": "dataflow:v1b3", | 25 "id": "dataflow:v1b3", |
14 "kind": "discovery#restDescription", | 26 "kind": "discovery#restDescription", |
15 "name": "dataflow", | 27 "name": "dataflow", |
16 "ownerDomain": "google.com", | 28 "ownerDomain": "google.com", |
17 "ownerName": "Google", | 29 "ownerName": "Google", |
18 "parameters": { | 30 "parameters": { |
(...skipping 69 matching lines...) |
88 "description": "V1 error format.", | 100 "description": "V1 error format.", |
89 "enumDescriptions": [ | 101 "enumDescriptions": [ |
90 "v1 error format", | 102 "v1 error format", |
91 "v2 error format" | 103 "v2 error format" |
92 ], | 104 ], |
93 "location": "query", | 105 "location": "query", |
94 "type": "string" | 106 "type": "string" |
95 } | 107 } |
96 }, | 108 }, |
97 "protocol": "rest", | 109 "protocol": "rest", |
98 "revision": "20150322", | 110 "resources": { |
| 111 "projects": { |
| 112 "resources": { |
| 113 "jobs": { |
| 114 "methods": { |
| 115 "create": { |
| 116 "description": "Creates a dataflow job.", |
| 117 "httpMethod": "POST", |
| 118 "id": "dataflow.projects.jobs.create", |
| 119 "parameterOrder": [ |
| 120 "projectId" |
| 121 ], |
| 122 "parameters": { |
| 123 "projectId": { |
| 124 "description": "The project which owns the j
ob.", |
| 125 "location": "path", |
| 126 "required": true, |
| 127 "type": "string" |
| 128 }, |
| 129 "view": { |
| 130 "description": "Level of information request
ed in response.", |
| 131 "enum": [ |
| 132 "JOB_VIEW_UNKNOWN", |
| 133 "JOB_VIEW_SUMMARY", |
| 134 "JOB_VIEW_ALL" |
| 135 ], |
| 136 "location": "query", |
| 137 "type": "string" |
| 138 }, |
| 139 "replaceJobId": { |
| 140 "description": "DEPRECATED. This field is no
w on the Job message.", |
| 141 "location": "query", |
| 142 "type": "string" |
| 143 } |
| 144 }, |
| 145 "path": "v1b3/projects/{projectId}/jobs", |
| 146 "request": { |
| 147 "$ref": "Job" |
| 148 }, |
| 149 "response": { |
| 150 "$ref": "Job" |
| 151 }, |
| 152 "scopes": [ |
| 153 "https://www.googleapis.com/auth/cloud-platform"
, |
| 154 "https://www.googleapis.com/auth/userinfo.email" |
| 155 ] |
| 156 }, |
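
The create method above composes into a plain REST call: POST {baseUrl}{path} with a Job message as the body, authorized by a bearer token carrying one of the two listed scopes. A minimal sketch using Python's requests library; the token, project id, and job body are illustrative assumptions, and obtaining the token (e.g. from a service account) is outside this sketch.

    import requests

    ACCESS_TOKEN = "ya29...."  # hypothetical OAuth2 token with the cloud-platform scope
    PROJECT_ID = "my-project"  # assumed project id

    # baseUrl + path from the discovery document: v1b3/projects/{projectId}/jobs
    url = "https://dataflow.googleapis.com/v1b3/projects/%s/jobs" % PROJECT_ID
    job = {"name": "example-job", "type": "JOB_TYPE_BATCH"}  # minimal Job message

    resp = requests.post(
        url,
        params={"view": "JOB_VIEW_SUMMARY"},                 # optional query parameter
        headers={"Authorization": "Bearer " + ACCESS_TOKEN},
        json=job,                                            # request body is a Job
    )
    resp.raise_for_status()
    created = resp.json()                                    # response is also a Job
    print(created.get("id"), created.get("currentState"))
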
| 157 "get": { |
| 158 "description": "Gets the state of the specified data
flow job.", |
| 159 "httpMethod": "GET", |
| 160 "id": "dataflow.projects.jobs.get", |
| 161 "parameterOrder": [ |
| 162 "projectId", |
| 163 "jobId" |
| 164 ], |
| 165 "parameters": { |
| 166 "projectId": { |
| 167 "description": "The project which owns the j
ob.", |
| 168 "location": "path", |
| 169 "required": true, |
| 170 "type": "string" |
| 171 }, |
| 172 "jobId": { |
| 173 "description": "Identifies a single job.", |
| 174 "location": "path", |
| 175 "required": true, |
| 176 "type": "string" |
| 177 }, |
| 178 "view": { |
| 179 "description": "Level of information request
ed in response.", |
| 180 "enum": [ |
| 181 "JOB_VIEW_UNKNOWN", |
| 182 "JOB_VIEW_SUMMARY", |
| 183 "JOB_VIEW_ALL" |
| 184 ], |
| 185 "location": "query", |
| 186 "type": "string" |
| 187 } |
| 188 }, |
| 189 "path": "v1b3/projects/{projectId}/jobs/{jobId}", |
| 190 "response": { |
| 191 "$ref": "Job" |
| 192 }, |
| 193 "scopes": [ |
| 194 "https://www.googleapis.com/auth/cloud-platform"
, |
| 195 "https://www.googleapis.com/auth/userinfo.email" |
| 196 ] |
| 197 }, |
| 198 "update": { |
| 199 "description": "Updates the state of an existing dat
aflow job.", |
| 200 "httpMethod": "PUT", |
| 201 "id": "dataflow.projects.jobs.update", |
| 202 "parameterOrder": [ |
| 203 "projectId", |
| 204 "jobId" |
| 205 ], |
| 206 "parameters": { |
| 207 "projectId": { |
| 208 "description": "The project which owns the j
ob.", |
| 209 "location": "path", |
| 210 "required": true, |
| 211 "type": "string" |
| 212 }, |
| 213 "jobId": { |
| 214 "description": "Identifies a single job.", |
| 215 "location": "path", |
| 216 "required": true, |
| 217 "type": "string" |
| 218 } |
| 219 }, |
| 220 "path": "v1b3/projects/{projectId}/jobs/{jobId}", |
| 221 "request": { |
| 222 "$ref": "Job" |
| 223 }, |
| 224 "response": { |
| 225 "$ref": "Job" |
| 226 }, |
| 227 "scopes": [ |
| 228 "https://www.googleapis.com/auth/cloud-platform"
, |
| 229 "https://www.googleapis.com/auth/userinfo.email" |
| 230 ] |
| 231 }, |
| 232 "list": { |
| 233 "description": "List the jobs of a project.", |
| 234 "httpMethod": "GET", |
| 235 "id": "dataflow.projects.jobs.list", |
| 236 "parameterOrder": [ |
| 237 "projectId" |
| 238 ], |
| 239 "parameters": { |
| 240 "projectId": { |
| 241 "description": "The project which owns the j
obs.", |
| 242 "location": "path", |
| 243 "required": true, |
| 244 "type": "string" |
| 245 }, |
| 246 "view": { |
| 247 "description": "Level of information request
ed in response. Default is SUMMARY.", |
| 248 "enum": [ |
| 249 "JOB_VIEW_UNKNOWN", |
| 250 "JOB_VIEW_SUMMARY", |
| 251 "JOB_VIEW_ALL" |
| 252 ], |
| 253 "location": "query", |
| 254 "type": "string" |
| 255 }, |
| 256 "pageSize": { |
| 257 "description": "If there are many jobs, limi
t response to at most this many. The actual number of jobs returned will be the
lesser of page_size and an unspecified server-defined limit.", |
| 258 "format": "int32", |
| 259 "location": "query", |
| 260 "type": "integer" |
| 261 }, |
| 262 "pageToken": { |
| 263 "description": "Set this to the 'next_page_t
oken' field of a previous response to request additional results in a long list.
", |
| 264 "location": "query", |
| 265 "type": "string" |
| 266 } |
| 267 }, |
| 268 "path": "v1b3/projects/{projectId}/jobs", |
| 269 "response": { |
| 270 "$ref": "ListJobsResponse" |
| 271 }, |
| 272 "scopes": [ |
| 273 "https://www.googleapis.com/auth/cloud-platform"
, |
| 274 "https://www.googleapis.com/auth/userinfo.email" |
| 275 ] |
| 276 }, |
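
The pageToken/nextPageToken pair on list follows the usual pagination contract: request a page, and while ListJobsResponse carries nextPageToken, feed it back as pageToken. A sketch under the same assumptions as the create example above (requests, a pre-obtained bearer token):

    import requests

    def list_all_jobs(project_id, access_token, page_size=50):
        """Yield every Job in a project by following nextPageToken."""
        url = "https://dataflow.googleapis.com/v1b3/projects/%s/jobs" % project_id
        params = {"pageSize": page_size, "view": "JOB_VIEW_SUMMARY"}
        headers = {"Authorization": "Bearer " + access_token}
        while True:
            resp = requests.get(url, params=params, headers=headers)
            resp.raise_for_status()
            body = resp.json()                     # ListJobsResponse
            for job in body.get("jobs", []):
                yield job
            token = body.get("nextPageToken")      # set only if more results remain
            if not token:
                break
            params["pageToken"] = token
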
| 277 "getMetrics": { |
| 278 "description": "Request the job metrics.", |
| 279 "httpMethod": "GET", |
| 280 "id": "dataflow.projects.jobs.getMetrics", |
| 281 "parameterOrder": [ |
| 282 "projectId", |
| 283 "jobId" |
| 284 ], |
| 285 "parameters": { |
| 286 "projectId": { |
| 287 "description": "A project id.", |
| 288 "location": "path", |
| 289 "required": true, |
| 290 "type": "string" |
| 291 }, |
| 292 "jobId": { |
| 293 "description": "The job to get metrics for.
", |
| 294 "location": "path", |
| 295 "required": true, |
| 296 "type": "string" |
| 297 }, |
| 298 "startTime": { |
| 299 "description": "Return only metric data that
has changed since this time. Default is to return all information about all met
rics for the job.", |
| 300 "location": "query", |
| 301 "type": "string" |
| 302 } |
| 303 }, |
| 304 "path": "v1b3/projects/{projectId}/jobs/{jobId}/metr
ics", |
| 305 "response": { |
| 306 "$ref": "JobMetrics" |
| 307 }, |
| 308 "scopes": [ |
| 309 "https://www.googleapis.com/auth/cloud-platform"
, |
| 310 "https://www.googleapis.com/auth/userinfo.email" |
| 311 ] |
| 312 } |
| 313 }, |
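
getMetrics pairs with the JobMetrics schema below: each response is a snapshot of the latest value of every metric plus a metricTime, and the startTime query parameter asks only for metrics that changed since a previous snapshot. An incremental-polling sketch, same assumptions as above:

    import requests

    def fetch_metrics(project_id, job_id, access_token, since=None):
        """Return (metrics, metricTime); pass metricTime back as 'since' next call."""
        url = ("https://dataflow.googleapis.com/v1b3/projects/%s/jobs/%s/metrics"
               % (project_id, job_id))
        params = {"startTime": since} if since else {}
        resp = requests.get(url, params=params,
                            headers={"Authorization": "Bearer " + access_token})
        resp.raise_for_status()
        body = resp.json()                         # JobMetrics message
        return body.get("metrics", []), body.get("metricTime")
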
| 314 "resources": { |
| 315 "messages": { |
| 316 "methods": { |
| 317 "list": { |
| 318 "description": "Request the job messages.", |
| 319 "httpMethod": "GET", |
| 320 "id": "dataflow.projects.jobs.messages.list"
, |
| 321 "parameterOrder": [ |
| 322 "projectId", |
| 323 "jobId" |
| 324 ], |
| 325 "parameters": { |
| 326 "projectId": { |
| 327 "description": "A project id.", |
| 328 "location": "path", |
| 329 "required": true, |
| 330 "type": "string" |
| 331 }, |
| 332 "jobId": { |
| 333 "description": "The job to get messa
ges about.", |
| 334 "location": "path", |
| 335 "required": true, |
| 336 "type": "string" |
| 337 }, |
| 338 "minimumImportance": { |
| 339 "description": "Filter to only get m
essages with importance >= level", |
| 340 "enum": [ |
| 341 "JOB_MESSAGE_IMPORTANCE_UNKNOWN"
, |
| 342 "JOB_MESSAGE_DEBUG", |
| 343 "JOB_MESSAGE_DETAILED", |
| 344 "JOB_MESSAGE_BASIC", |
| 345 "JOB_MESSAGE_WARNING", |
| 346 "JOB_MESSAGE_ERROR" |
| 347 ], |
| 348 "location": "query", |
| 349 "type": "string" |
| 350 }, |
| 351 "pageSize": { |
| 352 "description": "If specified, determ
ines the maximum number of messages to return. If unspecified, the service may c
hoose an appropriate default, or may return an arbitrarily large number of resul
ts.", |
| 353 "format": "int32", |
| 354 "location": "query", |
| 355 "type": "integer" |
| 356 }, |
| 357 "pageToken": { |
| 358 "description": "If supplied, this sh
ould be the value of next_page_token returned by an earlier call. This will caus
e the next page of results to be returned.", |
| 359 "location": "query", |
| 360 "type": "string" |
| 361 }, |
| 362 "startTime": { |
| 363 "description": "If specified, return
only messages with timestamps >= start_time. The default is the job creation ti
me (i.e. beginning of messages).", |
| 364 "location": "query", |
| 365 "type": "string" |
| 366 }, |
| 367 "endTime": { |
| 368 "description": "Return only messages
with timestamps < end_time. The default is now (i.e. return up to the latest me
ssages available).", |
| 369 "location": "query", |
| 370 "type": "string" |
| 371 } |
| 372 }, |
| 373 "path": "v1b3/projects/{projectId}/jobs/{job
Id}/messages", |
| 374 "response": { |
| 375 "$ref": "ListJobMessagesResponse" |
| 376 }, |
| 377 "scopes": [ |
| 378 "https://www.googleapis.com/auth/cloud-p
latform", |
| 379 "https://www.googleapis.com/auth/userinf
o.email" |
| 380 ] |
| 381 } |
| 382 } |
| 383 }, |
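
messages.list is designed for incremental monitoring: minimumImportance filters noise, and startTime (defaulting to job creation time) bounds the window, so a monitor can resume from the timestamp of the last message it has seen. A hypothetical polling loop; a production version would also follow nextPageToken as in the jobs.list sketch, and deduplicate by message id since startTime is inclusive:

    import time
    import requests

    def tail_job_messages(project_id, job_id, access_token,
                          min_importance="JOB_MESSAGE_WARNING"):
        """Print messages at or above min_importance, resuming by timestamp."""
        url = ("https://dataflow.googleapis.com/v1b3/projects/%s/jobs/%s/messages"
               % (project_id, job_id))
        headers = {"Authorization": "Bearer " + access_token}
        start_time = None
        while True:
            params = {"minimumImportance": min_importance}
            if start_time:
                params["startTime"] = start_time
            resp = requests.get(url, params=params, headers=headers)
            resp.raise_for_status()
            for msg in resp.json().get("jobMessages", []):  # ascending timestamp order
                print(msg["time"], msg["messageImportance"], msg["messageText"])
                start_time = msg["time"]                    # resume point
            time.sleep(30)
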
| 384 "workItems": { |
| 385 "methods": { |
| 386 "reportStatus": { |
| 387 "description": "Reports the status of datafl
ow WorkItems leased by a worker.", |
| 388 "httpMethod": "POST", |
| 389 "id": "dataflow.projects.jobs.workItems.repo
rtStatus", |
| 390 "parameterOrder": [ |
| 391 "projectId", |
| 392 "jobId" |
| 393 ], |
| 394 "parameters": { |
| 395 "projectId": { |
| 396 "description": "The project which ow
ns the WorkItem's job.", |
| 397 "location": "path", |
| 398 "required": true, |
| 399 "type": "string" |
| 400 }, |
| 401 "jobId": { |
| 402 "description": "The job which the Wo
rkItem is part of.", |
| 403 "location": "path", |
| 404 "required": true, |
| 405 "type": "string" |
| 406 } |
| 407 }, |
| 408 "path": "v1b3/projects/{projectId}/jobs/{job
Id}/workItems:reportStatus", |
| 409 "request": { |
| 410 "$ref": "ReportWorkItemStatusRequest" |
| 411 }, |
| 412 "response": { |
| 413 "$ref": "ReportWorkItemStatusResponse" |
| 414 }, |
| 415 "scopes": [ |
| 416 "https://www.googleapis.com/auth/cloud-p
latform", |
| 417 "https://www.googleapis.com/auth/userinf
o.email" |
| 418 ] |
| 419 }, |
| 420 "lease": { |
| 421 "description": "Leases a dataflow WorkItem t
o run.", |
| 422 "httpMethod": "POST", |
| 423 "id": "dataflow.projects.jobs.workItems.leas
e", |
| 424 "parameterOrder": [ |
| 425 "projectId", |
| 426 "jobId" |
| 427 ], |
| 428 "parameters": { |
| 429 "projectId": { |
| 430 "description": "Identifies the proje
ct this worker belongs to.", |
| 431 "location": "path", |
| 432 "required": true, |
| 433 "type": "string" |
| 434 }, |
| 435 "jobId": { |
| 436 "description": "Identifies the workf
low job this worker belongs to.", |
| 437 "location": "path", |
| 438 "required": true, |
| 439 "type": "string" |
| 440 } |
| 441 }, |
| 442 "path": "v1b3/projects/{projectId}/jobs/{job
Id}/workItems:lease", |
| 443 "request": { |
| 444 "$ref": "LeaseWorkItemRequest" |
| 445 }, |
| 446 "response": { |
| 447 "$ref": "LeaseWorkItemResponse" |
| 448 }, |
| 449 "scopes": [ |
| 450 "https://www.googleapis.com/auth/cloud-p
latform", |
| 451 "https://www.googleapis.com/auth/userinf
o.email" |
| 452 ] |
| 453 } |
| 454 } |
| 455 } |
| 456 } |
| 457 } |
| 458 } |
| 459 } |
| 460 }, |
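
The workItems methods use custom-verb URLs (":lease", ":reportStatus") rather than sub-paths, and together they implement the worker side of the service: lease WorkItems, process them, report status. A schematic sketch only; the LeaseWorkItemRequest/Response schemas are not shown in this section, so the fields marked "assumed" below are illustrative, while workerId and workItemStatuses come from the ReportWorkItemStatusRequest schema later in the document.

    import requests

    def lease_and_report_once(project_id, job_id, worker_id, access_token):
        base = ("https://dataflow.googleapis.com/v1b3/projects/%s/jobs/%s/workItems"
                % (project_id, job_id))
        headers = {"Authorization": "Bearer " + access_token}

        # Custom method: POST .../workItems:lease with a LeaseWorkItemRequest body.
        lease = requests.post(base + ":lease", headers=headers,
                              json={"workerId": worker_id})  # other fields omitted
        lease.raise_for_status()

        for item in lease.json().get("workItems", []):       # field name assumed
            # ... process the WorkItem here ...
            status = {"workItemId": item.get("id"),          # field name assumed
                      "completed": True}
            report = requests.post(base + ":reportStatus", headers=headers, json={
                "workerId": worker_id,         # must match the current lease holder
                "workItemStatuses": [status],  # response order mirrors this order
            })
            report.raise_for_status()
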
| 461 "revision": "20150303", |
99 "rootUrl": "https://dataflow.googleapis.com/", | 462 "rootUrl": "https://dataflow.googleapis.com/", |
| 463 "schemas": { |
| 464 "Job": { |
| 465 "description": "Defines a job to be run by the Dataflow service.", |
| 466 "id": "Job", |
| 467 "properties": { |
| 468 "id": { |
| 469 "description": "The unique ID of this job. This field is set
by the Dataflow service when the Job is created, and is immutable for the life
of the Job.", |
| 470 "type": "string" |
| 471 }, |
| 472 "projectId": { |
| 473 "description": "The project which owns the job.", |
| 474 "type": "string" |
| 475 }, |
| 476 "name": { |
| 477 "description": "The user-specified Dataflow job name. Only o
ne Job with a given name may exist in a project at any given time. If a caller a
ttempts to create a Job with the same name as an already-existing Job, the attem
pt will return the existing Job. The name must match the regular expression [a-z
]([-a-z0-9]{0,38}[a-z0-9])?", |
| 478 "type": "string" |
| 479 }, |
| 480 "type": { |
| 481 "description": "The type of dataflow job.", |
| 482 "enum": [ |
| 483 "JOB_TYPE_UNKNOWN", |
| 484 "JOB_TYPE_BATCH", |
| 485 "JOB_TYPE_STREAMING" |
| 486 ], |
| 487 "type": "string" |
| 488 }, |
| 489 "environment": { |
| 490 "$ref": "Environment", |
| 491 "description": "Environment for the job." |
| 492 }, |
| 493 "steps": { |
| 494 "description": "The top-level steps that constitute the enti
re job.", |
| 495 "items": { |
| 496 "$ref": "Step" |
| 497 }, |
| 498 "type": "array" |
| 499 }, |
| 500 "currentState": { |
| 501 "description": "The current state of the job. Jobs are creat
ed in the JOB_STATE_STOPPED state unless otherwise specified. A job in the JOB_S
TATE_RUNNING state may asynchronously enter a terminal state. Once a job has rea
ched a terminal state, no further state updates may be made. This field may be m
utated by the Dataflow service; callers cannot mutate it.", |
| 502 "enum": [ |
| 503 "JOB_STATE_UNKNOWN", |
| 504 "JOB_STATE_STOPPED", |
| 505 "JOB_STATE_RUNNING", |
| 506 "JOB_STATE_DONE", |
| 507 "JOB_STATE_FAILED", |
| 508 "JOB_STATE_CANCELLED", |
| 509 "JOB_STATE_UPDATED" |
| 510 ], |
| 511 "type": "string" |
| 512 }, |
| 513 "currentStateTime": { |
| 514 "description": "The timestamp associated with the current st
ate.", |
| 515 "type": "string" |
| 516 }, |
| 517 "requestedState": { |
| 518 "description": "The job's requested state. UpdateJob may be
used to switch between the JOB_STATE_STOPPED and JOB_STATE_RUNNING states, by se
tting requested_state. UpdateJob may also be used to directly set a job's reques
ted state to JOB_STATE_CANCELLED or JOB_STATE_DONE, irrevocably terminating the
job if it has not already reached a terminal state.", |
| 519 "enum": [ |
| 520 "JOB_STATE_UNKNOWN", |
| 521 "JOB_STATE_STOPPED", |
| 522 "JOB_STATE_RUNNING", |
| 523 "JOB_STATE_DONE", |
| 524 "JOB_STATE_FAILED", |
| 525 "JOB_STATE_CANCELLED", |
| 526 "JOB_STATE_UPDATED" |
| 527 ], |
| 528 "type": "string" |
| 529 }, |
| 530 "executionInfo": { |
| 531 "$ref": "JobExecutionInfo", |
| 532 "description": "Information about how the Dataflow service w
ill actually run the job." |
| 533 }, |
| 534 "createTime": { |
| 535 "description": "Timestamp when job was initially created. Im
mutable, set by the Dataflow service.", |
| 536 "type": "string" |
| 537 }, |
| 538 "replaceJobId": { |
| 539 "description": "If this job is an update of an existing job,
this field will be the ID of the job it replaced. When sending a CreateJobReque
st, you can update a job by specifying it here. The job named here will be stopp
ed, and its intermediate state transferred to this job.", |
| 540 "type": "string" |
| 541 }, |
| 542 "transformNameMapping": { |
| 543 "additionalProperties": { |
| 544 "type": "string" |
| 545 }, |
| 546 "description": "Map of transform name prefixes of the job to
be replaced to the corresponding name prefixes of the new job.", |
| 547 "type": "object" |
| 548 }, |
| 549 "clientRequestId": { |
| 550 "description": "Client's unique identifier of the job, re-us
ed by SDK across retried attempts. If this field is set, the service will ensure
its uniqueness. That is, the request to create a job will fail if the service h
as knowledge of a previously submitted job with the same client's id and job nam
e. The caller may, for example, use this field to ensure idempotence of job crea
tion across retried attempts to create a job. By default, the field is empty and
, in that case, the service ignores it.", |
| 551 "type": "string" |
| 552 }, |
| 553 "replacedByJobId": { |
| 554 "description": "If another job is an update of this job (and
thus, this job is in JOB_STATE_UPDATED), this field will contain the ID of that
job.", |
| 555 "type": "string" |
| 556 } |
| 557 }, |
| 558 "type": "object" |
| 559 }, |
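
Taken together, replaceJobId and transformNameMapping encode the job-update flow described above: a create request that names an existing job stops that job and transfers its intermediate state to the new one, with the mapping relating old transform name prefixes to new ones. A hypothetical Job body for such a request (all identifiers made up):

    # Hypothetical Job body for an update-via-create request.
    replacement_job = {
        "name": "example-job",        # must match [a-z]([-a-z0-9]{0,38}[a-z0-9])?
        "type": "JOB_TYPE_STREAMING",
        "replaceJobId": "<id-of-running-job>",   # the job to stop and replace
        "transformNameMapping": {
            "OldRead": "NewRead",     # old transform name prefix -> new prefix
        },
    }
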
| 560 "Environment": { |
| 561 "description": "Describes the environment in which a Dataflow Job ru
ns.", |
| 562 "id": "Environment", |
| 563 "properties": { |
| 564 "tempStoragePrefix": { |
| 565 "description": "The prefix of the resources the system shoul
d use for temporary storage. The system will append the suffix \"/temp-{JOBNAME}
\" to this resource prefix, where {JOBNAME} is the value of the job_name field. Th
e resulting bucket and object prefix is used as the prefix of the resources used
to store temporary data needed during the job execution. NOTE: This will overri
de the value in taskrunner_settings. The supported resource type is: Google Clou
d Storage: storage.googleapis.com/{bucket}/{object} bucket.storage.googleapis.co
m/{object}", |
| 566 "type": "string" |
| 567 }, |
| 568 "clusterManagerApiService": { |
| 569 "description": "The type of cluster manager API to use. If u
nknown or unspecified, the service will attempt to choose a reasonable default.
This should be in the form of the API service name, e.g. \"compute.googleapis.co
m\".", |
| 570 "type": "string" |
| 571 }, |
| 572 "experiments": { |
| 573 "description": "The list of experiments to enable.", |
| 574 "items": { |
| 575 "type": "string" |
| 576 }, |
| 577 "type": "array" |
| 578 }, |
| 579 "workerPools": { |
| 580 "description": "Worker pools. At least one \"harness\" worke
r pool must be specified in order for the job to have workers.", |
| 581 "items": { |
| 582 "$ref": "WorkerPool" |
| 583 }, |
| 584 "type": "array" |
| 585 }, |
| 586 "userAgent": { |
| 587 "additionalProperties": { |
| 588 "description": "Properties of the object.", |
| 589 "type": "any" |
| 590 }, |
| 591 "description": "A description of the process that generated
the request.", |
| 592 "type": "object" |
| 593 }, |
| 594 "version": { |
| 595 "additionalProperties": { |
| 596 "description": "Properties of the object.", |
| 597 "type": "any" |
| 598 }, |
| 599 "description": "A structure describing which components and
their versions of the service are required in order to run the job.", |
| 600 "type": "object" |
| 601 }, |
| 602 "dataset": { |
| 603 "description": "The dataset for the current project where va
rious workflow related tables are stored. The supported resource type is: Google
BigQuery: bigquery.googleapis.com/{dataset}", |
| 604 "type": "string" |
| 605 }, |
| 606 "sdkPipelineOptions": { |
| 607 "additionalProperties": { |
| 608 "description": "Properties of the object.", |
| 609 "type": "any" |
| 610 }, |
| 611 "description": "The Dataflow SDK pipeline options specified
by the user. These options are passed through the service and are used to recrea
te the SDK pipeline options on the worker in a language agnostic and platform in
dependent way.", |
| 612 "type": "object" |
| 613 }, |
| 614 "internalExperiments": { |
| 615 "additionalProperties": { |
| 616 "description": "Properties of the object. Contains field
@type with type URL.", |
| 617 "type": "any" |
| 618 }, |
| 619 "description": "Experimental settings.", |
| 620 "type": "object" |
| 621 } |
| 622 }, |
| 623 "type": "object" |
| 624 }, |
| 625 "WorkerPool": { |
| 626 "description": "Describes one particular pool of Dataflow workers to
be instantiated by the Dataflow service in order to perform the computations re
quired by a job. Note that a workflow job may use multiple pools, in order to ma
tch the various computational requirements of the various stages of the job.", |
| 627 "id": "WorkerPool", |
| 628 "properties": { |
| 629 "kind": { |
| 630 "description": "The kind of the worker pool; currently only
'harness' and 'shuffle' are supported.", |
| 631 "type": "string" |
| 632 }, |
| 633 "numWorkers": { |
| 634 "description": "Number of Google Compute Engine workers in t
his pool needed to execute the job. If zero or unspecified, the service will att
empt to choose a reasonable default.", |
| 635 "format": "int32", |
| 636 "type": "integer" |
| 637 }, |
| 638 "packages": { |
| 639 "description": "Packages to be installed on workers.", |
| 640 "items": { |
| 641 "$ref": "Package" |
| 642 }, |
| 643 "type": "array" |
| 644 }, |
| 645 "defaultPackageSet": { |
| 646 "description": "The default package set to install. This all
ows the service to select a default set of packages which are useful to worker h
arnesses written in a particular language.", |
| 647 "enum": [ |
| 648 "DEFAULT_PACKAGE_SET_UNKNOWN", |
| 649 "DEFAULT_PACKAGE_SET_NONE", |
| 650 "DEFAULT_PACKAGE_SET_JAVA", |
| 651 "DEFAULT_PACKAGE_SET_PYTHON" |
| 652 ], |
| 653 "type": "string" |
| 654 }, |
| 655 "machineType": { |
| 656 "description": "Machine type (e.g. \"n1-standard-1\"). If em
pty or unspecified, the service will attempt to choose a reasonable default.", |
| 657 "type": "string" |
| 658 }, |
| 659 "teardownPolicy": { |
| 660 "description": "Sets the policy for determining when to turn
down worker pool. Allowed values are: TEARDOWN_ALWAYS, TEARDOWN_ON_SUCCESS, and
TEARDOWN_NEVER. TEARDOWN_ALWAYS means workers are always torn down regardless of
whether the job succeeds. TEARDOWN_ON_SUCCESS means workers are torn down if th
e job succeeds. TEARDOWN_NEVER means the workers are never torn down. If the wor
kers are not torn down by the service, they will continue to run and use Google
Compute Engine VM resources in the user's project until they are explicitly term
inated by the user. Because of this, Google recommends using the TEARDOWN_ALWAYS
policy except for small, manually supervised test jobs. If unknown or unspecifi
ed, the service will attempt to choose a reasonable default.", |
| 661 "enum": [ |
| 662 "TEARDOWN_POLICY_UNKNOWN", |
| 663 "TEARDOWN_ALWAYS", |
| 664 "TEARDOWN_ON_SUCCESS", |
| 665 "TEARDOWN_NEVER" |
| 666 ], |
| 667 "type": "string" |
| 668 }, |
| 669 "diskSizeGb": { |
| 670 "description": "Size of root disk for VMs, in GB. If zero or
unspecified, the service will attempt to choose a reasonable default.", |
| 671 "format": "int32", |
| 672 "type": "integer" |
| 673 }, |
| 674 "diskType": { |
| 675 "description": "Type of root disk for VMs. If empty or unspe
cified, the service will attempt to choose a reasonable default.", |
| 676 "type": "string" |
| 677 }, |
| 678 "diskSourceImage": { |
| 679 "description": "Fully qualified source image for disks.", |
| 680 "type": "string" |
| 681 }, |
| 682 "zone": { |
| 683 "description": "Zone to run the worker pools in (e.g. \"us-c
entral1-b\"). If empty or unspecified, the service will attempt to choose a reas
onable default.", |
| 684 "type": "string" |
| 685 }, |
| 686 "taskrunnerSettings": { |
| 687 "$ref": "TaskRunnerSettings", |
| 688 "description": "Settings passed through to Google Compute En
gine workers when using the standard Dataflow task runner. Users should ignore t
his field." |
| 689 }, |
| 690 "onHostMaintenance": { |
| 691 "description": "The action to take on host maintenance, as d
efined by the Google Compute Engine API.", |
| 692 "type": "string" |
| 693 }, |
| 694 "dataDisks": { |
| 695 "description": "Data disks that are used by a VM in this wor
kflow.", |
| 696 "items": { |
| 697 "$ref": "Disk" |
| 698 }, |
| 699 "type": "array" |
| 700 }, |
| 701 "metadata": { |
| 702 "additionalProperties": { |
| 703 "type": "string" |
| 704 }, |
| 705 "description": "Metadata to set on the Google Compute Engine
VMs.", |
| 706 "type": "object" |
| 707 }, |
| 708 "autoscalingSettings": { |
| 709 "$ref": "AutoscalingSettings", |
| 710 "description": "Settings for autoscaling of this WorkerPool.
" |
| 711 }, |
| 712 "poolArgs": { |
| 713 "additionalProperties": { |
| 714 "description": "Properties of the object. Contains field
@type with type URL.", |
| 715 "type": "any" |
| 716 }, |
| 717 "description": "Extra arguments for this worker pool.", |
| 718 "type": "object" |
| 719 }, |
| 720 "network": { |
| 721 "description": "Network to which VMs will be assigned. If em
pty or unspecified, the service will use the network \"default\".", |
| 722 "type": "string" |
| 723 } |
| 724 }, |
| 725 "type": "object" |
| 726 }, |
| 727 "Package": { |
| 728 "description": "Packages that need to be installed in order for a wo
rker to run the steps of the Dataflow job which will be assigned to its worker p
ool. This is the mechanism by which the SDK causes code to be loaded onto the wo
rkers. For example, the Dataflow Java SDK might use this to install jars contain
ing the user's code and all of the various dependencies (libraries, data files,
etc) required in order for that code to run.", |
| 729 "id": "Package", |
| 730 "properties": { |
| 731 "name": { |
| 732 "description": "The name of the package.", |
| 733 "type": "string" |
| 734 }, |
| 735 "location": { |
| 736 "description": "The resource to read the package from. The s
upported resource type is: Google Cloud Storage: storage.googleapis.com/{bucket}
bucket.storage.googleapis.com/", |
| 737 "type": "string" |
| 738 } |
| 739 }, |
| 740 "type": "object" |
| 741 }, |
| 742 "TaskRunnerSettings": { |
| 743 "description": "Taskrunner configuration settings.", |
| 744 "id": "TaskRunnerSettings", |
| 745 "properties": { |
| 746 "taskUser": { |
| 747 "description": "The UNIX user ID on the worker VM to use for
tasks launched by taskrunner; e.g. \"root\".", |
| 748 "type": "string" |
| 749 }, |
| 750 "taskGroup": { |
| 751 "description": "The UNIX group ID on the worker VM to use fo
r tasks launched by taskrunner; e.g. \"wheel\".", |
| 752 "type": "string" |
| 753 }, |
| 754 "oauthScopes": { |
| 755 "description": "OAuth2 scopes to be requested by the taskrun
ner in order to access the dataflow API.", |
| 756 "items": { |
| 757 "type": "string" |
| 758 }, |
| 759 "type": "array" |
| 760 }, |
| 761 "baseUrl": { |
| 762 "description": "The base URL for the taskrunner to use when
accessing Google Cloud APIs. When workers access Google Cloud APIs, they logical
ly do so via relative URLs. If this field is specified, it supplies the base URL
to use for resolving these relative URLs. The normative algorithm used is defin
ed by RFC 1808, \"Relative Uniform Resource Locators\". If not specified, the de
fault value is \"http://www.googleapis.com/\"", |
| 763 "type": "string" |
| 764 }, |
| 765 "dataflowApiVersion": { |
| 766 "description": "API version of endpoint, e.g. \"v1b3\"", |
| 767 "type": "string" |
| 768 }, |
| 769 "parallelWorkerSettings": { |
| 770 "$ref": "WorkerSettings", |
| 771 "description": "Settings to pass to the parallel worker harn
ess." |
| 772 }, |
| 773 "baseTaskDir": { |
| 774 "description": "Location on the worker for task-specific sub
directories.", |
| 775 "type": "string" |
| 776 }, |
| 777 "continueOnException": { |
| 778 "description": "Do we continue taskrunner if an exception is
hit?", |
| 779 "type": "boolean" |
| 780 }, |
| 781 "logToSerialconsole": { |
| 782 "description": "Send taskrunner log info to Google Compute E
ngine VM serial console?", |
| 783 "type": "boolean" |
| 784 }, |
| 785 "alsologtostderr": { |
| 786 "description": "Also send taskrunner log info to stderr?", |
| 787 "type": "boolean" |
| 788 }, |
| 789 "logUploadLocation": { |
| 790 "description": "Indicates where to put logs. If this is not
specified, the logs will not be uploaded. The supported resource type is: Google
Cloud Storage: storage.googleapis.com/{bucket}/{object} bucket.storage.googleap
is.com/{object}", |
| 791 "type": "string" |
| 792 }, |
| 793 "logDir": { |
| 794 "description": "Directory on the VM to store logs.", |
| 795 "type": "string" |
| 796 }, |
| 797 "tempStoragePrefix": { |
| 798 "description": "The prefix of the resources the taskrunner s
hould use for temporary storage. The supported resource type is: Google Cloud St
orage: storage.googleapis.com/{bucket}/{object} bucket.storage.googleapis.com/{o
bject}", |
| 799 "type": "string" |
| 800 }, |
| 801 "harnessCommand": { |
| 802 "description": "Command to launch the worker harness.", |
| 803 "type": "string" |
| 804 }, |
| 805 "workflowFileName": { |
| 806 "description": "Store the workflow in this file.", |
| 807 "type": "string" |
| 808 }, |
| 809 "commandlinesFileName": { |
| 810 "description": "Store preprocessing commands in this file.", |
| 811 "type": "string" |
| 812 }, |
| 813 "vmId": { |
| 814 "description": "ID string of VM.", |
| 815 "type": "string" |
| 816 }, |
| 817 "languageHint": { |
| 818 "description": "Suggested backend language.", |
| 819 "type": "string" |
| 820 }, |
| 821 "streamingWorkerMainClass": { |
| 822 "description": "Streaming worker main class name.", |
| 823 "type": "string" |
| 824 } |
| 825 }, |
| 826 "type": "object" |
| 827 }, |
| 828 "WorkerSettings": { |
| 829 "description": "Provides data to pass through to the worker harness.
", |
| 830 "id": "WorkerSettings", |
| 831 "properties": { |
| 832 "baseUrl": { |
| 833 "description": "The base URL for accessing Google Cloud APIs
. When workers access Google Cloud APIs, they logically do so via relative URLs.
If this field is specified, it supplies the base URL to use for resolving these
relative URLs. The normative algorithm used is defined by RFC 1808, \"Relative
Uniform Resource Locators\". If not specified, the default value is \"http://www
.googleapis.com/\"", |
| 834 "type": "string" |
| 835 }, |
| 836 "reportingEnabled": { |
| 837 "description": "Send work progress updates to service.", |
| 838 "type": "boolean" |
| 839 }, |
| 840 "servicePath": { |
| 841 "description": "The Dataflow service path relative to the ro
ot URL, for example, \"dataflow/v1b3/projects\".", |
| 842 "type": "string" |
| 843 }, |
| 844 "shuffleServicePath": { |
| 845 "description": "The Shuffle service path relative to the roo
t URL, for example, \"shuffle/v1beta1\".", |
| 846 "type": "string" |
| 847 }, |
| 848 "workerId": { |
| 849 "description": "ID of the worker running this pipeline.", |
| 850 "type": "string" |
| 851 }, |
| 852 "tempStoragePrefix": { |
| 853 "description": "The prefix of the resources the system shoul
d use for temporary storage. The supported resource type is: Google Cloud Storag
e: storage.googleapis.com/{bucket}/{object} bucket.storage.googleapis.com/{objec
t}", |
| 854 "type": "string" |
| 855 } |
| 856 }, |
| 857 "type": "object" |
| 858 }, |
| 859 "Disk": { |
| 860 "description": "Describes the data disk used by a workflow job.", |
| 861 "id": "Disk", |
| 862 "properties": { |
| 863 "sizeGb": { |
| 864 "description": "Size of disk in GB. If zero or unspecified,
the service will attempt to choose a reasonable default.", |
| 865 "format": "int32", |
| 866 "type": "integer" |
| 867 }, |
| 868 "diskType": { |
| 869 "description": "Disk storage type, as defined by Google Comp
ute Engine. This must be a disk type appropriate to the project and zone in whic
h the workers will run. If unknown or unspecified, the service will attempt to c
hoose a reasonable default. For example, the standard persistent disk type is a
resource name typically ending in \"pd-standard\". If SSD persistent disks are a
vailable, the resource name typically ends with \"pd-ssd\". The actual valid val
ues are defined by the Google Compute Engine API, not by the Dataflow API; consult
the Google Compute Engine documentation for more information about determining t
he set of available disk types for a particular project and zone. Google Compute
Engine Disk types are local to a particular project in a particular zone, and s
o the resource name will typically look something like this: compute.googleapis.
com/projects/<project-id>/zones/<zone>/diskTypes/pd-standard", |
| 870 "type": "string" |
| 871 }, |
| 872 "mountPoint": { |
| 873 "description": "Directory in a VM where disk is mounted.", |
| 874 "type": "string" |
| 875 } |
| 876 }, |
| 877 "type": "object" |
| 878 }, |
| 879 "AutoscalingSettings": { |
| 880 "description": "Settings for WorkerPool autoscaling.", |
| 881 "id": "AutoscalingSettings", |
| 882 "properties": { |
| 883 "algorithm": { |
| 884 "description": "The algorithm to use for autoscaling.", |
| 885 "enum": [ |
| 886 "AUTOSCALING_ALGORITHM_UNKNOWN", |
| 887 "AUTOSCALING_ALGORITHM_NONE", |
| 888 "AUTOSCALING_ALGORITHM_BASIC" |
| 889 ], |
| 890 "type": "string" |
| 891 }, |
| 892 "maxNumWorkers": { |
| 893 "description": "The maximum number of workers to cap scaling
at.", |
| 894 "format": "int32", |
| 895 "type": "integer" |
| 896 } |
| 897 }, |
| 898 "type": "object" |
| 899 }, |
| 900 "Step": { |
| 901 "description": "Defines a particular step within a Dataflow job. A j
ob consists of multiple steps, each of which performs some specific operation as
part of the overall job. Data is typically passed from one step to another as p
art of the job. Here's an example of a sequence of steps which together implemen
t a Map-Reduce job: * Read a collection of data from some source, parsing the co
llection's elements. * Validate the elements. * Apply a user-defined function to
map each element to some value and extract an element-specific key value. * Gro
up elements with the same key into a single element with that key, transforming
a multiply-keyed collection into a uniquely-keyed collection. * Write the elemen
ts out to some data sink. (Note that the Dataflow service may be used to run man
y different types of jobs, not just Map-Reduce).", |
| 902 "id": "Step", |
| 903 "properties": { |
| 904 "kind": { |
| 905 "description": "The kind of step in the dataflow Job.", |
| 906 "type": "string" |
| 907 }, |
| 908 "name": { |
| 909 "description": "Name identifying the step. This must be uniq
ue for each step with respect to all other steps in the dataflow Job.", |
| 910 "type": "string" |
| 911 }, |
| 912 "properties": { |
| 913 "additionalProperties": { |
| 914 "description": "Properties of the object.", |
| 915 "type": "any" |
| 916 }, |
| 917 "description": "Named properties associated with the step. E
ach kind of predefined step has its own required set of properties.", |
| 918 "type": "object" |
| 919 } |
| 920 }, |
| 921 "type": "object" |
| 922 }, |
| 923 "JobExecutionInfo": { |
| 924 "description": "Additional information about how a Dataflow job will
be executed which isn’t contained in the submitted job.", |
| 925 "id": "JobExecutionInfo", |
| 926 "properties": { |
| 927 "stages": { |
| 928 "additionalProperties": { |
| 929 "$ref": "JobExecutionStageInfo" |
| 930 }, |
| 931 "description": "A mapping from each stage to the information
about that stage.", |
| 932 "type": "object" |
| 933 } |
| 934 }, |
| 935 "type": "object" |
| 936 }, |
| 937 "JobExecutionStageInfo": { |
| 938 "description": "Contains information about how a particular [google.
dataflow.v1beta3.Step][google.dataflow.v1beta3.Step] will be executed.", |
| 939 "id": "JobExecutionStageInfo", |
| 940 "properties": { |
| 941 "stepName": { |
| 942 "description": "The steps associated with the execution stag
e. Note that stages may have several steps, and that a given step might be run b
y more than one stage.", |
| 943 "items": { |
| 944 "type": "string" |
| 945 }, |
| 946 "type": "array" |
| 947 } |
| 948 }, |
| 949 "type": "object" |
| 950 }, |
| 951 "ListJobsResponse": { |
| 952 "description": "Response to a request to list Dataflow jobs. This ma
y be a partial response, depending on the page size in the ListJobsRequest.", |
| 953 "id": "ListJobsResponse", |
| 954 "properties": { |
| 955 "jobs": { |
| 956 "description": "A subset of the requested job information.", |
| 957 "items": { |
| 958 "$ref": "Job" |
| 959 }, |
| 960 "type": "array" |
| 961 }, |
| 962 "nextPageToken": { |
| 963 "description": "Set if there may be more results than fit in
this response.", |
| 964 "type": "string" |
| 965 } |
| 966 }, |
| 967 "type": "object" |
| 968 }, |
| 969 "ListJobMessagesResponse": { |
| 970 "description": "Response to a request to list job messages.", |
| 971 "id": "ListJobMessagesResponse", |
| 972 "properties": { |
| 973 "jobMessages": { |
| 974 "description": "Messages in ascending timestamp order.", |
| 975 "items": { |
| 976 "$ref": "JobMessage" |
| 977 }, |
| 978 "type": "array" |
| 979 }, |
| 980 "nextPageToken": { |
| 981 "description": "The token to obtain the next page of results
if there are more.", |
| 982 "type": "string" |
| 983 } |
| 984 }, |
| 985 "type": "object" |
| 986 }, |
| 987 "JobMessage": { |
| 988 "description": "A particular message pertaining to a Dataflow job.", |
| 989 "id": "JobMessage", |
| 990 "properties": { |
| 991 "id": { |
| 992 "description": "Identifies the message. This is automaticall
y generated by the service; the caller should treat it as an opaque string.", |
| 993 "type": "string" |
| 994 }, |
| 995 "time": { |
| 996 "description": "The timestamp of the message.", |
| 997 "type": "string" |
| 998 }, |
| 999 "messageText": { |
| 1000 "description": "The text of the message.", |
| 1001 "type": "string" |
| 1002 }, |
| 1003 "messageImportance": { |
| 1004 "description": "Importance level of the message.", |
| 1005 "enum": [ |
| 1006 "JOB_MESSAGE_IMPORTANCE_UNKNOWN", |
| 1007 "JOB_MESSAGE_DEBUG", |
| 1008 "JOB_MESSAGE_DETAILED", |
| 1009 "JOB_MESSAGE_BASIC", |
| 1010 "JOB_MESSAGE_WARNING", |
| 1011 "JOB_MESSAGE_ERROR" |
| 1012 ], |
| 1013 "type": "string" |
| 1014 } |
| 1015 }, |
| 1016 "type": "object" |
| 1017 }, |
| 1018 "JobMetrics": { |
| 1019 "description": "JobMetrics contains a collection of metrics describin
g the detailed progress of a Dataflow job. Metrics correspond to user-defined an
d system-defined metrics in the job. This resource captures only the most recent
values of each metric; time-series data can be queried for them (under the same
metric names) from Cloud Monitoring.", |
| 1020 "id": "JobMetrics", |
| 1021 "properties": { |
| 1022 "metricTime": { |
| 1023 "description": "Timestamp as of which metric values are curr
ent.", |
| 1024 "type": "string" |
| 1025 }, |
| 1026 "metrics": { |
| 1027 "description": "All metrics for this job.", |
| 1028 "items": { |
| 1029 "$ref": "MetricUpdate" |
| 1030 }, |
| 1031 "type": "array" |
| 1032 } |
| 1033 }, |
| 1034 "type": "object" |
| 1035 }, |
| 1036 "MetricUpdate": { |
| 1037 "description": "Describes the state of a metric.", |
| 1038 "id": "MetricUpdate", |
| 1039 "properties": { |
| 1040 "name": { |
| 1041 "$ref": "MetricStructuredName", |
| 1042 "description": "Name of the metric." |
| 1043 }, |
| 1044 "kind": { |
| 1045 "description": "Metric aggregation kind. The possible metric
aggregation kinds are \"Sum\", \"Max\", \"Min\", \"Mean\", \"Set\", \"And\", an
d \"Or\". The specified aggregation kind is case-insensitive. If omitted, this i
s not an aggregated value but instead a single metric sample value.", |
| 1046 "type": "string" |
| 1047 }, |
| 1048 "cumulative": { |
| 1049 "description": "True if this metric is reported as the total
cumulative aggregate value accumulated since the worker started working on this
WorkItem. By default this is false, indicating that this metric is reported as
a delta that is not associated with any WorkItem.", |
| 1050 "type": "boolean" |
| 1051 }, |
| 1052 "scalar": { |
| 1053 "description": "Worker-computed aggregate value for aggregat
ion kinds \"Sum\", \"Max\", \"Min\", \"And\", and \"Or\". The possible value typ
es are Long, Double, and Boolean.", |
| 1054 "type": "any" |
| 1055 }, |
| 1056 "meanSum": { |
| 1057 "description": "Worker-computed aggregate value for the \"Me
an\" aggregation kind. This holds the sum of the aggregated values and is used i
n combination with mean_count below to obtain the actual mean aggregate value. T
he only possible value types are Long and Double.", |
| 1058 "type": "any" |
| 1059 }, |
| 1060 "meanCount": { |
| 1061 "description": "Worker-computed aggregate value for the \"Me
an\" aggregation kind. This holds the count of the aggregated values and is used
in combination with mean_sum above to obtain the actual mean aggregate value. T
he only possible value type is Long.", |
| 1062 "type": "any" |
| 1063 }, |
| 1064 "set": { |
| 1065 "description": "Worker-computed aggregate value for the \"Se
t\" aggregation kind. The only possible value type is a list of Values whose typ
e can be Long, Double, or String, according to the metric's type. All Values in
the list must be of the same type.", |
| 1066 "type": "any" |
| 1067 }, |
| 1068 "internal": { |
| 1069 "description": "Worker-computed aggregate value for internal
use by the Dataflow service.", |
| 1070 "type": "any" |
| 1071 }, |
| 1072 "updateTime": { |
| 1073 "description": "Timestamp associated with the metric value.
Optional when workers are reporting work progress; it will be filled in response
s from the metrics API.", |
| 1074 "type": "string" |
| 1075 } |
| 1076 }, |
| 1077 "type": "object" |
| 1078 }, |
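
For the "Mean" kind, MetricUpdate deliberately carries the (mean_sum, mean_count) pair rather than a precomputed mean, so partial aggregates from many workers can be combined before the final division. A small sketch of that combination:

    def combine_mean_updates(updates):
        """Fold 'Mean'-kind MetricUpdate dicts into a single (sum, count) pair."""
        total, count = 0, 0
        for u in updates:
            total += u.get("meanSum", 0)    # Long or Double, per the schema
            count += u.get("meanCount", 0)  # Long
        return total, count

    s, n = combine_mean_updates([{"meanSum": 120, "meanCount": 4},
                                 {"meanSum": 30, "meanCount": 2}])
    print(s / n if n else None)             # 150 / 6 = 25.0, the mean aggregate
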
| 1079 "MetricStructuredName": { |
| 1080 "description": "Identifies a metric, by describing the source which
generated the metric.", |
| 1081 "id": "MetricStructuredName", |
| 1082 "properties": { |
| 1083 "origin": { |
| 1084 "description": "Origin (namespace) of metric name. May be bl
ank for user-defined metrics; will be \"dataflow\" for metrics defined by the Dat
aflow service or SDK.", |
| 1085 "type": "string" |
| 1086 }, |
| 1087 "name": { |
| 1088 "description": "Worker-defined metric name.", |
| 1089 "type": "string" |
| 1090 }, |
| 1091 "context": { |
| 1092 "additionalProperties": { |
| 1093 "type": "string" |
| 1094 }, |
| 1095 "description": "Zero or more labeled fields which identify t
he part of the job this metric is associated with, such as the name of a step or
collection. For example, built-in counters associated with steps will have cont
ext['step'] = <step-name>. Counters associated with PCollections in the SDK will have conte
xt['pcollection'] = <pcollection-name>.", |
| 1096 "type": "object" |
| 1097 } |
| 1098 }, |
| 1099 "type": "object" |
| 1100 }, |
| 1101 "ReportWorkItemStatusRequest": { |
| 1102 "description": "Request to report the status of WorkItems.", |
| 1103 "id": "ReportWorkItemStatusRequest", |
| 1104 "properties": { |
| 1105 "workerId": { |
| 1106 "description": "The ID of the worker reporting the WorkItem
status. If this does not match the ID of the worker which the Dataflow service b
elieves currently has the lease on the WorkItem, the report will be dropped (wit
h an error response).", |
| 1107 "type": "string" |
| 1108 }, |
| 1109 "workItemStatuses": { |
| 1110 "description": "The order is unimportant, except that the or
der of the WorkItemServiceState messages in the ReportWorkItemStatusResponse cor
responds to the order of WorkItemStatus messages here.", |
| 1111 "items": { |
| 1112 "$ref": "WorkItemStatus" |
| 1113 }, |
| 1114 "type": "array" |
| 1115 }, |
| 1116 "currentWorkerTime": { |
| 1117 "description": "The current timestamp at the worker.", |
| 1118 "type": "string" |
| 1119 } |
| 1120 }, |
| 1121 "type": "object" |
| 1122 }, |
| 1123 "WorkItemStatus": { |
| 1124 "description": "Conveys a worker's progress through the work describ
ed by a WorkItem.", |
| 1125 "id": "WorkItemStatus", |
| 1126 "properties": { |
| 1127 "workItemId": { |
| 1128 "description": "Identifies the WorkItem.", |
| 1129 "type": "string" |
| 1130 }, |
| 1131 "reportIndex": { |
| 1132 "description": "The report index. When a WorkItem is leased,
the lease will contain an initial report index. When a WorkItem's status is rep
orted to the system, the report should be sent with that report index, and the r
esponse will contain the index the worker should use for the next report. Report
s received with unexpected index values will be rejected by the service. In orde
r to preserve idempotency, the worker should not alter the contents of a report,
even if the worker must submit the same report multiple times before getting ba
ck a response. The worker should not submit a subsequent report until the respon
se for the previous report has been received from the service.", |
| 1133 "format": "int64", |
| 1134 "type": "string" |
| 1135 }, |
| 1136 "requestedLeaseDuration": { |
| 1137 "description": "Amount of time the worker requests for its l
ease.", |
| 1138 "type": "string" |
| 1139 }, |
| 1140 "completed": { |
| 1141 "description": "True if the WorkItem was completed (successf
ully or unsuccessfully).", |
| 1142 "type": "boolean" |
| 1143 }, |
| 1144 "errors": { |
| 1145 "description": "Specifies errors which occurred during proce
ssing. If errors are provided, and completed = true, then the WorkItem is consid
ered to have failed.", |
| 1146 "items": { |
| 1147 "$ref": "Status" |
| 1148 }, |
| 1149 "type": "array" |
| 1150 }, |
| 1151 "metricUpdates": { |
| 1152 "description": "Worker output metrics (counters) for this Wo
rkItem.", |
| 1153 "items": { |
| 1154 "$ref": "MetricUpdate" |
| 1155 }, |
| 1156 "type": "array" |
| 1157 }, |
| 1158 "progress": { |
| 1159 "$ref": "ApproximateProgress", |
| 1160 "description": "The WorkItem's approximate progress." |
| 1161 }, |
| 1162 "stopPosition": { |
| 1163 "$ref": "Position", |
| 1164 "description": "A worker may split an active map task in two
parts, \"primary\" and \"residual\", continuing to process the primary part and
returning the residual part into the pool of available work. This event is call
ed a \"dynamic split\" and is critical to the dynamic work rebalancing feature.
The two obtained sub-tasks are called \"parts\" of the split. The parts, if conc
atenated, must represent the same input as would be read by the current task if
the split did not happen. The exact way in which the original task is decomposed
into the two parts is specified either as a position demarcating them (stop_pos
ition), or explicitly as two DerivedSources, if this task consumes a user-define
d source type (dynamic_source_split). The \"current\" task is adjusted as a resu
lt of the split: after a task with range [A, B) sends a stop_position update at
C, its range is considered to be [A, C), e.g.: * Progress should be interpreted
relative to the new range, e.g. \"75% completed\" means \"75% of [A, C) complete
d\" * The worker should interpret proposed_stop_position relative to the new ran
ge, e.g. \"split at 68%\" should be interpreted as \"split at 68% of [A, C)\". *
If the worker chooses to split again using stop_position, only stop_positions i
n [A, C) will be accepted. * Etc. dynamic_source_split has similar semantics: e.
g., if a task with source S splits using dynamic_source_split into {P, R} (where
P and R must be together equivalent to S), then subsequent progress and propose
d_stop_position should be interpreted relative to P, and in a potential subseque
nt dynamic_source_split into {P', R'}, P' and R' must be together equivalent to
P, etc." |
| 1165 }, |
| 1166 "dynamicSourceSplit": { |
| 1167 "$ref": "DynamicSourceSplit", |
| 1168 "description": "See documentation of stop_position." |
| 1169 }, |
| 1170 "sourceOperationResponse": { |
| 1171 "$ref": "SourceOperationResponse", |
| 1172 "description": "If the work item represented a SourceOperati
onRequest, and the work is completed, contains the result of the operation." |
| 1173 }, |
| 1174 "sourceFork": { |
| 1175 "$ref": "SourceFork", |
| 1176 "description": "DEPRECATED in favor of dynamic_source_split.
" |
| 1177 } |
| 1178 }, |
| 1179 "type": "object" |
| 1180 }, |
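
The range arithmetic in the stop_position description is easier to see with concrete numbers; a toy example assuming byte-offset positions (Position.byteOffset, defined below):

    # Toy dynamic split over a byte range; all numbers are illustrative.
    A, B = 0, 1000          # original task range [A, B)
    C = 600                 # worker reports stopPosition = C

    # After the split the current task is [A, C); the residual [C, B)
    # returns to the pool of available work.
    current = (A, C)        # (0, 600)

    # Progress is now relative to [A, C):
    # "75% completed" means A + 0.75 * (C - A) = 450, not 750.
    print(A + 0.75 * (C - A))   # 450.0

    # A proposed split "at 68%" is likewise relative to [A, C), and any
    # further stop_position must fall inside [A, C) to be accepted.
    print(A + 0.68 * (C - A))   # 408.0
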
| 1181 "Status": { |
| 1182 "description": "The `Status` type defines a logical error model that
is suitable for different programming environments, including REST APIs and RPC
APIs. It is used by [gRPC](https://github.com/grpc). The error model is designe
d to be: - Simple to use and understand for most users - Flexible enough to meet
unexpected needs # Overview The `Status` message contains three pieces of data:
error code, error message, and error details. The error code should be an enum
value of [google.rpc.Code][], but it may accept additional error codes if needed
. The error message should be a developer-facing English message that helps deve
lopers *understand* and *resolve* the error. If a localized user-facing error me
ssage is needed, put the localized message in the error details or localize it i
n the client. The optional error details may contain arbitrary information about
the error. There is a predefined set of error detail types in the package `goog
le.rpc` which can be used for common error conditions. # Language mapping The `S
tatus` message is the logical representation of the error model, but it is not n
ecessarily the actual wire format. When the `Status` message is exposed in diffe
rent client libraries and different wire protocols, it can be mapped differently
. For example, it will likely be mapped to some exceptions in Java, but more lik
ely mapped to some error codes in C. # Other uses The error model and the `Statu
s` message can be used in a variety of environments, either with or without APIs
, to provide a consistent developer experience across different environments. Ex
ample uses of this error model include: - Partial errors. If a service needs to
return partial errors to the client, it may embed the `Status` in the normal res
ponse to indicate the partial errors. - Workflow errors. A typical workflow has
multiple steps. Each step may have a `Status` message for error reporting purpos
e. - Batch operations. If a client uses batch request and batch response, the `S
tatus` message should be used directly inside batch response, one for each error
sub-response. - Asynchronous operations. If an API call embeds asynchronous ope
ration results in its response, the status of those operations should be represe
nted directly using the `Status` message. - Logging. If some API errors are stor
ed in logs, the message `Status` could be used directly after any stripping need
ed for security/privacy reasons.", |
| 1183 "id": "Status", |
| 1184 "properties": { |
| 1185 "code": { |
| 1186 "description": "The status code, which should be an enum val
ue of [google.rpc.Code][].", |
| 1187 "format": "int32", |
| 1188 "type": "integer" |
| 1189 }, |
| 1190 "message": { |
| 1191 "description": "A developer-facing error message, which shou
ld be in English. Any user-facing error message should be localized and sent in
the [google.rpc.Status.details][google.rpc.Status.details] field, or localized b
y the client.", |
| 1192 "type": "string" |
| 1193 }, |
| 1194 "details": { |
| 1195 "description": "A list of messages that carry the error deta
ils. There will be a common set of message types for APIs to use.", |
| 1196 "items": { |
| 1197 "additionalProperties": { |
| 1198 "description": "Properties of the object. Contains f
ield @type with type URL.", |
| 1199 "type": "any" |
| 1200 }, |
| 1201 "type": "object" |
| 1202 }, |
| 1203 "type": "array" |
| 1204 } |
| 1205 }, |
| 1206 "type": "object" |
| 1207 }, |
| 1208 "ApproximateProgress": { |
| 1209 "description": "A progress measurement of a WorkItem by a worker.", |
| 1210 "id": "ApproximateProgress", |
| 1211 "properties": { |
| 1212 "position": { |
| 1213 "$ref": "Position", |
| 1214 "description": "A Position within the work to represent a pr
ogress." |
| 1215 }, |
| 1216 "percentComplete": { |
| 1217 "description": "Completion as percentage of the work, from 0
.0 (beginning, nothing complete), to 1.0 (end of the work range, entire WorkItem
complete).", |
| 1218 "format": "float", |
| 1219 "type": "number" |
| 1220 }, |
| 1221 "remainingTime": { |
| 1222 "description": "Completion as an estimated time remaining.", |
| 1223 "type": "string" |
| 1224 } |
| 1225 }, |
| 1226 "type": "object" |
| 1227 }, |
| 1228 "Position": { |
| 1229 "description": "Position defines a position within a collection of d
ata. The value can be either the end position, a key (used with ordered collecti
ons), a byte offset, or a record index.", |
| 1230 "id": "Position", |
| 1231 "properties": { |
| 1232 "end": { |
| 1233 "description": "Position is past all other positions. Also u
seful for the end position of an unbounded range.", |
| 1234 "type": "boolean" |
| 1235 }, |
| 1236 "key": { |
| 1237 "description": "Position is a string key, ordered lexicograp
hically.", |
| 1238 "type": "string" |
| 1239 }, |
| 1240 "byteOffset": { |
| 1241 "description": "Position is a byte offset.", |
| 1242 "format": "int64", |
| 1243 "type": "string" |
| 1244 }, |
| 1245 "recordIndex": { |
| 1246 "description": "Position is a record index.", |
| 1247 "format": "int64", |
| 1248 "type": "string" |
| 1249 }, |
| 1250 "shufflePosition": { |
| 1251 "description": "CloudPosition is a base64 encoded BatchShuff
lePosition (with FIXED sharding).", |
| 1252 "type": "string" |
| 1253 } |
| 1254 }, |
| 1255 "type": "object" |
| 1256 }, |
| 1257 "DynamicSourceSplit": { |
| 1258 "description": "When a task splits using WorkItemStatus.dynamic_sour
ce_split, this message describes the two parts of the split relative to the desc
ription of the current task's input.", |
| 1259 "id": "DynamicSourceSplit", |
| 1260 "properties": { |
| 1261 "primary": { |
| 1262 "$ref": "DerivedSource", |
| 1263 "description": "Primary part (continued to be processed by w
orker). Specified relative to the previously-current source. Becomes current." |
| 1264 }, |
| 1265 "residual": { |
| 1266 "$ref": "DerivedSource", |
| 1267 "description": "Residual part (returned to the pool of work)
. Specified relative to the previously-current source." |
| 1268 } |
| 1269 }, |
| 1270 "type": "object" |
| 1271 }, |
| 1272 "DerivedSource": { |
| 1273 "description": "Specification of one of the bundles produced as a re
sult of splitting a Source (e.g. when executing a SourceSplitRequest, or when sp
litting an active task using WorkItemStatus.dynamic_source_split), relative to t
he source being split.", |
| 1274 "id": "DerivedSource", |
| 1275 "properties": { |
| 1276 "source": { |
| 1277 "$ref": "Source", |
| 1278 "description": "Specification of the source." |
| 1279 }, |
| 1280 "derivationMode": { |
| 1281 "description": "What source to base the produced source on (
if any).", |
| 1282 "enum": [ |
| 1283 "SOURCE_DERIVATION_MODE_UNKNOWN", |
| 1284 "SOURCE_DERIVATION_MODE_INDEPENDENT", |
| 1285 "SOURCE_DERIVATION_MODE_CHILD_OF_CURRENT", |
| 1286 "SOURCE_DERIVATION_MODE_SIBLING_OF_CURRENT" |
| 1287 ], |
| 1288 "type": "string" |
| 1289 } |
| 1290 }, |
| 1291 "type": "object" |
| 1292 }, |
    "Source": {
      "description": "A source that records can be read and decoded from.",
      "id": "Source",
      "properties": {
        "spec": {
          "additionalProperties": {
            "description": "Properties of the object.",
            "type": "any"
          },
          "description": "The source to read from, plus its parameters.",
          "type": "object"
        },
        "codec": {
          "additionalProperties": {
            "description": "Properties of the object.",
            "type": "any"
          },
          "description": "The codec to use to decode data read from the source.",
          "type": "object"
        },
        "baseSpecs": {
          "description": "While splitting, sources may specify the produced bundles as differences against another source, in order to save backend-side memory and allow bigger jobs. For details, see SourceSplitRequest. To support this use case, the full set of parameters of the source is logically obtained by taking the latest explicitly specified value of each parameter in the order: base_specs (later items win), spec (overrides anything in base_specs).",
          "items": {
            "additionalProperties": {
              "description": "Properties of the object.",
              "type": "any"
            },
            "type": "object"
          },
          "type": "array"
        },
        "metadata": {
          "$ref": "SourceMetadata",
          "description": "Optionally, metadata for this source can be supplied right away, avoiding a SourceGetMetadataOperation roundtrip (see SourceOperationRequest). This field is meaningful only in the Source objects populated by the user (e.g. when filling in a DerivedSource). Source objects supplied by the framework to the user don't have this field populated."
        },
        "doesNotNeedSplitting": {
          "description": "Setting this value to true hints to the framework that the source doesn't need splitting, and using SourceSplitRequest on it would yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. E.g. a file splitter may set this to true when splitting a single file into a set of byte ranges of appropriate size, and set this to false when splitting a filepattern into individual files. However, for efficiency, a file splitter may decide to produce file subranges directly from the filepattern to avoid a splitting round-trip. See SourceSplitRequest for an overview of the splitting process. This field is meaningful only in the Source objects populated by the user (e.g. when filling in a DerivedSource). Source objects supplied by the framework to the user don't have this field populated.",
          "type": "boolean"
        }
      },
      "type": "object"
    },
    "SourceMetadata": {
      "description": "Metadata about a Source useful for automatically optimizing and tuning the pipeline, etc.",
      "id": "SourceMetadata",
      "properties": {
        "producesSortedKeys": {
          "description": "Whether this source is known to produce key/value pairs with the (encoded) keys in lexicographically sorted order.",
          "type": "boolean"
        },
        "infinite": {
          "description": "Specifies that the size of this source is known to be infinite (this is a streaming source).",
          "type": "boolean"
        },
        "estimatedSizeBytes": {
          "description": "An estimate of the total size (in bytes) of the data that would be read from this source. This estimate is in terms of external storage size, before any decompression or other processing done by the reader.",
          "format": "int64",
          "type": "string"
        }
      },
      "type": "object"
    },
    "SourceOperationResponse": {
      "description": "The result of a SourceOperationRequest, specified in ReportWorkItemStatusRequest.source_operation when the work item is completed.",
      "id": "SourceOperationResponse",
      "properties": {
        "split": {
          "$ref": "SourceSplitResponse",
          "description": "A response to a request to split a source."
        },
        "getMetadata": {
          "$ref": "SourceGetMetadataResponse",
          "description": "A response to a request to get metadata about a source."
        }
      },
      "type": "object"
    },
    "SourceSplitResponse": {
      "description": "The response to a SourceSplitRequest.",
      "id": "SourceSplitResponse",
      "properties": {
        "outcome": {
          "description": "Indicates whether splitting happened and produced a list of bundles. If this is USE_CURRENT_SOURCE_AS_IS, the current source should be processed \"as is\" without splitting. \"bundles\" is ignored in this case. If this is SPLITTING_HAPPENED, then \"bundles\" contains a list of bundles into which the source was split.",
          "enum": [
            "SOURCE_SPLIT_OUTCOME_UNKNOWN",
            "SOURCE_SPLIT_OUTCOME_USE_CURRENT",
            "SOURCE_SPLIT_OUTCOME_SPLITTING_HAPPENED"
          ],
          "type": "string"
        },
        "bundles": {
          "description": "If outcome is SPLITTING_HAPPENED, then this is a list of bundles into which the source was split. Otherwise this field is ignored. This list can be empty, which means the source represents an empty input.",
          "items": {
            "$ref": "DerivedSource"
          },
          "type": "array"
        },
        "shards": {
          "description": "DEPRECATED in favor of bundles.",
          "items": {
            "$ref": "SourceSplitShard"
          },
          "type": "array"
        }
      },
      "type": "object"
    },
    "SourceSplitShard": {
      "description": "DEPRECATED in favor of DerivedSource.",
      "id": "SourceSplitShard",
      "properties": {
        "source": {
          "$ref": "Source",
          "description": "DEPRECATED"
        },
        "derivationMode": {
          "description": "DEPRECATED",
          "enum": [
            "SOURCE_DERIVATION_MODE_UNKNOWN",
            "SOURCE_DERIVATION_MODE_INDEPENDENT",
            "SOURCE_DERIVATION_MODE_CHILD_OF_CURRENT",
            "SOURCE_DERIVATION_MODE_SIBLING_OF_CURRENT"
          ],
          "type": "string"
        }
      },
      "type": "object"
    },
    "SourceGetMetadataResponse": {
      "description": "The result of a SourceGetMetadataOperation.",
      "id": "SourceGetMetadataResponse",
      "properties": {
        "metadata": {
          "$ref": "SourceMetadata",
          "description": "The computed metadata."
        }
      },
      "type": "object"
    },
    "SourceFork": {
      "description": "DEPRECATED in favor of DynamicSourceSplit.",
      "id": "SourceFork",
      "properties": {
        "primary": {
          "$ref": "SourceSplitShard",
          "description": "DEPRECATED"
        },
        "residual": {
          "$ref": "SourceSplitShard",
          "description": "DEPRECATED"
        },
        "primarySource": {
          "$ref": "DerivedSource",
          "description": "DEPRECATED"
        },
        "residualSource": {
          "$ref": "DerivedSource",
          "description": "DEPRECATED"
        }
      },
      "type": "object"
    },
    "ReportWorkItemStatusResponse": {
      "description": "Response from a request to report the status of WorkItems.",
      "id": "ReportWorkItemStatusResponse",
      "properties": {
        "workItemServiceStates": {
          "description": "A set of messages indicating the service-side state for each WorkItem whose status was reported, in the same order as the WorkItemStatus messages in the ReportWorkItemStatusRequest which resulted in this response.",
          "items": {
            "$ref": "WorkItemServiceState"
          },
          "type": "array"
        }
      },
      "type": "object"
    },
    "WorkItemServiceState": {
      "description": "The Dataflow service's idea of the current state of a WorkItem being processed by a worker.",
      "id": "WorkItemServiceState",
      "properties": {
        "suggestedStopPoint": {
          "$ref": "ApproximateProgress",
          "description": "The progress point in the WorkItem where the Dataflow service suggests that the worker truncate the task."
        },
        "leaseExpireTime": {
          "description": "Time at which the current lease will expire.",
          "type": "string"
        },
        "reportStatusInterval": {
          "description": "New recommended reporting interval.",
          "type": "string"
        },
        "harnessData": {
          "additionalProperties": {
            "description": "Properties of the object.",
            "type": "any"
          },
          "description": "Other data returned by the service, specific to the particular worker harness.",
          "type": "object"
        },
        "nextReportIndex": {
          "description": "The index value to use for the next report sent by the worker. Note: If the report call fails for whatever reason, the worker should reuse this index for subsequent report attempts.",
          "format": "int64",
          "type": "string"
        },
        "suggestedStopPosition": {
          "$ref": "Position",
          "description": "Obsolete, always empty."
        }
      },
      "type": "object"
    },
    "LeaseWorkItemRequest": {
      "description": "Request to lease WorkItems.",
      "id": "LeaseWorkItemRequest",
      "properties": {
        "workItemTypes": {
          "description": "Filter for WorkItem type.",
          "items": {
            "type": "string"
          },
          "type": "array"
        },
        "workerCapabilities": {
          "description": "Worker capabilities. WorkItems might be limited to workers with specific capabilities.",
          "items": {
            "type": "string"
          },
          "type": "array"
        },
        "requestedLeaseDuration": {
          "description": "The initial lease period.",
          "type": "string"
        },
        "currentWorkerTime": {
          "description": "The current timestamp at the worker.",
          "type": "string"
        },
        "workerId": {
          "description": "Identifies the worker leasing work -- typically the ID of the virtual machine running the worker.",
          "type": "string"
        }
      },
      "type": "object"
    },
    "LeaseWorkItemResponse": {
      "description": "Response to a request to lease WorkItems.",
      "id": "LeaseWorkItemResponse",
      "properties": {
        "workItems": {
          "description": "A list of the leased WorkItems.",
          "items": {
            "$ref": "WorkItem"
          },
          "type": "array"
        }
      },
      "type": "object"
    },
    "WorkItem": {
      "description": "WorkItem represents basic information about a WorkItem to be executed in the cloud.",
      "id": "WorkItem",
      "properties": {
        "id": {
          "description": "Identifies this WorkItem.",
          "format": "int64",
          "type": "string"
        },
        "projectId": {
          "description": "Identifies the cloud project this WorkItem belongs to.",
          "type": "string"
        },
        "jobId": {
          "description": "Identifies the workflow job this WorkItem belongs to.",
          "type": "string"
        },
        "packages": {
          "description": "Any required packages that need to be fetched in order to execute this WorkItem.",
          "items": {
            "$ref": "Package"
          },
          "type": "array"
        },
        "mapTask": {
          "$ref": "MapTask",
          "description": "Additional information for MapTask WorkItems."
        },
        "seqMapTask": {
          "$ref": "SeqMapTask",
          "description": "Additional information for SeqMapTask WorkItems."
        },
        "shellTask": {
          "$ref": "ShellTask",
          "description": "Additional information for ShellTask WorkItems."
        },
        "streamingSetupTask": {
          "$ref": "StreamingSetupTask",
          "description": "Additional information for StreamingSetupTask WorkItems."
        },
        "sourceOperationTask": {
          "$ref": "SourceOperationRequest",
          "description": "Additional information for source operation WorkItems."
        },
        "streamingComputationTask": {
          "$ref": "StreamingComputationTask",
          "description": "Additional information for StreamingComputationTask WorkItems."
        },
        "reportStatusInterval": {
          "description": "Recommended reporting interval.",
          "type": "string"
        },
        "leaseExpireTime": {
          "description": "Time when the lease on this WorkItem will expire.",
          "type": "string"
        },
        "configuration": {
          "description": "Work item-specific configuration as an opaque blob.",
          "type": "string"
        },
        "initialReportIndex": {
          "description": "The initial index to use when reporting the status of the WorkItem.",
          "format": "int64",
          "type": "string"
        }
      },
      "type": "object"
    },
    "MapTask": {
      "description": "MapTask consists of an ordered set of instructions, each of which describes one particular low-level operation for the worker to perform in order to accomplish the MapTask's WorkItem. Each instruction must appear in the list before any instructions which depend on its output.",
      "id": "MapTask",
      "properties": {
        "instructions": {
          "description": "The instructions in the MapTask.",
          "items": {
            "$ref": "ParallelInstruction"
          },
          "type": "array"
        },
        "systemName": {
          "description": "System-defined name of this MapTask. Unique across the workflow.",
          "type": "string"
        },
        "stageName": {
          "description": "System-defined name of the stage containing this MapTask. Unique across the workflow.",
          "type": "string"
        }
      },
      "type": "object"
    },
    "ParallelInstruction": {
      "description": "Describes a particular operation comprising a MapTask.",
      "id": "ParallelInstruction",
      "properties": {
        "systemName": {
          "description": "System-defined name of this operation. Unique across the workflow.",
          "type": "string"
        },
        "name": {
          "description": "User-provided name of this operation.",
          "type": "string"
        },
        "read": {
          "$ref": "ReadInstruction",
          "description": "Additional information for Read instructions."
        },
        "write": {
          "$ref": "WriteInstruction",
          "description": "Additional information for Write instructions."
        },
        "parDo": {
          "$ref": "ParDoInstruction",
          "description": "Additional information for ParDo instructions."
        },
        "partialGroupByKey": {
          "$ref": "PartialGroupByKeyInstruction",
          "description": "Additional information for PartialGroupByKey instructions."
        },
        "flatten": {
          "$ref": "FlattenInstruction",
          "description": "Additional information for Flatten instructions."
        },
        "outputs": {
          "description": "Describes the outputs of the instruction.",
          "items": {
            "$ref": "InstructionOutput"
          },
          "type": "array"
        }
      },
      "type": "object"
    },
    "ReadInstruction": {
      "description": "An instruction that reads records. Takes no inputs, produces one output.",
      "id": "ReadInstruction",
      "properties": {
        "source": {
          "$ref": "Source",
          "description": "The source to read from."
        }
      },
      "type": "object"
    },
    "WriteInstruction": {
      "description": "An instruction that writes records. Takes one input, produces no outputs.",
      "id": "WriteInstruction",
      "properties": {
        "input": {
          "$ref": "InstructionInput",
          "description": "The input."
        },
        "sink": {
          "$ref": "Sink",
          "description": "The sink to write to."
        }
      },
      "type": "object"
    },
    "InstructionInput": {
      "description": "An input of an instruction, as a reference to an output of a producer instruction.",
      "id": "InstructionInput",
      "properties": {
        "producerInstructionIndex": {
          "description": "The index (origin zero) of the parallel instruction that produces the output to be consumed by this input. This index is relative to the list of instructions in this input's instruction's containing MapTask.",
          "format": "int32",
          "type": "integer"
        },
        "outputNum": {
          "description": "The output index (origin zero) within the producer.",
          "format": "int32",
          "type": "integer"
        }
      },
      "type": "object"
    },
    "Sink": {
      "description": "A sink that records can be encoded and written to.",
      "id": "Sink",
      "properties": {
        "spec": {
          "additionalProperties": {
            "description": "Properties of the object.",
            "type": "any"
          },
          "description": "The sink to write to, plus its parameters.",
          "type": "object"
        },
        "codec": {
          "additionalProperties": {
            "description": "Properties of the object.",
            "type": "any"
          },
          "description": "The codec to use to encode data written to the sink.",
          "type": "object"
        }
      },
      "type": "object"
    },
    "ParDoInstruction": {
      "description": "An instruction that does a ParDo operation. Takes one main input and zero or more side inputs, and produces zero or more outputs. Runs user code.",
      "id": "ParDoInstruction",
      "properties": {
        "input": {
          "$ref": "InstructionInput",
          "description": "The input."
        },
        "sideInputs": {
          "description": "Zero or more side inputs.",
          "items": {
            "$ref": "SideInputInfo"
          },
          "type": "array"
        },
        "userFn": {
          "additionalProperties": {
            "description": "Properties of the object.",
            "type": "any"
          },
          "description": "The user function to invoke.",
          "type": "object"
        },
        "numOutputs": {
          "description": "The number of outputs.",
          "format": "int32",
          "type": "integer"
        },
        "multiOutputInfos": {
          "description": "Information about each of the outputs, if user_fn is a MultiDoFn.",
          "items": {
            "$ref": "MultiOutputInfo"
          },
          "type": "array"
        }
      },
      "type": "object"
    },
    "SideInputInfo": {
      "description": "Information about a side input of a DoFn or an input of a SeqDoFn.",
      "id": "SideInputInfo",
      "properties": {
        "sources": {
          "description": "The source(s) to read element(s) from to get the value of this side input. If more than one source, then the elements are taken from the sources, in the specified order if order matters. At least one source is required.",
          "items": {
            "$ref": "Source"
          },
          "type": "array"
        },
        "kind": {
          "additionalProperties": {
            "description": "Properties of the object.",
            "type": "any"
          },
          "description": "How to interpret the source element(s) as a side input value.",
          "type": "object"
        },
        "tag": {
          "description": "The id of the tag the user code will access this side input by; this should correspond to the tag of some MultiOutputInfo.",
          "type": "string"
        }
      },
      "type": "object"
    },
    "MultiOutputInfo": {
      "description": "Information about an output of a multi-output DoFn.",
      "id": "MultiOutputInfo",
      "properties": {
        "tag": {
          "description": "The id of the tag the user code will emit to this output by; this should correspond to the tag of some SideInputInfo.",
          "type": "string"
        }
      },
      "type": "object"
    },
    "PartialGroupByKeyInstruction": {
      "description": "An instruction that does a partial group-by-key. One input and one output.",
      "id": "PartialGroupByKeyInstruction",
      "properties": {
        "input": {
          "$ref": "InstructionInput",
          "description": "Describes the input to the partial group-by-key instruction."
        },
        "inputElementCodec": {
          "additionalProperties": {
            "description": "Properties of the object.",
            "type": "any"
          },
          "description": "The codec to use for interpreting an element in the input PTable.",
          "type": "object"
        },
        "valueCombiningFn": {
          "additionalProperties": {
            "description": "Properties of the object.",
            "type": "any"
          },
          "description": "The value combining function to invoke.",
          "type": "object"
        }
      },
      "type": "object"
    },
    "FlattenInstruction": {
      "description": "An instruction that copies its inputs (zero or more) to its (single) output.",
      "id": "FlattenInstruction",
      "properties": {
        "inputs": {
          "description": "Describes the inputs to the flatten instruction.",
          "items": {
            "$ref": "InstructionInput"
          },
          "type": "array"
        }
      },
      "type": "object"
    },
    "InstructionOutput": {
      "description": "An output of an instruction.",
      "id": "InstructionOutput",
      "properties": {
        "name": {
          "description": "The user-provided name of this output.",
          "type": "string"
        },
        "codec": {
          "additionalProperties": {
            "description": "Properties of the object.",
            "type": "any"
          },
          "description": "The codec to use to encode data being written via this output.",
          "type": "object"
        }
      },
      "type": "object"
    },
    "SeqMapTask": {
      "description": "Describes a particular function to invoke.",
      "id": "SeqMapTask",
      "properties": {
        "inputs": {
          "description": "Information about each of the inputs.",
          "items": {
            "$ref": "SideInputInfo"
          },
          "type": "array"
        },
        "userFn": {
          "additionalProperties": {
            "description": "Properties of the object.",
            "type": "any"
          },
          "description": "The user function to invoke.",
          "type": "object"
        },
        "outputInfos": {
          "description": "Information about each of the outputs.",
          "items": {
            "$ref": "SeqMapTaskOutputInfo"
          },
          "type": "array"
        },
        "name": {
          "description": "The user-provided name of the SeqDo operation.",
          "type": "string"
        },
        "systemName": {
          "description": "System-defined name of the SeqDo operation. Unique across the workflow.",
          "type": "string"
        },
        "stageName": {
          "description": "System-defined name of the stage containing the SeqDo operation. Unique across the workflow.",
          "type": "string"
        }
      },
      "type": "object"
    },
    "SeqMapTaskOutputInfo": {
      "description": "Information about an output of a SeqMapTask.",
      "id": "SeqMapTaskOutputInfo",
      "properties": {
        "tag": {
          "description": "The id of the TupleTag the user code will tag the output value by.",
          "type": "string"
        },
        "sink": {
          "$ref": "Sink",
          "description": "The sink to write the output value to."
        }
      },
      "type": "object"
    },
    "ShellTask": {
      "description": "A task which consists of a shell command for the worker to execute.",
      "id": "ShellTask",
      "properties": {
        "command": {
          "description": "The shell command to run.",
          "type": "string"
        },
        "exitCode": {
          "description": "Exit code for the task.",
          "format": "int32",
          "type": "integer"
        }
      },
      "type": "object"
    },
    "StreamingSetupTask": {
      "description": "A task which initializes part of a streaming Dataflow job.",
      "id": "StreamingSetupTask",
      "properties": {
        "receiveWorkPort": {
          "description": "The TCP port on which the worker should listen for messages from other streaming computation workers.",
          "format": "int32",
          "type": "integer"
        },
        "workerHarnessPort": {
          "description": "The TCP port used by the worker to communicate with the Dataflow worker harness.",
          "format": "int32",
          "type": "integer"
        },
        "streamingComputationTopology": {
          "$ref": "TopologyConfig",
          "description": "The global topology of the streaming Dataflow job."
        }
      },
      "type": "object"
    },
    "TopologyConfig": {
      "description": "Global topology of the streaming Dataflow job, including all computations and their sharded locations.",
      "id": "TopologyConfig",
      "properties": {
        "computations": {
          "description": "The computations associated with a streaming Dataflow job.",
          "items": {
            "$ref": "ComputationTopology"
          },
          "type": "array"
        },
        "dataDiskAssignments": {
          "description": "The disks assigned to a streaming Dataflow job.",
          "items": {
            "$ref": "DataDiskAssignment"
          },
          "type": "array"
        },
        "userStageToComputationNameMap": {
          "additionalProperties": {
            "type": "string"
          },
          "description": "Maps user stage names to stable computation names.",
          "type": "object"
        }
      },
      "type": "object"
    },
    "ComputationTopology": {
      "description": "All configuration data for a particular Computation.",
      "id": "ComputationTopology",
      "properties": {
        "systemStageName": {
          "description": "The system stage name.",
          "type": "string"
        },
        "computationId": {
          "description": "The ID of the computation.",
          "type": "string"
        },
        "userStageName": {
          "description": "The user stage name.",
          "type": "string"
        },
        "keyRanges": {
          "description": "The key ranges processed by the computation.",
          "items": {
            "$ref": "KeyRangeLocation"
          },
          "type": "array"
        },
        "inputs": {
          "description": "The inputs to the computation.",
          "items": {
            "$ref": "StreamLocation"
          },
          "type": "array"
        },
        "outputs": {
          "description": "The outputs from the computation.",
          "items": {
            "$ref": "StreamLocation"
          },
          "type": "array"
        },
        "stateFamilies": {
          "description": "The state family values.",
          "items": {
            "$ref": "StateFamilyConfig"
          },
          "type": "array"
        }
      },
      "type": "object"
    },
    "KeyRangeLocation": {
      "description": "Location information for a specific key-range of a sharded computation. Currently we only support UTF-8 character splits to simplify encoding into JSON.",
      "id": "KeyRangeLocation",
      "properties": {
        "start": {
          "description": "The start (inclusive) of the key range.",
          "type": "string"
        },
        "end": {
          "description": "The end (exclusive) of the key range.",
          "type": "string"
        },
        "deliveryEndpoint": {
          "description": "The physical location of this range assignment to be used for streaming computation cross-worker message delivery.",
          "type": "string"
        },
        "persistentDirectory": {
          "description": "The location of the persistent state for this range, as a persistent directory in the worker local filesystem.",
          "type": "string"
        },
        "dataDisk": {
          "description": "The name of the data disk where data for this range is stored. This name is local to the Google Cloud Platform project and uniquely identifies the disk within that project, for example \"myproject-1014-104817-4c2-harness-0-disk-1\".",
          "type": "string"
        }
      },
      "type": "object"
    },
    "StreamLocation": {
      "description": "Describes a stream of data, either as input to be processed or as output of a streaming Dataflow job.",
      "id": "StreamLocation",
      "properties": {
        "streamingStageLocation": {
          "$ref": "StreamingStageLocation",
          "description": "The stream is part of another computation within the current streaming Dataflow job."
        },
        "pubsubLocation": {
          "$ref": "PubsubLocation",
          "description": "The stream is a pubsub stream."
        },
        "sideInputLocation": {
          "$ref": "StreamingSideInputLocation",
          "description": "The stream is a streaming side input."
        },
        "customSourceLocation": {
          "$ref": "CustomSourceLocation",
          "description": "The stream is a custom source."
        }
      },
      "type": "object"
    },
    "StreamingStageLocation": {
      "description": "Identifies the location of a streaming computation stage, for stage-to-stage communication.",
      "id": "StreamingStageLocation",
      "properties": {
        "streamId": {
          "description": "Identifies the particular stream within the streaming Dataflow job.",
          "type": "string"
        }
      },
      "type": "object"
    },
    "PubsubLocation": {
      "description": "Identifies a pubsub location to use for transferring data into or out of a streaming Dataflow job.",
      "id": "PubsubLocation",
      "properties": {
        "topic": {
          "description": "A pubsub topic, in the form of \"pubsub.googleapis.com/topics/\n/\"",
          "type": "string"
        },
        "subscription": {
          "description": "A pubsub subscription, in the form of \"pubsub.googleapis.com/subscriptions/\n/\"",
          "type": "string"
        },
        "timestampLabel": {
          "description": "If set, contains a pubsub label from which to extract record timestamps. If left empty, record timestamps will be generated upon arrival.",
          "type": "string"
        },
        "idLabel": {
          "description": "If set, contains a pubsub label from which to extract record ids. If left empty, record deduplication will be strictly best effort.",
          "type": "string"
        },
        "dropLateData": {
          "description": "Indicates whether the pipeline allows late-arriving data.",
          "type": "boolean"
        },
        "trackingSubscription": {
          "description": "If set, specifies the pubsub subscription that will be used for tracking custom time timestamps for watermark estimation.",
          "type": "string"
        }
      },
      "type": "object"
    },
    "StreamingSideInputLocation": {
      "description": "Identifies the location of a streaming side input.",
      "id": "StreamingSideInputLocation",
      "properties": {
        "tag": {
          "description": "Identifies the particular side input within the streaming Dataflow job.",
          "type": "string"
        },
        "stateFamily": {
          "description": "Identifies the state family where this side input is stored.",
          "type": "string"
        }
      },
      "type": "object"
    },
    "CustomSourceLocation": {
      "description": "Identifies the location of a custom source.",
      "id": "CustomSourceLocation",
      "properties": {
        "stateful": {
          "description": "Whether this source is stateful.",
          "type": "boolean"
        }
      },
      "type": "object"
    },
    "StateFamilyConfig": {
      "description": "State family configuration.",
      "id": "StateFamilyConfig",
      "properties": {
        "stateFamily": {
          "description": "The state family value.",
          "type": "string"
        },
        "isRead": {
          "description": "If true, this family corresponds to a read operation.",
          "type": "boolean"
        }
      },
      "type": "object"
    },
    "DataDiskAssignment": {
      "description": "Data disk assignment for a given VM instance.",
      "id": "DataDiskAssignment",
      "properties": {
        "vmInstance": {
          "description": "VM instance name the data disks are mounted to, for example \"myproject-1014-104817-4c2-harness-0\".",
          "type": "string"
        },
        "dataDisks": {
          "description": "Mounted data disks. The order is important: a data disk's 0-based index in this list defines which persistent directory the disk is mounted to, for example the list of { \"myproject-1014-104817-4c2-harness-0-disk-0\" }, { \"myproject-1014-104817-4c2-harness-0-disk-1\" }.",
          "items": {
            "type": "string"
          },
          "type": "array"
        }
      },
      "type": "object"
    },
    "SourceOperationRequest": {
      "description": "A work item that represents the different operations that can be performed on a user-defined Source specification.",
      "id": "SourceOperationRequest",
      "properties": {
        "split": {
          "$ref": "SourceSplitRequest",
          "description": "Information about a request to split a source."
        },
        "getMetadata": {
          "$ref": "SourceGetMetadataRequest",
          "description": "Information about a request to get metadata about a source."
        }
      },
      "type": "object"
    },
    "SourceSplitRequest": {
      "description": "Represents the operation to split a high-level Source specification into bundles (parts for parallel processing). At a high level, splitting of a source into bundles happens as follows: SourceSplitRequest is applied to the source. If it returns SOURCE_SPLIT_OUTCOME_USE_CURRENT, no further splitting happens and the source is used \"as is\". Otherwise, splitting is applied recursively to each produced DerivedSource. As an optimization, for any Source, if its does_not_need_splitting is true, the framework assumes that splitting this source would return SOURCE_SPLIT_OUTCOME_USE_CURRENT, and doesn't initiate a SourceSplitRequest. This applies both to the initial source being split and to bundles produced from it.",
      "id": "SourceSplitRequest",
      "properties": {
        "source": {
          "$ref": "Source",
          "description": "Specification of the source to be split."
        },
        "options": {
          "$ref": "SourceSplitOptions",
          "description": "Hints for tuning the splitting process."
        }
      },
      "type": "object"
    },
    "SourceSplitOptions": {
      "description": "Hints for splitting a Source into bundles (parts for parallel processing) using SourceSplitRequest.",
      "id": "SourceSplitOptions",
      "properties": {
        "desiredBundleSizeBytes": {
          "description": "The source should be split into a set of bundles where the estimated size of each is approximately this many bytes.",
          "format": "int64",
          "type": "string"
        },
        "desiredShardSizeBytes": {
          "description": "DEPRECATED in favor of desired_bundle_size_bytes.",
          "format": "int64",
          "type": "string"
        }
      },
      "type": "object"
    },
    "SourceGetMetadataRequest": {
      "description": "A request to compute the SourceMetadata of a Source.",
      "id": "SourceGetMetadataRequest",
      "properties": {
        "source": {
          "$ref": "Source",
          "description": "Specification of the source whose metadata should be computed."
        }
      },
      "type": "object"
    },
    "StreamingComputationTask": {
      "description": "A task which describes what action should be performed for the specified streaming computation ranges.",
      "id": "StreamingComputationTask",
      "properties": {
        "taskType": {
          "description": "A type of streaming computation task.",
          "enum": [
            "STREAMING_COMPUTATION_TASK_UNKNOWN",
            "STREAMING_COMPUTATION_TASK_STOP",
            "STREAMING_COMPUTATION_TASK_START"
          ],
          "type": "string"
        },
        "dataDisks": {
          "description": "Describes the set of data disks this task should apply to.",
          "items": {
            "$ref": "MountedDataDisk"
          },
          "type": "array"
        },
        "computationRanges": {
          "description": "Contains ranges of a streaming computation this task should apply to.",
          "items": {
            "$ref": "StreamingComputationRanges"
          },
          "type": "array"
        }
      },
      "type": "object"
    },
    "MountedDataDisk": {
      "description": "Describes a mounted data disk.",
      "id": "MountedDataDisk",
      "properties": {
        "dataDisk": {
          "description": "The name of the data disk. This name is local to the Google Cloud Platform project and uniquely identifies the disk within that project, for example \"myproject-1014-104817-4c2-harness-0-disk-1\".",
          "type": "string"
        }
      },
      "type": "object"
    },
    "StreamingComputationRanges": {
      "description": "Describes full or partial data disk assignment information of the computation ranges.",
      "id": "StreamingComputationRanges",
      "properties": {
        "computationId": {
          "description": "The ID of the computation.",
          "type": "string"
        },
        "rangeAssignments": {
          "description": "Data disk assignments for ranges from this computation.",
          "items": {
            "$ref": "KeyRangeDataDiskAssignment"
          },
          "type": "array"
        }
      },
      "type": "object"
    },
    "KeyRangeDataDiskAssignment": {
      "description": "Data disk assignment information for a specific key-range of a sharded computation. Currently we only support UTF-8 character splits to simplify encoding into JSON.",
      "id": "KeyRangeDataDiskAssignment",
      "properties": {
        "start": {
          "description": "The start (inclusive) of the key range.",
          "type": "string"
        },
        "end": {
          "description": "The end (exclusive) of the key range.",
          "type": "string"
        },
        "dataDisk": {
          "description": "The name of the data disk where data for this range is stored. This name is local to the Google Cloud Platform project and uniquely identifies the disk within that project, for example \"myproject-1014-104817-4c2-harness-0-disk-1\".",
          "type": "string"
        }
      },
      "type": "object"
    }
  },
100 "servicePath": "", | 2341 "servicePath": "", |
101 "title": "Google Dataflow API", | 2342 "title": "Google Dataflow API", |
102 "version": "v1b3" | 2343 "version": "v1b3" |
103 } | 2344 } |
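
The MapTask and InstructionInput schemas above describe a small dataflow graph: instructions are topologically ordered, and each consumer names its producer by (producerInstructionIndex, outputNum). The sketch below shows how a worker might resolve those references, assuming plain Python dicts shaped like the schemas; resolve_input and the sample names are illustrative, not part of the Dataflow API.

# Sketch: resolving InstructionInput references inside a MapTask.
# Dict shapes follow the MapTask / ParallelInstruction / InstructionInput
# schemas above; resolve_input is a hypothetical helper, not an API call.

map_task = {
    "systemName": "stage-1",
    "instructions": [
        {"systemName": "read-op",
         "read": {"source": {"spec": {}}},
         "outputs": [{"name": "records"}]},
        {"systemName": "pardo-op",
         "parDo": {"input": {"producerInstructionIndex": 0, "outputNum": 0},
                   "numOutputs": 1},
         "outputs": [{"name": "processed"}]},
        {"systemName": "write-op",
         "write": {"input": {"producerInstructionIndex": 1, "outputNum": 0},
                   "sink": {"spec": {}}}},
    ],
}

def resolve_input(task, instruction_input):
    """Follow an InstructionInput back to the producer's InstructionOutput."""
    producer = task["instructions"][instruction_input["producerInstructionIndex"]]
    return producer["outputs"][instruction_input["outputNum"]]

# Because every instruction appears in the list before the instructions
# that depend on its output, a single forward pass is enough.
for instr in map_task["instructions"]:
    for kind in ("parDo", "write"):
        if kind in instr:
            output = resolve_input(map_task, instr[kind]["input"])
            print(instr["systemName"], "consumes", output["name"])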
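
Similarly, the Source schema above spells out how a source's effective parameters are assembled from baseSpecs and spec: the latest explicitly specified value of each parameter wins, with later baseSpecs entries overriding earlier ones and spec overriding everything. A minimal sketch of that merge rule over plain dicts follows; merge_source_spec and the gs:// values are illustrative assumptions, not part of the API.

def merge_source_spec(source):
    """Effective parameters of a Source dict per the baseSpecs rule above:
    later baseSpecs entries win, and spec overrides anything in baseSpecs."""
    merged = {}
    for base in source.get("baseSpecs", []):
        merged.update(base)                  # later items win
    merged.update(source.get("spec", {}))    # spec overrides baseSpecs
    return merged

source = {
    "baseSpecs": [
        {"filepattern": "gs://bucket/*.txt", "compression": "none"},
        {"compression": "gzip"},             # later entry wins
    ],
    "spec": {"filepattern": "gs://bucket/part-0.txt"},
}
print(merge_source_spec(source))
# -> {'filepattern': 'gs://bucket/part-0.txt', 'compression': 'gzip'}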