Skip to content

Commit

Permalink
Versioning 2 API (#393)
Browse files Browse the repository at this point in the history
Worker Versioning 2 API

Co-authored-by: Carly de Frondeville <[email protected]>
  • Loading branch information
ShahabT and carlydf authored Apr 10, 2024
1 parent 268fe9c commit 2227a14
Show file tree
Hide file tree
Showing 11 changed files with 1,161 additions and 82 deletions.
3 changes: 3 additions & 0 deletions buf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ breaking:
- temporal/api/operatorservice/v1/request_response.proto
- temporal/api/operatorservice/v1/service.proto
- temporal/api/nexus/v1/message.proto
- temporal/api/command/v1/message.proto
- temporal/api/history/v1/message.proto
- temporal/api/common/v1/message.proto
lint:
use:
- DEFAULT
Expand Down
477 changes: 446 additions & 31 deletions openapi/openapiv2.json

Large diffs are not rendered by default.

353 changes: 329 additions & 24 deletions openapi/openapiv3.yaml

Large diffs are not rendered by default.

20 changes: 9 additions & 11 deletions temporal/api/command/v1/message.proto
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,9 @@ message ScheduleActivityTaskCommandAttributes {
// Request to start the activity directly bypassing matching service and worker polling
// The slot for executing the activity should be reserved when setting this field to true.
bool request_eager_execution = 12;
// If this is set, the workflow executing this command wishes to start the activity using
// a version compatible with the version that this workflow most recently ran on, if such
// behavior is possible.
bool use_compatible_version = 13;
// If this is set, the activity would be assigned to the Build ID of the workflow. Otherwise,
// Assignment rules of the activity's Task Queue will be used to determine the Build ID.
bool use_workflow_build_id = 13;
}

message RequestCancelActivityTaskCommandAttributes {
Expand Down Expand Up @@ -193,9 +192,9 @@ message ContinueAsNewWorkflowExecutionCommandAttributes {
temporal.api.common.v1.Header header = 12;
temporal.api.common.v1.Memo memo = 13;
temporal.api.common.v1.SearchAttributes search_attributes = 14;
// If this is set, the workflow executing this command wishes to continue as new using a version
// compatible with the version that this workflow most recently ran on.
bool use_compatible_version = 15;
// If this is set, the new execution inherits the Build ID of the current execution. Otherwise,
// the assignment rules will be used to independently assign a Build ID to the new execution.
bool inherit_build_id = 15;

// `workflow_execution_timeout` is omitted as it shouldn't be overridden from within a workflow.
}
Expand Down Expand Up @@ -223,10 +222,9 @@ message StartChildWorkflowExecutionCommandAttributes {
temporal.api.common.v1.Header header = 14;
temporal.api.common.v1.Memo memo = 15;
temporal.api.common.v1.SearchAttributes search_attributes = 16;
// If this is set, the workflow executing this command wishes to start the child workflow using
// a version compatible with the version that this workflow most recently ran on, if such
// behavior is possible.
bool use_compatible_version = 17;
// If this is set, the child workflow inherits the Build ID of the parent. Otherwise, the assignment
// rules of the child's Task Queue will be used to independently assign a Build ID to it.
bool inherit_build_id = 17;
}

message ProtocolMessageCommandAttributes {
Expand Down
5 changes: 2 additions & 3 deletions temporal/api/common/v1/message.proto
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,12 @@ message WorkerVersionStamp {
// An opaque whole-worker identifier. Replaces the deprecated `binary_checksum` field when this
// message is included in requests which previously used that.
string build_id = 1;
// Set if the worker used a dynamically loadable bundle to process
// the task. The bundle could be a WASM blob, JS bundle, etc.
string bundle_id = 2;

// If set, the worker is opting in to worker versioning. Otherwise, this is used only as a
// marker for workflow reset points and the BuildIDs search attribute.
bool use_versioning = 3;

// Later, may include bundle id that could be used for WASM and/or JS dynamically loadable bundles.
}

// Identifies the version(s) that a worker is compatible with when polling or identifying itself,
Expand Down
28 changes: 28 additions & 0 deletions temporal/api/enums/v1/task_queue.proto
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ enum TaskQueueType {

// Specifies which category of tasks may reach a worker on a versioned task queue.
// Used both in a reachability query and its response.
// Deprecated.
enum TaskReachability {
TASK_REACHABILITY_UNSPECIFIED = 0;
// There's a possiblity for a worker to receive new workflow tasks. Workers should *not* be retired.
Expand All @@ -78,3 +79,30 @@ enum TaskReachability {
TASK_REACHABILITY_CLOSED_WORKFLOWS = 4;
}

// Specifies which category of tasks may reach a versioned worker of a certain Build ID.
// Note: future activities who inherit their workflow's Build ID but not its Task Queue will not be
// accounted for reachability as server cannot not know if they'll happen as they do not use
// assignment rules of their Task Queue. Same goes for Child Workflows or Continue-As-New Workflows
// who inherit the parent/previous workflow's Build ID but not its Task Queue. In those cases, make
// sure to query reachability for the parent/previous workflow's Task Queue as well.
enum BuildIdTaskReachability {
// Task reachability is not reported
BUILD_ID_TASK_REACHABILITY_UNSPECIFIED = 0;
// Build ID may be used by new workflows or activities (base on versioning rules), or there are
// open workflows or backlogged activities assigned to it.
BUILD_ID_TASK_REACHABILITY_REACHABLE = 1;
// Build ID does not have open workflows and is not reachable by new workflows,
// but MAY have closed workflows within the namespace retention period.
// Not applicable to activity-only task queues.
BUILD_ID_TASK_REACHABILITY_CLOSED_WORKFLOWS_ONLY = 2;
// Build ID is not used for new executions, nor it has been used by any existing execution
// within the retention period.
BUILD_ID_TASK_REACHABILITY_UNREACHABLE = 3;
}

enum DescribeTaskQueueMode {
// Unspecified means legacy behavior.
DESCRIBE_TASK_QUEUE_MODE_UNSPECIFIED = 0;
// Enhanced mode reports aggregated results for all partitions, supports Build IDs, and reports richer info.
DESCRIBE_TASK_QUEUE_MODE_ENHANCED = 1;
}
39 changes: 27 additions & 12 deletions temporal/api/history/v1/message.proto
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ message WorkflowExecutionStartedEventAttributes {
string workflow_id = 28;
// If this workflow intends to use anything other than the current overall default version for
// the queue, then we include it here.
// Deprecated. use `inherited_build_id` instead
temporal.api.common.v1.WorkerVersionStamp source_version_stamp = 29;

// Completion callbacks attached when this workflow was started.
repeated temporal.api.common.v1.Callback completion_callbacks = 30;

Expand All @@ -126,6 +126,8 @@ message WorkflowExecutionStartedEventAttributes {
// Scenario 5: Workflow W1 is reseted, creating W2.
// - The root workflow of W1 is W1 and the root workflow of W2 is W2.
temporal.api.common.v1.WorkflowExecution root_workflow_execution = 31;
// When present, this execution is assigned to the build ID of its parent or previous execution.
string inherited_build_id = 32;
}

message WorkflowExecutionCompletedEventAttributes {
Expand Down Expand Up @@ -178,9 +180,9 @@ message WorkflowExecutionContinuedAsNewEventAttributes {
temporal.api.common.v1.Header header = 12;
temporal.api.common.v1.Memo memo = 13;
temporal.api.common.v1.SearchAttributes search_attributes = 14;
// If this is set, the workflow executing this command wishes to continue as new using a version
// compatible with the version that this workflow most recently ran on.
bool use_compatible_version = 15;
// If this is set, the new execution inherits the Build ID of the current execution. Otherwise,
// the assignment rules will be used to independently assign a Build ID to the new execution.
bool inherit_build_id = 15;

// workflow_execution_timeout is omitted as it shouldn't be overridden from within a workflow.
}
Expand Down Expand Up @@ -211,6 +213,11 @@ message WorkflowTaskStartedEventAttributes {
// continue-as-new regardless of the suggestion. Note that history event count is
// just the event id of this event, so we don't include it explicitly here.
int64 history_size_bytes = 5;
// Version info of the worker to whom this task was dispatched.
temporal.api.common.v1.WorkerVersionStamp worker_version = 6;
// Used by server internally to properly reapply build ID redirects to an execution
// when rebuilding it from events.
int64 build_id_redirect_counter = 7;
}

message WorkflowTaskCompletedEventAttributes {
Expand All @@ -225,6 +232,7 @@ message WorkflowTaskCompletedEventAttributes {
// Version info of the worker who processed this workflow task. If present, the `build_id` field
// within is also used as `binary_checksum`, which may be omitted in that case (it may also be
// populated to preserve compatibility).
// Deprecated. Use the info inside the corresponding WorkflowTaskStartedEvent
temporal.api.common.v1.WorkerVersionStamp worker_version = 5;
// Data the SDK wishes to record for itself, but server need not interpret, and does not
// directly impact workflow state.
Expand Down Expand Up @@ -264,6 +272,7 @@ message WorkflowTaskFailedEventAttributes {
// Version info of the worker who processed this workflow task. If present, the `build_id` field
// within is also used as `binary_checksum`, which may be omitted in that case (it may also be
// populated to preserve compatibility).
// Deprecated. Use the info inside the corresponding WorkflowTaskStartedEvent
temporal.api.common.v1.WorkerVersionStamp worker_version = 10;
}

Expand Down Expand Up @@ -305,10 +314,9 @@ message ActivityTaskScheduledEventAttributes {
// configuration. Retries will happen up to `schedule_to_close_timeout`. To disable retries set
// retry_policy.maximum_attempts to 1.
temporal.api.common.v1.RetryPolicy retry_policy = 12;
// If this is set, the workflow executing this command wishes to start the activity using
// a version compatible with the version that this workflow most recently ran on, if such
// behavior is possible.
bool use_compatible_version = 13;
// If this is set, the activity would be assigned to the Build ID of the workflow. Otherwise,
// Assignment rules of the activity's Task Queue will be used to determine the Build ID.
bool use_workflow_build_id = 13;
}

message ActivityTaskStartedEventAttributes {
Expand All @@ -323,6 +331,11 @@ message ActivityTaskStartedEventAttributes {
// Will be set to the most recent failure details, if this task has previously failed and then
// been retried.
temporal.api.failure.v1.Failure last_failure = 5;
// Version info of the worker to whom this task was dispatched.
temporal.api.common.v1.WorkerVersionStamp worker_version = 6;
// Used by server internally to properly reapply build ID redirects to an execution
// when rebuilding it from events.
int64 build_id_redirect_counter = 7;
}

message ActivityTaskCompletedEventAttributes {
Expand All @@ -335,6 +348,7 @@ message ActivityTaskCompletedEventAttributes {
// id of the worker that completed this task
string identity = 4;
// Version info of the worker who processed this workflow task.
// Deprecated. Use the info inside the corresponding ActivityTaskStartedEvent
temporal.api.common.v1.WorkerVersionStamp worker_version = 5;
}

Expand All @@ -349,6 +363,7 @@ message ActivityTaskFailedEventAttributes {
string identity = 4;
temporal.api.enums.v1.RetryState retry_state = 5;
// Version info of the worker who processed this workflow task.
// Deprecated. Use the info inside the corresponding ActivityTaskStartedEvent
temporal.api.common.v1.WorkerVersionStamp worker_version = 6;
}

Expand Down Expand Up @@ -383,6 +398,7 @@ message ActivityTaskCanceledEventAttributes {
// id of the worker who canceled this activity
string identity = 5;
// Version info of the worker who processed this workflow task.
// Deprecated. Use the info inside the corresponding ActivityTaskStartedEvent
temporal.api.common.v1.WorkerVersionStamp worker_version = 6;
}

Expand Down Expand Up @@ -603,10 +619,9 @@ message StartChildWorkflowExecutionInitiatedEventAttributes {
temporal.api.common.v1.Header header = 15;
temporal.api.common.v1.Memo memo = 16;
temporal.api.common.v1.SearchAttributes search_attributes = 17;
// If this is set, the workflow executing this command wishes to start the child workflow using
// a version compatible with the version that this workflow most recently ran on, if such
// behavior is possible.
bool use_compatible_version = 19;
// If this is set, the child workflow inherits the Build ID of the parent. Otherwise, the assignment
// rules of the child's Task Queue will be used to independently assign a Build ID to it.
bool inherit_build_id = 19;
}

message StartChildWorkflowExecutionFailedEventAttributes {
Expand Down
116 changes: 116 additions & 0 deletions temporal/api/taskqueue/v1/message.proto
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,29 @@ message TaskQueueMetadata {
google.protobuf.DoubleValue max_tasks_per_second = 1;
}

// Used for specifying versions the caller is interested in.
message TaskQueueVersionSelection {
// Include specific Build IDs.
repeated string build_ids = 1;
// Include the unversioned queue.
bool unversioned = 2;
// Include all active versions. A version is considered active if it has had new
// tasks or polls recently.
bool all_active = 3;
}

message TaskQueueVersionInfo {
// Task Queue info per Task Type. Key is the numerical value of the temporal.api.enums.v1.TaskQueueType enum.
map<int32, TaskQueueTypeInfo> types_info = 1;
temporal.api.enums.v1.BuildIdTaskReachability task_reachability = 2;
}

message TaskQueueTypeInfo {
// Unversioned workers (with `useVersioning=false`) are reported in unversioned result even if they set a Build ID.
repeated PollerInfo pollers = 1;
}

// Deprecated. Use `InternalTaskQueueStatus`. This is kept until `DescribeTaskQueue` supports legacy behavior.
message TaskQueueStatus {
int64 backlog_count_hint = 1;
int64 read_level = 2;
Expand Down Expand Up @@ -111,3 +134,96 @@ message BuildIdReachability {
// Reachability per task queue.
repeated TaskQueueReachability task_queue_reachability = 2;
}

message RampByPercentage {
// Acceptable range is [0,100).
float ramp_percentage = 1;
}

// These rules assign a Build ID to Unassigned Workflow Executions and
// Activities.
//
// Specifically, assignment rules are applied to the following Executions or
// Activities when they are scheduled in a Task Queue:
// - Generally, any new Workflow Execution, except:
// - When A Child Workflow or a Continue-As-New Execution inherits the
// Build ID from its parent/previous execution by setting the
// `inherit_build_id` flag.
// - Workflow Executions started Eagerly are assigned to the Build ID of
// the Starter.
// - An Activity that is scheduled on a Task Queue different from the one
// their Workflow runs on, unless the `use_workflow_build_id` flag is set.
//
// In absence of (applicable) redirect rules (`CompatibleBuildIdRedirectRule`s)
// the task will be dispatched to Workers of the Build ID determined by the
// assignment rules. Otherwise, the final Build ID will be determined by the
// redirect rules.
//
// When using Worker Versioning, in the steady state, for a given Task Queue,
// there should typically be exactly one assignment rule to send all Unassigned
// tasks to the latest Build ID. Existence of at least one such "unconditional"
// rule at all times is enforce by the system, unless the `force` flag is used
// by the user when replacing/deleting these rules (for exceptional cases).
//
// During a deployment, one or more additional rules can be added to assign a
// subset of the tasks to a new Build ID based on a "ramp percentage".
//
// When there are multiple assignment rules for a Task Queue, the rules are
// evaluated in order, starting from index 0. The first applicable rule will be
// applied and the rest will be ignored.
//
// In the event that no assignment rule is applicable on a task (or the Task
// Queue is simply not versioned), the tasks will be sent to unversioned
// workers, if available. Otherwise, they remain Unassigned, and will be
// retried for assignment, or dispatch to unversioned workers, at a later time
// depending on the availability of workers.
message BuildIdAssignmentRule {
string target_build_id = 1;

// If a ramp is provided, this rule will be applied only to a sample of
// tasks according to the provided percentage.
// This option can be used only on "terminal" Build IDs (the ones not used
// as source in any redirect rules).
oneof ramp {
// This ramp is useful for gradual Blue/Green deployments (and similar)
// where you want to send a certain portion of the traffic to the target
// Build ID.
RampByPercentage percentage_ramp = 3;
}
}

// These rules apply to tasks assigned to a particular Build ID
// (`source_build_id`) to redirect them to another *compatible* Build ID
// (`target_build_id`).
//
// It is user's responsibility to ensure that the target Build ID is compatible
// with the source Build ID (e.g. by using the Patching API).
//
// Most deployments are not expected to need these rules, however following
// situations can greatly benefit from redirects:
// - Need to move long-running Workflow Executions from an old Build ID to a
// newer one.
// - Need to hotfix some broken or stuck Workflow Executions.
//
// In steady state, redirect rules are beneficial when dealing with old
// Executions ran on now-decommissioned Build IDs:
// - To redirecting the Workflow Queries to the current (compatible) Build ID.
// - To be able to Reset an old Execution so it can run on the current
// (compatible) Build ID.
//
// Redirect rules can be chained, but only the last rule in the chain can have
// a ramp.
message CompatibleBuildIdRedirectRule {
string source_build_id = 1;
string target_build_id = 2;
}

message TimestampedBuildIdAssignmentRule {
BuildIdAssignmentRule rule = 1;
google.protobuf.Timestamp create_time = 2;
}

message TimestampedCompatibleBuildIdRedirectRule {
CompatibleBuildIdRedirectRule rule = 1;
google.protobuf.Timestamp create_time = 2;
}
Loading

0 comments on commit 2227a14

Please sign in to comment.