# pipeline.yml
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline

# Identification shown for this pipeline job and the experiment it is
# grouped under.
display_name: partition by key parallel job
description: The hello world pipeline job with partition by key
experiment_name: hello-world-parallel-job
tags:
  tag: tagvalue
  owner: sdkteam

# Pipeline-wide default compute target; per the pipeline job schema a
# step-level `compute` takes precedence over it.
settings:
  default_compute: azureml:cpu-cluster
jobs:
  # Step 1: command component that reads the CSV and writes an mltable
  # output, given the comma-separated partition keys "Store,Brand".
  partition_job:
    type: command
    component: ./src/partition_data/partition_data.yml
    inputs:
      data_source:
        type: uri_file
        path: ./oj_sales_data/oj_sales_data.csv
      partition_keys: Store,Brand
    outputs:
      tabular_output_data:
        type: mltable
        mode: rw_mount

  # Step 2: parallel job whose `data_source` input is bound to the
  # mltable output of `partition_job` above.
  parallel_train:
    type: parallel
    compute: azureml:cpu-cluster
    inputs:
      data_source:
        path: ${{parent.jobs.partition_job.outputs.tabular_output_data}}
        type: mltable
        mode: direct
      # String inputs forwarded to the entry script via
      # `program_arguments` below.
      drop_cols: "Revenue,Advert,Store,Brand"
      target_col: "Quantity"
      date_col: "WeekStarting"
      lagging_orders: "1,2,3,4,5,6"
    outputs:
      model_folder:
        type: uri_folder
        mode: rw_mount

    # Mini-batches are formed by these keys; they match the keys passed
    # to `partition_job`.
    partition_keys:
      - Store
      - Brand
    resources:
      instance_count: 2
    # NOTE(review): per the parallel job schema, -1 means item failures
    # are ignored — confirm this is intended alongside the mini-batch
    # threshold of 5.
    error_threshold: -1
    mini_batch_error_threshold: 5
    logging_level: "INFO"
    # Selects which of the inputs above is split into mini-batches.
    input_data: ${{inputs.data_source}}
    max_concurrency_per_instance: 2
    retry_settings:
      max_retries: 2
      # Presumably seconds, per the parallel job schema — confirm.
      timeout: 60

    environment_variables:
      "AZUREML_PARALLEL_EXAMPLE": "1a_yaml"

    task:
      type: run_function
      code: ./src/parallel_train
      entry_script: parallel_train.py
      environment:
        image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04
        conda_file: ./src/parallel_train/conda.yaml
      # Folded scalar (`>-`): the lines below are joined with single
      # spaces into one argument string with no trailing newline. Keep
      # them at the same indentation so folding applies.
      program_arguments: >-
        --drop_cols ${{inputs.drop_cols}}
        --target_col ${{inputs.target_col}}
        --date_col ${{inputs.date_col}}
        --lagging_orders ${{inputs.lagging_orders}}
        --model_folder ${{outputs.model_folder}}