forked from Azure/azureml-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
job.yml
39 lines (31 loc) · 1.25 KB
/
job.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
# Source code for the job, taken from the local `src` directory.
code:
  local_path: src
# Starts the Dask cluster and runs the script `prep-nyctaxi.py` with the
# parameters listed below.
# For an interactive session, remove the `--script` argument: the cluster is
# then only started and the dataset mounted.
# The folded scalar (`>-`) joins the lines below into a single command line.
command: >-
  python startDask.py
  --script prep-nyctaxi.py
  --nyc_taxi_dataset {inputs.nyc_taxi_dataset}
# Job inputs; `nyc_taxi_dataset` is referenced from `command` as
# {inputs.nyc_taxi_dataset}.
inputs:
  nyc_taxi_dataset:
    data:
      path: https://azuremlexamples.blob.core.windows.net/datasets/nyctaxi/
    # NOTE(review): `mode` is placed as a sibling of `data` (per-input setting);
    # confirm against the job schema version in use.
    mode: mount
# Execution environment: a conda specification file plus a Docker image.
environment:
  conda_file: file:conda.yml
  docker:
    image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04
# Compute target and node count for the job.
compute:
  # use a sku with lots of disk space and memory
  target: azureml:cpu-cluster-lg
  # Matches the 4 node DASK cluster mentioned in `description`.
  instance_count: 4
distribution:
  # The job is currently launched with `type: pytorch` since that gives full
  # flexibility in assigning the work; no PyTorch is actually used in this job.
  # NOTE(review): the original comment was cut off after "assigning the work
  # to the" (presumably "nodes") -- confirm the intended wording.
  type: pytorch
experiment_name: dask-nyctaxi-example
# Folded scalar: the lines below are joined with single spaces into one string.
description: >-
  This sample shows how to run a distributed DASK job on AzureML.
  The 24GB NYC Taxi dataset is read in CSV format by a 4 node DASK cluster,
  processed and then written as job output in parquet format.