-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgcp_deploy.py
139 lines (128 loc) · 3.57 KB
/
gcp_deploy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import argparse
from google.cloud import aiplatform
def main(opt):
    """Ensure a Vertex AI model and endpoint exist, then (re)deploy the model.

    Looks up a model and an endpoint by display name, creating either one if
    it is missing, undeploys anything currently on the endpoint, and deploys
    the (found or newly uploaded) model with the requested machine/GPU config.

    Args:
        opt: argparse.Namespace carrying model_name, endpoint_name,
            endpoint_deployed_name, image_uri, region, machine_type,
            min_replica_count, max_replica_count, gpu_type and
            accelerator_count.
    """
    # Fix: opt.region was parsed by the CLI but never used, so the SDK fell
    # back to its default location instead of honoring --region.
    aiplatform.init(location=opt.region)

    # Reuse an existing model with a matching display name, if any.
    model = None
    for candidate in aiplatform.Model.list():
        if candidate.display_name == opt.model_name:
            print(candidate.resource_name)
            model = candidate
            break
    if model is None:
        print("No model found, creating...")
        model = aiplatform.Model.upload(
            display_name=opt.model_name,
            serving_container_image_uri=opt.image_uri,
            serving_container_ports=[8080],
            serving_container_predict_route="/predict",
        )

    # Reuse an existing endpoint with a matching display name, if any.
    endpoint = None
    for candidate in aiplatform.Endpoint.list():
        if candidate.display_name == opt.endpoint_name:
            print(candidate.resource_name)
            endpoint = candidate
            break
    if endpoint is None:
        print("No endpoint found, creating...")
        endpoint = aiplatform.Endpoint.create(
            display_name=opt.endpoint_name
        )
    print(vars(endpoint))

    # Clear any previous deployments so 100% of traffic goes to the new one.
    if len(endpoint.list_models()) > 0:
        print("undeploying previous models...")
        endpoint.undeploy_all()

    # Accelerator options seen in use: NVIDIA_TESLA_V100, NVIDIA_TESLA_T4.
    endpoint.deploy(
        model,
        # Fix: was hard-coded to "stable_diffusion_endpoint_deployed",
        # silently ignoring the --endpoint-deployed-name argument.
        deployed_model_display_name=opt.endpoint_deployed_name,
        traffic_percentage=100,
        machine_type=opt.machine_type,
        min_replica_count=opt.min_replica_count,
        max_replica_count=opt.max_replica_count,
        accelerator_type=opt.gpu_type,
        accelerator_count=opt.accelerator_count,
        sync=True,
        # Broken in cloud shell
        # autoscaling_target_cpu_utilization=opt.cpu_duty_cycle,
        # autoscaling_target_accelerator_duty_cycle=opt.accelerator_duty_cycle
    )
    print("done")
if __name__ == "__main__":
    # CLI flag specification, kept in the original declaration order so the
    # generated --help output is unchanged.
    arg_specs = [
        ("--min-replica-count",
         dict(type=int, default=1, help="Minimum number of replicas")),
        ("--machine-type",
         dict(type=str, default="n1-standard-4", help="Machine type")),
        ("--max-replica-count",
         dict(type=int, default=1, help="Maximum number of replicas")),
        ("--gpu-type",
         dict(type=str, default="NVIDIA_TESLA_T4", help="GPU type")),
        ("--accelerator-count",
         dict(type=int, default=1, help="GPU count")),
        ("--region",
         dict(type=str, default="us-central1", help="gcp region")),
        ("--model-name",
         dict(type=str, default="blip", help="name of model")),
        ("--endpoint-name",
         dict(type=str, default="blip-endpoint", help="Name of endpoint")),
        ("--endpoint-deployed-name",
         dict(type=str, default="blip_endpoint_deployed",
              help="Endpoint deployed name")),
        ("--image-uri",
         dict(type=str, required=True,
              help="name of image in gcr. Ex: gcr.io/project-name/blip:latest")),
        ("--accelerator-duty-cycle",
         dict(type=int, default=20, help="Autoscaling for GPUs.")),
        ("--cpu-duty-cycle",
         dict(type=int, default=5, help="Autoscaling for CPUs.")),
    ]

    parser = argparse.ArgumentParser()
    for flag, kwargs in arg_specs:
        parser.add_argument(flag, **kwargs)
    main(parser.parse_args())