From 2f92e81c3d1a5deacfdc461866418074e9227542 Mon Sep 17 00:00:00 2001
From: zhyncs
Date: Wed, 18 Dec 2024 04:00:17 -0800
Subject: [PATCH 1/2] feat: support deepseek v2.5 1210 with sglang

---
 .../deepseek-v2-5-instruct-sglang/config.yaml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 custom-server/deepseek-v2-5-instruct-sglang/config.yaml

diff --git a/custom-server/deepseek-v2-5-instruct-sglang/config.yaml b/custom-server/deepseek-v2-5-instruct-sglang/config.yaml
new file mode 100644
index 00000000..f6073517
--- /dev/null
+++ b/custom-server/deepseek-v2-5-instruct-sglang/config.yaml
@@ -0,0 +1,18 @@
+base_image:
+  image: lmsysorg/sglang:v0.4.0.post1-cu124
+model_metadata:
+  repo_id: deepseek-ai/DeepSeek-V2.5-1210
+docker_server:
+  start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V2.5-1210 --port 8000 --tp 8 --trust-remote-code --enable-dp-attention"
+  readiness_endpoint: /health
+  liveness_endpoint: /health
+  predict_endpoint: /v1/completions
+  server_port: 8000
+resources:
+  accelerator: H100:8
+  use_gpu: true
+runtime:
+  predict_concurrency : 32
+model_name: DeepSeek V2.5 1210 SGLang
+environment_variables:
+  hf_access_token: null

From 940c2051802a70ffece2ecf8e68dc4f8426ccc74 Mon Sep 17 00:00:00 2001
From: zhyncs
Date: Wed, 18 Dec 2024 08:48:01 -0800
Subject: [PATCH 2/2] use enable-dp-attention when fp8

---
 custom-server/deepseek-v2-5-instruct-sglang/config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/custom-server/deepseek-v2-5-instruct-sglang/config.yaml b/custom-server/deepseek-v2-5-instruct-sglang/config.yaml
index f6073517..e85af908 100644
--- a/custom-server/deepseek-v2-5-instruct-sglang/config.yaml
+++ b/custom-server/deepseek-v2-5-instruct-sglang/config.yaml
@@ -3,7 +3,7 @@ base_image:
 model_metadata:
   repo_id: deepseek-ai/DeepSeek-V2.5-1210
 docker_server:
-  start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V2.5-1210 --port 8000 --tp 8 --trust-remote-code --enable-dp-attention"
+  start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V2.5-1210 --port 8000 --tp 8 --trust-remote-code"
   readiness_endpoint: /health
   liveness_endpoint: /health
   predict_endpoint: /v1/completions