diff --git a/deploy/llama-factory/Chart.yaml b/deploy/llama-factory/Chart.yaml
new file mode 100644
index 000000000..ad7ca8025
--- /dev/null
+++ b/deploy/llama-factory/Chart.yaml
@@ -0,0 +1,15 @@
+apiVersion: v2
+name: llama-factory
+version: 0.1.1
+appVersion: 0.1.0
+description: llama-factory
+type: application
+
+keywords:
+  - llama-factory
+  - model tune
+sources:
+  - https://github.com/huangqg/LLaMA-Factory
+maintainers:
+  - name: huangqg
+    url: https://github.com/huangqg
diff --git a/deploy/llama-factory/README.md b/deploy/llama-factory/README.md
new file mode 100644
index 000000000..9725b7620
--- /dev/null
+++ b/deploy/llama-factory/README.md
@@ -0,0 +1 @@
+# llama-factory
\ No newline at end of file
diff --git a/deploy/llama-factory/templates/deployment.yaml b/deploy/llama-factory/templates/deployment.yaml
new file mode 100644
index 000000000..bff568e43
--- /dev/null
+++ b/deploy/llama-factory/templates/deployment.yaml
@@ -0,0 +1,43 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llama-factory
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app: llama-factory
+spec:
+  selector:
+    matchLabels:
+      app: llama-factory
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: llama-factory
+    spec:
+      containers:
+      - image: {{ .Values.image }}
+        imagePullPolicy: {{ .Values.imagePullPolicy }}
+        name: llama-factory
+        ports:
+        - containerPort: 7860
+          protocol: TCP
+        resources: {{ toYaml .Values.resources | nindent 10 }}
+        volumeMounts:
+        - name: model-volume
+          mountPath: /root/.cache/huggingface
+        - name: data-volume
+          mountPath: /app/data
+        - name: output-volume
+          mountPath: /app/output
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: {{ .Values.volume.model }}
+      - name: data-volume
+        hostPath:
+          path: {{ .Values.volume.data }}
+      - name: output-volume
+        hostPath:
+          path: {{ .Values.volume.output }}
+      terminationGracePeriodSeconds: 30
\ No newline at end of file
diff --git a/deploy/llama-factory/templates/ingress.yaml b/deploy/llama-factory/templates/ingress.yaml
new file mode 100644
index 000000000..5df44783b
--- /dev/null
+++ b/deploy/llama-factory/templates/ingress.yaml
@@ -0,0 +1,43 @@
+{{- if .Values.ingress.enable }}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  annotations:
+    description: llama-factory-ingress
+    displayName: llama-factory-ingress
+    httpSend: /
+    ingress-lb: {{ .Values.ingress.ingressClassName }}
+    kubernetes.io/ingress.class: {{ .Values.ingress.ingressClassName }}
+    nginx.ingress.kubernetes.io/configuration-snippet: |
+      proxy_cache static-cache;
+      proxy_cache_valid 404 10m;
+      proxy_cache_use_stale error timeout updating http_404 http_500 http_502 http_503 http_504;
+      proxy_cache_bypass $http_x_purge;
+      add_header X-Cache-Status $upstream_cache_status; # check X-Cache-Status to see if it's HIT
+      rewrite ^/(llama-factory-apis)(/|$)(.*)$ /$3 break;
+    nginx.ingress.kubernetes.io/enable-access-log: "false"
+    nginx.ingress.kubernetes.io/enable-rewrite-log: "false"
+    nginx.ingress.kubernetes.io/load-balance: round_robin
+    nginx.ingress.kubernetes.io/proxy-body-size: ""
+    nginx.ingress.kubernetes.io/proxy-buffering: "on"
+    nginx.ingress.kubernetes.io/proxy-connect-timeout: "60"
+    nginx.ingress.kubernetes.io/server-alias: ""
+    nginx.ingress.kubernetes.io/ssl-redirect: "true"
+    nginx.ingress.kubernetes.io/upstream-vhost: $host
+  labels:
+    ingress-lb: {{ .Values.ingress.ingressClassName }}
+  name: llama-factory
+  namespace: {{ .Release.Namespace }}
+spec:
+  rules:
+  - host: portal.{{ .Values.ingress.ingressDomain }}
+    http:
+      paths:
+      - backend:
+          service:
+            name: llama-factory
+            port:
+              number: 7860
+        path: /
+        pathType: ImplementationSpecific
+{{- end }}
\ No newline at end of file
diff --git a/deploy/llama-factory/templates/service.yaml b/deploy/llama-factory/templates/service.yaml
new file mode 100644
index 000000000..57a772076
--- /dev/null
+++ b/deploy/llama-factory/templates/service.yaml
@@ -0,0 +1,17 @@
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app: llama-factory
+  name: llama-factory
+  namespace: {{ .Release.Namespace }}
+spec:
+  ports:
+  - name: http
+    port: 7860
+    protocol: TCP
+    targetPort: 7860
+  selector:
+    app: llama-factory
+  sessionAffinity: None
+  type: ClusterIP
diff --git a/deploy/llama-factory/values.yaml b/deploy/llama-factory/values.yaml
new file mode 100644
index 000000000..d8366e257
--- /dev/null
+++ b/deploy/llama-factory/values.yaml
@@ -0,0 +1,23 @@
+image: kubeagi/llama-factory:v0.6.1
+imagePullPolicy: IfNotPresent
+resources:
+  limits:
+    cpu: "4"
+    memory: 12Gi
+    nvidia.com/gpu: "1"
+  requests:
+    cpu: "1"
+    memory: 1Gi
+    nvidia.com/gpu: "1"
+
+volume:
+  model: /path/to/model/or/cache
+  data: /path/to/data
+  output: /path/to/output
+
+# ingress configurations for component
+ingress:
+  # set enable to `true` to enable ingress
+  enable: true
+  ingressClassName: portal-ingress
+  ingressDomain: your.domain.nip.io
diff --git a/deploy/llms/Dockerfile.fastchat-server b/deploy/llms/Dockerfile.fastchat-server
index cb8139ec1..639f78608 100644
--- a/deploy/llms/Dockerfile.fastchat-server
+++ b/deploy/llms/Dockerfile.fastchat-server
@@ -17,7 +17,8 @@ RUN apt-get update -y && apt-get install -y python3-distutils curl python3-pip p
 ARG PYTHON_INDEX_URL=https://pypi.mirrors.ustc.edu.cn/simple/
 
 WORKDIR /git
 RUN git clone https://github.com/lm-sys/FastChat.git
 
 WORKDIR /git/FastChat
-RUN pip install --upgrade pip -i ${PYTHON_INDEX_URL} && pip install -e . -i ${PYTHON_INDEX_URL}
\ No newline at end of file
+RUN pip install --upgrade pip -i ${PYTHON_INDEX_URL} && pip install -e . -i ${PYTHON_INDEX_URL}
+RUN pip install pydantic==1.10.11 -i ${PYTHON_INDEX_URL}
\ No newline at end of file
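
For reference, a minimal sketch of installing the new chart from a checkout of this repository. The release name, namespace, domain, and hostPath overrides below are illustrative placeholders, not values shipped with this change:

```shell
# Hypothetical install from the repo root; adapt the overrides to the target cluster.
helm install llama-factory ./deploy/llama-factory \
  --namespace llama-factory --create-namespace \
  --set ingress.ingressDomain=example.nip.io \
  --set volume.model=/data/hf-cache \
  --set volume.data=/data/llama-factory/data \
  --set volume.output=/data/llama-factory/output
```

With ingress enabled (the chart default), the LLaMA-Factory web UI on port 7860 is then reachable at `portal.<ingressDomain>`. Note that the hostPath volumes assume the model cache, dataset, and output directories already exist on the node running the pod.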