Skip to content

Commit

Permalink
fix: pdf summarize and add it to test
Browse files Browse the repository at this point in the history
Signed-off-by: Abirdcfly <[email protected]>
  • Loading branch information
Abirdcfly committed Mar 19, 2024
1 parent 5b8642f commit 4036fcb
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/codespell.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ jobs:
with:
ignore_words_file: .github/.codespellignore
check_filenames: true
skip: go.*,**/*.drawio,./deploy/charts/*,./config/crd/*,./deploy/llms/*,./deploy/pgvector
skip: go.*,**/*.drawio,./deploy/charts/*,./config/crd/*,./deploy/llms/*,./deploy/pgvector,./pkg/documentloaders/testdata

2 changes: 1 addition & 1 deletion apiserver/graph/generated/generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion apiserver/graph/generated/models_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 7 additions & 10 deletions config/samples/app_llmchain_abstract.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: Application
metadata:
name: base-chat-document-assistant
namespace: kubeagi-system
namespace: arcadia
spec:
displayName: "AI文档对话助手"
description: "最简单的AI文档对话助手"
Expand Down Expand Up @@ -37,7 +37,7 @@ spec:
ref:
apiGroup: arcadia.kubeagi.k8s.com.cn
kind: LLM
name: qwen-0-5b-external
name: app-shared-llm-service
nextNodeName: ["chain-node"]
- name: chain-node
displayName: "llm chain"
Expand All @@ -58,7 +58,7 @@ apiVersion: prompt.arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: Prompt
metadata:
name: base-chat-document-assistant
namespace: kubeagi-system
namespace: arcadia
annotations:
arcadia.kubeagi.k8s.com.cn/input-rules: '[{"kind":"Input","length":1}]'
arcadia.kubeagi.k8s.com.cn/output-rules: '[{"length":1}]'
Expand All @@ -73,7 +73,7 @@ apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: DocumentLoader
metadata:
name: base-chat-document-assistant
namespace: kubeagi-system
namespace: arcadia
spec:
displayName: "llm chain"
description: "llm chain"
Expand All @@ -84,16 +84,13 @@ apiVersion: chain.arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: LLMChain
metadata:
name: base-chat-document-assistant
namespace: kubeagi-system
namespace: arcadia
annotations:
arcadia.kubeagi.k8s.com.cn/input-rules: '[{"kind":"LLM","group":"arcadia.kubeagi.k8s.com.cn","length":1},{"kind":"prompt","group":"prompt.arcadia.kubeagi.k8s.com.cn","length":1}]'
arcadia.kubeagi.k8s.com.cn/output-rules: '[{"kind":"Output","length":1}]'
spec:
displayName: "llm chain"
description: "llm chain"
maxNumberOfConccurent: 1
maxTokens: 20480
maxLength: 20480
model: "6ac7baa2-71e7-4ffc-bd49-9356e743ecbb"
memory:
maxTokenLimit: 20480
conversionWindowSize: 2
model: glm-4
2 changes: 1 addition & 1 deletion pkg/appruntime/chain/mpchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ func (l *MapReduceChain) Run(ctx context.Context, _ client.Client, args map[stri
if err != nil {
return args, fmt.Errorf("failed to run MapReduceChain due to %s", err.Error())
}
args["_answer"] = fmt.Sprintf("Here is the document summary: %s \n", out)
args[base.AgentOutputInArg] = fmt.Sprintf("Here is the document summary: %s \n", out)
return args, nil
}

Expand Down
Binary file added pkg/documentloaders/testdata/llava.pdf
Binary file not shown.
92 changes: 92 additions & 0 deletions tests/example-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,92 @@ function getRespInAppChat() {
fi
}

#######################################
# Upload a file to an application's conversation, then ask the application
# to summarise it.
#
# Step 1 POSTs the file to /chat/conversations/file and retries until the
#        response contains a `.document` field.
# Step 2 POSTs the query "总结一下" to /chat, referencing the uploaded file,
#        and retries until the response contains a `.message` field.
# Step 3 (only when the global testStream is "true") repeats the chat
#        request once more and only checks curl's exit status.
#
# Globals read:
#   TimeoutSeconds - passed to curl --max-time
#   RETRY_COUNT    - maximum number of retries per step
#   testStream     - "true" enables step 3 (not set by this function)
# Globals written (left global on purpose; resp_conversation_id is consumed
# by the caller after this function returns):
#   appname namespace filename attempt resp doc_data file_id data
#   ai_data references resp_conversation_id
# Arguments:
#   $1 - application name
#   $2 - application namespace
#   $3 - path of the file to upload
# Outputs:
#   Progress and the server replies on stdout.
# Returns:
#   Exits the whole script with status 1 once RETRY_COUNT is exceeded.
#######################################
function fileUploadSummarise() {
  appname=$1
  namespace=$2
  filename=$3

  # Step 1: upload the file.
  attempt=0
  while true; do
    info "sleep 3 seconds"
    sleep 3
    resp=$(curl --max-time "$TimeoutSeconds" -s --show-error -XPOST --form "file=@${filename}" --form "app_name=${appname}" --form "app_namespace=${namespace}" -H "Content-Type: multipart/form-data" http://127.0.0.1:8081/chat/conversations/file)
    # Quote $resp so the JSON reaches jq without word splitting or globbing.
    doc_data=$(printf '%s\n' "$resp" | jq -r '.document')
    # jq -r prints the literal string "null" when the key is absent, so an
    # empty check alone would accept an error response as a success.
    if [ -z "$doc_data" ] || [ "$doc_data" = "null" ]; then
      printf '%s\n' "$resp"
      EnableAPIServerPortForward
      if [[ $resp == *"googleapi: Error"* ]]; then
        echo "google api error, will retry after 60s"
        sleep 60
      fi
      attempt=$((attempt + 1))
      if [ "$attempt" -gt "$RETRY_COUNT" ]; then
        echo "❌: Failed. Retry count exceeded."
        exit 1
      fi
      echo "🔄: Failed. Attempt $attempt/$RETRY_COUNT"
      continue
    fi
    echo "👤: ${filename}"
    echo "🤖: ${doc_data}"
    break
  done
  file_id=$(printf '%s\n' "$resp" | jq -r '.document.object')
  resp_conversation_id=$(printf '%s\n' "$resp" | jq -r '.conversation_id')

  # Step 2: ask for a summary of the uploaded file in the same conversation.
  attempt=0
  while true; do
    info "sleep 3 seconds to sumerize doc"
    sleep 3
    data=$(jq -n --arg fileid "$file_id" --arg appname "$appname" --arg query "总结一下" --arg namespace "$namespace" --arg conversationID "$resp_conversation_id" '{"query":$query,"response_mode":"blocking","conversation_id":$conversationID,"app_name":$appname, "app_namespace":$namespace, "files": [$fileid]}')
    resp=$(curl --max-time "$TimeoutSeconds" -s --show-error -XPOST http://127.0.0.1:8081/chat --data "$data")
    ai_data=$(printf '%s\n' "$resp" | jq -r '.message')
    references=$(printf '%s\n' "$resp" | jq -r '.references')
    if [ -z "$ai_data" ] || [ "$ai_data" = "null" ]; then
      printf '%s\n' "$resp"
      EnableAPIServerPortForward
      if [[ $resp == *"googleapi: Error"* ]]; then
        echo "google api error, will retry after 60s"
        sleep 60
      fi
      attempt=$((attempt + 1))
      if [ "$attempt" -gt "$RETRY_COUNT" ]; then
        echo "❌: Failed. Retry count exceeded."
        exit 1
      fi
      echo "🔄: Failed. Attempt $attempt/$RETRY_COUNT"
      continue
    fi
    echo "👤: 总结一下"
    echo "🤖: ${ai_data}"
    echo "🔗: ${references}"
    break
  done
  resp_conversation_id=$(printf '%s\n' "$resp" | jq -r '.conversation_id')

  # Step 3: optional extra request. ${testStream:-} keeps the test valid
  # when testStream is unset instead of producing a "[" syntax error.
  if [[ "${testStream:-}" == "true" ]]; then
    attempt=0
    while true; do
      info "sleep 5 seconds"
      sleep 5
      info "just test stream mode"
      # NOTE(review): this branch is labelled "stream mode" but still sends
      # response_mode "blocking" — confirm whether "streaming" was intended.
      data=$(jq -n --arg fileid "$file_id" --arg appname "$appname" --arg query "总结一下" --arg namespace "$namespace" --arg conversationID "$resp_conversation_id" '{"query":$query,"response_mode":"blocking","conversation_id":$conversationID,"app_name":$appname, "app_namespace":$namespace, "files": [$fileid]}')
      # Only curl's exit status is checked here; the body goes to stdout.
      if ! curl --max-time "$TimeoutSeconds" -s --show-error -XPOST http://127.0.0.1:8081/chat --data "$data"; then
        attempt=$((attempt + 1))
        if [ "$attempt" -gt "$RETRY_COUNT" ]; then
          echo "❌: Failed. Retry count exceeded."
          exit 1
        fi
        echo "🔄: Failed. Attempt $attempt/$RETRY_COUNT"
        EnableAPIServerPortForward
        echo "and wait 60s for google api error"
        sleep 60
        continue
      fi
      break
    done
  fi
}

info "1. create kind cluster"
make kind
df -h
Expand Down Expand Up @@ -645,6 +731,12 @@ while true; do
break
done

# Step 8.4.6: apply the sample document-assistant application, call
# waitCRDStatusReady for it (defined elsewhere; presumably blocks until the
# Application reports ready), upload and summarise the test PDF, then ask a
# follow-up question in the conversation that fileUploadSummarise created.
info "8.4.6 chat with document"
kubectl apply -f config/samples/app_llmchain_abstract.yaml
waitCRDStatusReady "Application" "arcadia" "base-chat-document-assistant"
fileUploadSummarise "base-chat-document-assistant" "arcadia" "./pkg/documentloaders/testdata/llava.pdf"
# resp_conversation_id is set by fileUploadSummarise. It is quoted so that an
# empty value still occupies argument 4 instead of letting "false" shift into
# its position.
getRespInAppChat "base-chat-document-assistant" "arcadia" "what is LLaVA?" "${resp_conversation_id}" "false"

# AI replies are nondeterministic. Most of the time the test passes, but occasionally the AI will call names in each reply and the test fails, so the following tests are temporarily disabled.
#getRespInAppChat "base-chat-with-bot" "arcadia" "What is your model?" ${resp_conversation_id} "false"
#getRespInAppChat "base-chat-with-bot" "arcadia" "Does your model based on gpt-3.5?" ${resp_conversation_id} "false"
Expand Down

0 comments on commit 4036fcb

Please sign in to comment.