From b5af8e8b18b2ff710642d6724712fa3a2c394b93 Mon Sep 17 00:00:00 2001 From: friendli-bot <104493380+friendli-bot@users.noreply.github.com> Date: Wed, 13 Nov 2024 15:08:46 +0900 Subject: [PATCH] Update 2024-11-13 (#31) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- .speakeasy/gen.lock | 118 +-- .speakeasy/gen.yaml | 2 +- .speakeasy/workflow.lock | 10 +- README.md | 6 - RELEASES.md | 52 +- USAGE.md | 6 - docs/models/completionsstreambody.md | 17 - docs/models/dedicatedchatcompletebody.md | 31 + .../dedicatedchatcompletebodylogitbias.md | 9 + .../dedicatedchatcompletebodystreamoptions.md | 12 + ...=> dedicatedchatcompletebodytoolchoice.md} | 6 +- ...atedchatcompletebodytoolchoicefunction.md} | 2 +- ...dicatedchatcompletebodytoolchoiceobject.md | 9 + ...edicatedchatcompletebodytoolchoicetype.md} | 2 +- docs/models/dedicatedchatcompleterequest.md | 8 +- docs/models/dedicatedchatstreambody.md | 31 + ...md => dedicatedchatstreambodylogitbias.md} | 2 +- ...> dedicatedchatstreambodystreamoptions.md} | 2 +- ...d => dedicatedchatstreambodytoolchoice.md} | 6 +- ...icatedchatstreambodytoolchoicefunction.md} | 2 +- ...dedicatedchatstreambodytoolchoiceobject.md | 9 + ... dedicatedchatstreambodytoolchoicetype.md} | 2 +- docs/models/dedicatedchatstreamrequest.md | 8 +- .../dedicatedcompletionscompletebody.md | 17 + ...nscompletebodycompletionsbodywithprompt.md | 43 ++ ...nscompletebodycompletionsbodywithtokens.md | 43 ++ .../dedicatedcompletionscompleterequest.md | 8 +- docs/models/dedicatedcompletionsstreambody.md | 17 + ...ionsstreambodycompletionsbodywithprompt.md | 43 ++ ...ionsstreambodycompletionsbodywithtokens.md | 43 ++ .../dedicatedcompletionsstreamrequest.md | 8 +- docs/models/dedicateddetokenizationbody.md | 9 + docs/models/dedicateddetokenizationrequest.md | 8 +- docs/models/dedicatedtokenizationbody.md | 9 + docs/models/dedicatedtokenizationrequest.md | 8 +- ...ebody.md => serverlesschatcompletebody.md} | 2 +- docs/models/serverlesschatcompleterequest.md | 8 +- ...eambody.md => serverlesschatstreambody.md} | 8 +- .../serverlesschatstreambodylogitbias.md | 9 + .../serverlesschatstreambodystreamoptions.md | 12 + ... 
=> serverlesschatstreambodytoolchoice.md} | 2 +- ...erlesschatstreambodytoolchoicefunction.md} | 2 +- ...serverlesschatstreambodytoolchoicetype.md} | 2 +- docs/models/serverlesschatstreamrequest.md | 8 +- ...d => serverlesscompletionscompletebody.md} | 2 +- .../serverlesscompletionscompleterequest.md | 8 +- .../models/serverlesscompletionsstreambody.md | 17 + ...onsstreambodycompletionsbodywithprompt.md} | 2 +- ...onsstreambodycompletionsbodywithtokens.md} | 2 +- .../serverlesscompletionsstreamrequest.md | 8 +- ...ody.md => serverlessdetokenizationbody.md} | 2 +- .../models/serverlessdetokenizationrequest.md | 8 +- ...nbody.md => serverlesstokenizationbody.md} | 2 +- docs/models/serverlesstokenizationrequest.md | 8 +- ...serverlesstoolassistedchatcompletebody.md} | 8 +- ...stoolassistedchatcompletebodytoolchoice.md | 24 + ...istedchatcompletebodytoolchoicefunction.md | 8 + ...ssistedchatcompletebodytoolchoiceobject.md | 9 + ...lassistedchatcompletebodytoolchoicetype.md | 10 + ...rverlesstoolassistedchatcompleterequest.md | 8 +- ...> serverlesstoolassistedchatstreambody.md} | 8 +- ...esstoolassistedchatstreambodytoolchoice.md | 24 + ...ssistedchatstreambodytoolchoicefunction.md | 8 + ...lassistedchatstreambodytoolchoiceobject.md | 9 + ...oolassistedchatstreambodytoolchoicetype.md | 10 + ...serverlesstoolassistedchatstreamrequest.md | 8 +- ...ssistedchatcompletebodytoolchoiceobject.md | 9 - ...lassistedchatstreambodytoolchoiceobject.md | 9 - docs/models/toolchoiceobject.md | 8 +- docs/sdks/chat/README.md | 6 +- docs/sdks/completions/README.md | 24 +- docs/sdks/friendlichat/README.md | 20 +- docs/sdks/friendlicompletions/README.md | 28 +- docs/sdks/friendlitoken/README.md | 28 +- docs/sdks/toolassistedchat/README.md | 18 +- pyproject.toml | 2 +- src/friendli/_version.py | 2 +- src/friendli/chat.py | 66 +- src/friendli/completions.py | 64 +- src/friendli/friendli_chat.py | 120 ++-- src/friendli/friendli_completions.py | 62 +- src/friendli/friendli_token.py | 40 +- src/friendli/models/__init__.py | 318 +++++--- .../models/dedicatedchatcompletebody.py | 387 ++++++++++ .../models/dedicatedchatcompleteop.py | 11 +- .../models/dedicatedchatstreambody.py | 383 ++++++++++ src/friendli/models/dedicatedchatstreamop.py | 11 +- .../dedicatedcompletionscompletebody.py | 679 ++++++++++++++++++ .../models/dedicatedcompletionscompleteop.py | 12 +- .../models/dedicatedcompletionsstreambody.py | 679 ++++++++++++++++++ .../models/dedicatedcompletionsstreamop.py | 11 +- .../models/dedicateddetokenizationbody.py | 21 + .../models/dedicateddetokenizationop.py | 11 +- .../models/dedicatedtokenizationbody.py | 20 + .../models/dedicatedtokenizationop.py | 11 +- ...ebody.py => serverlesschatcompletebody.py} | 4 +- .../models/serverlesschatcompleteop.py | 11 +- ...eambody.py => serverlesschatstreambody.py} | 44 +- src/friendli/models/serverlesschatstreamop.py | 11 +- ...y => serverlesscompletionscompletebody.py} | 6 +- .../models/serverlesscompletionscompleteop.py | 12 +- ....py => serverlesscompletionsstreambody.py} | 20 +- .../models/serverlesscompletionsstreamop.py | 11 +- ...ody.py => serverlessdetokenizationbody.py} | 4 +- .../models/serverlessdetokenizationop.py | 11 +- ...nbody.py => serverlesstokenizationbody.py} | 4 +- .../models/serverlesstokenizationop.py | 11 +- ...serverlesstoolassistedchatcompletebody.py} | 34 +- .../serverlesstoolassistedchatcompleteop.py | 14 +- ...> serverlesstoolassistedchatstreambody.py} | 34 +- .../serverlesstoolassistedchatstreamop.py | 14 +- src/friendli/sdkconfiguration.py | 4 +- 
src/friendli/token.py | 28 +- src/friendli/toolassistedchat.py | 52 +- 114 files changed, 3563 insertions(+), 680 deletions(-) delete mode 100644 docs/models/completionsstreambody.md create mode 100644 docs/models/dedicatedchatcompletebody.md create mode 100644 docs/models/dedicatedchatcompletebodylogitbias.md create mode 100644 docs/models/dedicatedchatcompletebodystreamoptions.md rename docs/models/{toolassistedchatstreambodytoolchoice.md => dedicatedchatcompletebodytoolchoice.md} (76%) rename docs/models/{toolassistedchatstreambodytoolchoicefunction.md => dedicatedchatcompletebodytoolchoicefunction.md} (97%) create mode 100644 docs/models/dedicatedchatcompletebodytoolchoiceobject.md rename docs/models/{toolassistedchatstreambodytoolchoicetype.md => dedicatedchatcompletebodytoolchoicetype.md} (78%) create mode 100644 docs/models/dedicatedchatstreambody.md rename docs/models/{chatstreambodylogitbias.md => dedicatedchatstreambodylogitbias.md} (90%) rename docs/models/{chatstreambodystreamoptions.md => dedicatedchatstreambodystreamoptions.md} (98%) rename docs/models/{toolassistedchatcompletebodytoolchoice.md => dedicatedchatstreambodytoolchoice.md} (76%) rename docs/models/{chatstreambodytoolchoicefunction.md => dedicatedchatstreambodytoolchoicefunction.md} (97%) create mode 100644 docs/models/dedicatedchatstreambodytoolchoiceobject.md rename docs/models/{chatstreambodytoolchoicetype.md => dedicatedchatstreambodytoolchoicetype.md} (80%) create mode 100644 docs/models/dedicatedcompletionscompletebody.md create mode 100644 docs/models/dedicatedcompletionscompletebodycompletionsbodywithprompt.md create mode 100644 docs/models/dedicatedcompletionscompletebodycompletionsbodywithtokens.md create mode 100644 docs/models/dedicatedcompletionsstreambody.md create mode 100644 docs/models/dedicatedcompletionsstreambodycompletionsbodywithprompt.md create mode 100644 docs/models/dedicatedcompletionsstreambodycompletionsbodywithtokens.md create mode 100644 docs/models/dedicateddetokenizationbody.md create mode 100644 docs/models/dedicatedtokenizationbody.md rename docs/models/{chatcompletebody.md => serverlesschatcompletebody.md} (99%) rename docs/models/{chatstreambody.md => serverlesschatstreambody.md} (99%) create mode 100644 docs/models/serverlesschatstreambodylogitbias.md create mode 100644 docs/models/serverlesschatstreambodystreamoptions.md rename docs/models/{chatstreambodytoolchoice.md => serverlesschatstreambodytoolchoice.md} (94%) rename docs/models/{toolassistedchatcompletebodytoolchoicefunction.md => serverlesschatstreambodytoolchoicefunction.md} (96%) rename docs/models/{toolassistedchatcompletebodytoolchoicetype.md => serverlesschatstreambodytoolchoicetype.md} (78%) rename docs/models/{completionscompletebody.md => serverlesscompletionscompletebody.md} (87%) create mode 100644 docs/models/serverlesscompletionsstreambody.md rename docs/models/{completionsstreambodycompletionsbodywithprompt.md => serverlesscompletionsstreambodycompletionsbodywithprompt.md} (99%) rename docs/models/{completionsstreambodycompletionsbodywithtokens.md => serverlesscompletionsstreambodycompletionsbodywithtokens.md} (99%) rename docs/models/{detokenizationbody.md => serverlessdetokenizationbody.md} (98%) rename docs/models/{tokenizationbody.md => serverlesstokenizationbody.md} (98%) rename docs/models/{toolassistedchatstreambody.md => serverlesstoolassistedchatcompletebody.md} (99%) create mode 100644 docs/models/serverlesstoolassistedchatcompletebodytoolchoice.md create mode 100644 
docs/models/serverlesstoolassistedchatcompletebodytoolchoicefunction.md create mode 100644 docs/models/serverlesstoolassistedchatcompletebodytoolchoiceobject.md create mode 100644 docs/models/serverlesstoolassistedchatcompletebodytoolchoicetype.md rename docs/models/{toolassistedchatcompletebody.md => serverlesstoolassistedchatstreambody.md} (99%) create mode 100644 docs/models/serverlesstoolassistedchatstreambodytoolchoice.md create mode 100644 docs/models/serverlesstoolassistedchatstreambodytoolchoicefunction.md create mode 100644 docs/models/serverlesstoolassistedchatstreambodytoolchoiceobject.md create mode 100644 docs/models/serverlesstoolassistedchatstreambodytoolchoicetype.md delete mode 100644 docs/models/toolassistedchatcompletebodytoolchoiceobject.md delete mode 100644 docs/models/toolassistedchatstreambodytoolchoiceobject.md create mode 100644 src/friendli/models/dedicatedchatcompletebody.py create mode 100644 src/friendli/models/dedicatedchatstreambody.py create mode 100644 src/friendli/models/dedicatedcompletionscompletebody.py create mode 100644 src/friendli/models/dedicatedcompletionsstreambody.py create mode 100644 src/friendli/models/dedicateddetokenizationbody.py create mode 100644 src/friendli/models/dedicatedtokenizationbody.py rename src/friendli/models/{chatcompletebody.py => serverlesschatcompletebody.py} (99%) rename src/friendli/models/{chatstreambody.py => serverlesschatstreambody.py} (93%) rename src/friendli/models/{completionscompletebody.py => serverlesscompletionscompletebody.py} (74%) rename src/friendli/models/{completionsstreambody.py => serverlesscompletionsstreambody.py} (98%) rename src/friendli/models/{detokenizationbody.py => serverlessdetokenizationbody.py} (88%) rename src/friendli/models/{tokenizationbody.py => serverlesstokenizationbody.py} (86%) rename src/friendli/models/{toolassistedchatcompletebody.py => serverlesstoolassistedchatcompletebody.py} (93%) rename src/friendli/models/{toolassistedchatstreambody.py => serverlesstoolassistedchatstreambody.py} (93%) diff --git a/.speakeasy/gen.lock b/.speakeasy/gen.lock index 940b63d..04c0ee3 100644 --- a/.speakeasy/gen.lock +++ b/.speakeasy/gen.lock @@ -1,12 +1,12 @@ lockVersion: 2.0.0 id: 980a2a11-4390-48fe-a741-a78ab1e9c399 management: - docChecksum: 3a4f53a9ab3ce64bcb945a82227e7f6b + docChecksum: dd967aed72d467e4ea04efdeb61c253c docVersion: v1 speakeasyVersion: 1.438.1 generationVersion: 2.457.2 - releaseVersion: 0.2.25 - configChecksum: 533f9dcf7826916eb3c25f717b76c9f5 + releaseVersion: 0.2.30 + configChecksum: 529ed5cbe412da8b239f9a2ba654e4c1 repoURL: https://github.com/friendliai/friendli-python-internal.git installationURL: https://github.com/friendliai/friendli-python-internal.git published: true @@ -50,32 +50,42 @@ generatedFiles: - docs/models/chatchoicemessage.md - docs/models/chatchoicetoolcalls.md - docs/models/chatchoicetype.md - - docs/models/chatcompletebody.md - docs/models/chatresult.md - - docs/models/chatstreambody.md - - docs/models/chatstreambodylogitbias.md - - docs/models/chatstreambodystreamoptions.md - - docs/models/chatstreambodytoolchoice.md - - docs/models/chatstreambodytoolchoicefunction.md - - docs/models/chatstreambodytoolchoicetype.md - docs/models/completionsbodywithprompt.md - docs/models/completionsbodywithtokens.md - docs/models/completionschoice.md - - docs/models/completionscompletebody.md - docs/models/completionsresult.md - - docs/models/completionsstreambody.md - - docs/models/completionsstreambodycompletionsbodywithprompt.md - - 
docs/models/completionsstreambodycompletionsbodywithtokens.md - docs/models/content.md - docs/models/data.md + - docs/models/dedicatedchatcompletebody.md + - docs/models/dedicatedchatcompletebodylogitbias.md + - docs/models/dedicatedchatcompletebodystreamoptions.md + - docs/models/dedicatedchatcompletebodytoolchoice.md + - docs/models/dedicatedchatcompletebodytoolchoicefunction.md + - docs/models/dedicatedchatcompletebodytoolchoiceobject.md + - docs/models/dedicatedchatcompletebodytoolchoicetype.md - docs/models/dedicatedchatcompleterequest.md + - docs/models/dedicatedchatstreambody.md + - docs/models/dedicatedchatstreambodylogitbias.md + - docs/models/dedicatedchatstreambodystreamoptions.md + - docs/models/dedicatedchatstreambodytoolchoice.md + - docs/models/dedicatedchatstreambodytoolchoicefunction.md + - docs/models/dedicatedchatstreambodytoolchoiceobject.md + - docs/models/dedicatedchatstreambodytoolchoicetype.md - docs/models/dedicatedchatstreamrequest.md + - docs/models/dedicatedcompletionscompletebody.md + - docs/models/dedicatedcompletionscompletebodycompletionsbodywithprompt.md + - docs/models/dedicatedcompletionscompletebodycompletionsbodywithtokens.md - docs/models/dedicatedcompletionscompleterequest.md + - docs/models/dedicatedcompletionsstreambody.md + - docs/models/dedicatedcompletionsstreambodycompletionsbodywithprompt.md + - docs/models/dedicatedcompletionsstreambodycompletionsbodywithtokens.md - docs/models/dedicatedcompletionsstreamrequest.md + - docs/models/dedicateddetokenizationbody.md - docs/models/dedicateddetokenizationrequest.md + - docs/models/dedicatedtokenizationbody.md - docs/models/dedicatedtokenizationrequest.md - docs/models/delta.md - - docs/models/detokenizationbody.md - docs/models/detokenizationresult.md - docs/models/event.md - docs/models/filebuiltintool.md @@ -93,13 +103,36 @@ generatedFiles: - docs/models/responseformat.md - docs/models/role.md - docs/models/security.md + - docs/models/serverlesschatcompletebody.md - docs/models/serverlesschatcompleterequest.md + - docs/models/serverlesschatstreambody.md + - docs/models/serverlesschatstreambodylogitbias.md + - docs/models/serverlesschatstreambodystreamoptions.md + - docs/models/serverlesschatstreambodytoolchoice.md + - docs/models/serverlesschatstreambodytoolchoicefunction.md + - docs/models/serverlesschatstreambodytoolchoicetype.md - docs/models/serverlesschatstreamrequest.md + - docs/models/serverlesscompletionscompletebody.md - docs/models/serverlesscompletionscompleterequest.md + - docs/models/serverlesscompletionsstreambody.md + - docs/models/serverlesscompletionsstreambodycompletionsbodywithprompt.md + - docs/models/serverlesscompletionsstreambodycompletionsbodywithtokens.md - docs/models/serverlesscompletionsstreamrequest.md + - docs/models/serverlessdetokenizationbody.md - docs/models/serverlessdetokenizationrequest.md + - docs/models/serverlesstokenizationbody.md - docs/models/serverlesstokenizationrequest.md + - docs/models/serverlesstoolassistedchatcompletebody.md + - docs/models/serverlesstoolassistedchatcompletebodytoolchoice.md + - docs/models/serverlesstoolassistedchatcompletebodytoolchoicefunction.md + - docs/models/serverlesstoolassistedchatcompletebodytoolchoiceobject.md + - docs/models/serverlesstoolassistedchatcompletebodytoolchoicetype.md - docs/models/serverlesstoolassistedchatcompleterequest.md + - docs/models/serverlesstoolassistedchatstreambody.md + - docs/models/serverlesstoolassistedchatstreambodytoolchoice.md + - 
docs/models/serverlesstoolassistedchatstreambodytoolchoicefunction.md + - docs/models/serverlesstoolassistedchatstreambodytoolchoiceobject.md + - docs/models/serverlesstoolassistedchatstreambodytoolchoicetype.md - docs/models/serverlesstoolassistedchatstreamrequest.md - docs/models/streamedchatchoice.md - docs/models/streamedchatchoicefunction.md @@ -115,20 +148,9 @@ generatedFiles: - docs/models/streamedtoolassistedchatresultdata.md - docs/models/streamoptions.md - docs/models/systemmessage.md - - docs/models/tokenizationbody.md - docs/models/tokenizationresult.md - docs/models/tokensequence.md - docs/models/tool.md - - docs/models/toolassistedchatcompletebody.md - - docs/models/toolassistedchatcompletebodytoolchoice.md - - docs/models/toolassistedchatcompletebodytoolchoicefunction.md - - docs/models/toolassistedchatcompletebodytoolchoiceobject.md - - docs/models/toolassistedchatcompletebodytoolchoicetype.md - - docs/models/toolassistedchatstreambody.md - - docs/models/toolassistedchatstreambodytoolchoice.md - - docs/models/toolassistedchatstreambodytoolchoicefunction.md - - docs/models/toolassistedchatstreambodytoolchoiceobject.md - - docs/models/toolassistedchatstreambodytoolchoicetype.md - docs/models/toolassistedchattool.md - docs/models/toolcalls.md - docs/models/toolchoice.md @@ -176,22 +198,23 @@ generatedFiles: - src/friendli/models/__init__.py - src/friendli/models/assistantmessage.py - src/friendli/models/chatchoice.py - - src/friendli/models/chatcompletebody.py - src/friendli/models/chatresult.py - - src/friendli/models/chatstreambody.py - src/friendli/models/completionsbodywithprompt.py - src/friendli/models/completionsbodywithtokens.py - src/friendli/models/completionschoice.py - - src/friendli/models/completionscompletebody.py - src/friendli/models/completionsresult.py - - src/friendli/models/completionsstreambody.py + - src/friendli/models/dedicatedchatcompletebody.py - src/friendli/models/dedicatedchatcompleteop.py + - src/friendli/models/dedicatedchatstreambody.py - src/friendli/models/dedicatedchatstreamop.py + - src/friendli/models/dedicatedcompletionscompletebody.py - src/friendli/models/dedicatedcompletionscompleteop.py + - src/friendli/models/dedicatedcompletionsstreambody.py - src/friendli/models/dedicatedcompletionsstreamop.py + - src/friendli/models/dedicateddetokenizationbody.py - src/friendli/models/dedicateddetokenizationop.py + - src/friendli/models/dedicatedtokenizationbody.py - src/friendli/models/dedicatedtokenizationop.py - - src/friendli/models/detokenizationbody.py - src/friendli/models/detokenizationresult.py - src/friendli/models/filebuiltintool.py - src/friendli/models/function.py @@ -202,13 +225,21 @@ generatedFiles: - src/friendli/models/responseformat.py - src/friendli/models/sdkerror.py - src/friendli/models/security.py + - src/friendli/models/serverlesschatcompletebody.py - src/friendli/models/serverlesschatcompleteop.py + - src/friendli/models/serverlesschatstreambody.py - src/friendli/models/serverlesschatstreamop.py + - src/friendli/models/serverlesscompletionscompletebody.py - src/friendli/models/serverlesscompletionscompleteop.py + - src/friendli/models/serverlesscompletionsstreambody.py - src/friendli/models/serverlesscompletionsstreamop.py + - src/friendli/models/serverlessdetokenizationbody.py - src/friendli/models/serverlessdetokenizationop.py + - src/friendli/models/serverlesstokenizationbody.py - src/friendli/models/serverlesstokenizationop.py + - src/friendli/models/serverlesstoolassistedchatcompletebody.py - 
src/friendli/models/serverlesstoolassistedchatcompleteop.py + - src/friendli/models/serverlesstoolassistedchatstreambody.py - src/friendli/models/serverlesstoolassistedchatstreamop.py - src/friendli/models/streamedchatchoice.py - src/friendli/models/streamedchatresult.py @@ -217,12 +248,9 @@ generatedFiles: - src/friendli/models/streamedcompletionstokensampled.py - src/friendli/models/streamedtoolassistedchatresult.py - src/friendli/models/systemmessage.py - - src/friendli/models/tokenizationbody.py - src/friendli/models/tokenizationresult.py - src/friendli/models/tokensequence.py - src/friendli/models/tool.py - - src/friendli/models/toolassistedchatcompletebody.py - - src/friendli/models/toolassistedchatstreambody.py - src/friendli/models/toolassistedchattool.py - src/friendli/models/toolmessage.py - src/friendli/models/usage.py @@ -354,23 +382,23 @@ examples: serverlessToolAssistedChatComplete: "": requestBody: - application/json: {"model": "meta-llama-3.1-8b-instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], "max_tokens": 200, "tools": [{"type": "math:calculator"}, {"type": "web:search"}]} + application/json: {"model": "meta-llama-3.1-8b-instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], "max_tokens": 200, "tools": [{"type": "math:calculator"}]} responses: "200": application/json: {"choices": [{"index": 0, "message": {"role": ""}, "finish_reason": ""}, {"index": 0, "message": {"role": ""}, "finish_reason": ""}, {"index": 0, "message": {"role": ""}, "finish_reason": ""}], "usage": {"prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12}} Example: requestBody: - application/json: {"model": "meta-llama-3.1-8b-instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], "max_tokens": 200} + application/json: {"model": "meta-llama-3.1-8b-instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], "max_tokens": 200, "tools": [{"type": "web:url"}]} responses: "200": - application/json: {"choices": [{"index": 0, "message": {"role": "assistant", "content": "Hello there, how may I assist you today?"}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 9, "completion_tokens": 11, "total_tokens": 20}} + application/json: {"choices": [{"index": 0, "message": {"role": "assistant", "content": "The result is 9."}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 9, "completion_tokens": 7, "total_tokens": 16}} serverlessToolAssistedChatStream: "": requestBody: - application/json: {"model": "meta-llama-3.1-8b-instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], "max_tokens": 200, "tools": [{"type": "math:calculator"}, {"type": "web:search"}]} + application/json: {"model": "meta-llama-3.1-8b-instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], "max_tokens": 200, "tools": [{"type": "math:calculator"}]} Example: requestBody: - application/json: {"model": "meta-llama-3.1-8b-instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], "max_tokens": 200} + application/json: {"model": "meta-llama-3.1-8b-instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", 
"content": "Hello!"}], "max_tokens": 200, "tools": [{"type": "math:statistics"}]} responses: "200": text/event-stream: "event: tool_status\ndata: {\"tool_call_id\":\"call_3QrfStXSU6fGdOGPcETocIAq\",\"name\":\"math:calculator\",\"status\":\"STARTED\",\"parameters\":[{\"name\":\"expression\",\"value\":\"150 * 1.60934\"}],\"result\":\"None\",\"files\":null,\"message\":null,\"error\":null,\"usage\":null,\"timestamp\":1726277121}\n\nevent: tool_status\ndata: {\"tool_call_id\":\"call_3QrfStXSU6fGdOGPcETocIAq\",\"name\":\"math:calculator\",\"status\":\"ENDED\",\"parameters\":[{\"name\":\"expression\",\"value\":\"150 * 1.60934\"}],\"result\":\"\\\"{\\\\\\\"result\\\\\\\": \\\\\\\"150 * 1.60934=241.401000000000\\\\\\\"}\\\"\",\"files\":null,\"message\":null,\"error\":null,\"usage\":null,\"timestamp\":1726277121}\n\ndata: {\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"To\"},\"finish_reason\":null,\"logprobs\":null}],\"created\":1726277121}\n\n...\n\ndata: {\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\".\"},\"finish_reason\":null,\"logprobs\":null}],\"created\":1726277121}\n\ndata: {\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\",\"logprobs\":null}],\"created\":1726277121}\n\ndata: [DONE]\n" @@ -420,7 +448,7 @@ examples: dedicatedChatComplete: "": requestBody: - application/json: {"model": "meta-llama-3.1-8b-instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], "max_tokens": 200} + application/json: {"model": "(endpoint-id):(adapter-route)", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], "max_tokens": 200} responses: "200": application/json: {"choices": [{"index": 0, "message": {"role": ""}, "finish_reason": ""}, {"index": 0, "message": {"role": ""}, "finish_reason": ""}, {"index": 0, "message": {"role": ""}, "finish_reason": ""}], "usage": {"prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12}} @@ -433,7 +461,7 @@ examples: dedicatedChatStream: "": requestBody: - application/json: {"model": "meta-llama-3.1-8b-instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], "max_tokens": 200} + application/json: {"model": "(endpoint-id):(adapter-route)", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], "max_tokens": 200} Example: requestBody: application/json: {"model": "meta-llama-3.1-8b-instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], "max_tokens": 200} @@ -443,7 +471,7 @@ examples: dedicatedCompletionsComplete: "": requestBody: - application/json: {"prompt": "Say this is a test!", "model": "meta-llama-3.1-8b-instruct", "max_tokens": 200, "top_k": 1} + application/json: {"prompt": "Say this is a test!", "model": "(endpoint-id):(adapter-route)", "max_tokens": 200, "top_k": 1} responses: "200": application/json: {"choices": [{"index": 0, "seed": 42, "text": "This is indeed a test", "tokens": [128000, 2028, 374, 13118, 264, 1296]}, {"index": 0, "seed": 42, "text": "This is indeed a test", "tokens": [128000, 2028, 374, 13118, 264, 1296]}, {"index": 0, "seed": 42, "text": "This is indeed a test", "tokens": [128000, 2028, 374, 13118, 264, 1296]}], "usage": {"prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12}} @@ -456,7 +484,7 @@ examples: dedicatedCompletionsStream: 
"": requestBody: - application/json: {"prompt": "Say this is a test!", "model": "meta-llama-3.1-8b-instruct", "max_tokens": 200, "top_k": 1} + application/json: {"prompt": "Say this is a test!", "model": "(endpoint-id):(adapter-route)", "max_tokens": 200, "top_k": 1} Example: requestBody: application/json: {"tokens": [74484], "model": "meta-llama-3.1-8b-instruct", "max_tokens": 200, "top_k": 1} @@ -466,7 +494,7 @@ examples: dedicatedTokenization: "": requestBody: - application/json: {"model": "meta-llama-3.1-8b-instruct", "prompt": "What is generative AI?"} + application/json: {"model": "(endpoint-id):(adapter-route)", "prompt": "What is generative AI?"} Example: requestBody: application/json: {"model": "meta-llama-3.1-8b-instruct", "prompt": "What is generative AI?"} @@ -476,7 +504,7 @@ examples: dedicatedDetokenization: "": requestBody: - application/json: {"model": "meta-llama-3.1-8b-instruct", "tokens": [128000, 3923, 374, 1803, 1413, 15592, 30]} + application/json: {"model": "(endpoint-id):(adapter-route)", "tokens": [128000, 3923, 374, 1803, 1413, 15592, 30]} Example: requestBody: application/json: {"model": "meta-llama-3.1-8b-instruct", "tokens": [128000, 3923, 374, 1803, 1413, 15592, 30]} diff --git a/.speakeasy/gen.yaml b/.speakeasy/gen.yaml index 0486763..db85f33 100644 --- a/.speakeasy/gen.yaml +++ b/.speakeasy/gen.yaml @@ -13,7 +13,7 @@ generation: oAuth2ClientCredentialsEnabled: true oAuth2PasswordEnabled: true python: - version: 0.2.25 + version: 0.2.30 additionalDependencies: dev: {} main: {} diff --git a/.speakeasy/workflow.lock b/.speakeasy/workflow.lock index ae60613..02528c3 100644 --- a/.speakeasy/workflow.lock +++ b/.speakeasy/workflow.lock @@ -2,8 +2,8 @@ speakeasyVersion: 1.438.1 sources: Friendli-API-Schema: sourceNamespace: friendli-api-schema - sourceRevisionDigest: sha256:3363dec06e6a2c9b68c43c2dc1f87db438d6bd5a96b380e70f5282b85084b767 - sourceBlobDigest: sha256:158615cc306eef8a979466064629f5f3e7cfa59a0e049133270b42ebd898bdfb + sourceRevisionDigest: sha256:3d85808887e7b0ad7e56d83f96b8d0734c19fd2b2883c5f82f3dfc7d63a44ce2 + sourceBlobDigest: sha256:85241138e1e03824c52d072a27d00ee1931653cad9fc80dd8968fcbcc7c4de03 tags: - latest - main @@ -11,10 +11,10 @@ targets: friendli: source: Friendli-API-Schema sourceNamespace: friendli-api-schema - sourceRevisionDigest: sha256:3363dec06e6a2c9b68c43c2dc1f87db438d6bd5a96b380e70f5282b85084b767 - sourceBlobDigest: sha256:158615cc306eef8a979466064629f5f3e7cfa59a0e049133270b42ebd898bdfb + sourceRevisionDigest: sha256:3d85808887e7b0ad7e56d83f96b8d0734c19fd2b2883c5f82f3dfc7d63a44ce2 + sourceBlobDigest: sha256:85241138e1e03824c52d072a27d00ee1931653cad9fc80dd8968fcbcc7c4de03 codeSamplesNamespace: friendli-api-schema-code-samples - codeSamplesRevisionDigest: sha256:3941ceec891b5a6722ecbec87a6cecbf547490d86229bef1ce1104434d4dc65c + codeSamplesRevisionDigest: sha256:27190dc5935abb5970abadeea5cba28a73f72ae63c7964eff4bb0f6de73613ce workflow: workflowVersion: 1.0.0 speakeasyVersion: latest diff --git a/README.md b/README.md index e9839db..c80f2ec 100644 --- a/README.md +++ b/README.md @@ -143,9 +143,6 @@ res = s.serverless.tool_assisted_chat.complete(model="meta-llama-3.1-8b-instruct { "type": "math:calculator", }, - { - "type": "web:search", - }, ]) if res is not None: @@ -179,9 +176,6 @@ async def main(): { "type": "math:calculator", }, - { - "type": "web:search", - }, ]) if res is not None: # handle response diff --git a/RELEASES.md b/RELEASES.md index 2bc67ef..696c111 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -84,4 +84,54 @@ Based 
on: ### Generated - [python v0.2.25] . ### Releases -- [PyPI v0.2.25] https://pypi.org/project/friendli/0.2.25 - . \ No newline at end of file +- [PyPI v0.2.25] https://pypi.org/project/friendli/0.2.25 - . + +## 2024-11-13 05:40:16 +### Changes +Based on: +- OpenAPI Doc +- Speakeasy CLI 1.438.1 (2.457.2) https://github.com/speakeasy-api/speakeasy +### Generated +- [python v0.2.26] . +### Releases +- [PyPI v0.2.26] https://pypi.org/project/friendli/0.2.26 - . + +## 2024-11-13 05:42:54 +### Changes +Based on: +- OpenAPI Doc +- Speakeasy CLI 1.438.1 (2.457.2) https://github.com/speakeasy-api/speakeasy +### Generated +- [python v0.2.27] . +### Releases +- [PyPI v0.2.27] https://pypi.org/project/friendli/0.2.27 - . + +## 2024-11-13 05:48:46 +### Changes +Based on: +- OpenAPI Doc +- Speakeasy CLI 1.438.1 (2.457.2) https://github.com/speakeasy-api/speakeasy +### Generated +- [python v0.2.28] . +### Releases +- [PyPI v0.2.28] https://pypi.org/project/friendli/0.2.28 - . + +## 2024-11-13 05:57:34 +### Changes +Based on: +- OpenAPI Doc +- Speakeasy CLI 1.438.1 (2.457.2) https://github.com/speakeasy-api/speakeasy +### Generated +- [python v0.2.29] . +### Releases +- [PyPI v0.2.29] https://pypi.org/project/friendli/0.2.29 - . + +## 2024-11-13 06:04:57 +### Changes +Based on: +- OpenAPI Doc +- Speakeasy CLI 1.438.1 (2.457.2) https://github.com/speakeasy-api/speakeasy +### Generated +- [python v0.2.30] . +### Releases +- [PyPI v0.2.30] https://pypi.org/project/friendli/0.2.30 - . \ No newline at end of file diff --git a/USAGE.md b/USAGE.md index 7338fa4..e0880a1 100644 --- a/USAGE.md +++ b/USAGE.md @@ -84,9 +84,6 @@ res = s.serverless.tool_assisted_chat.complete(model="meta-llama-3.1-8b-instruct { "type": "math:calculator", }, - { - "type": "web:search", - }, ]) if res is not None: @@ -120,9 +117,6 @@ async def main(): { "type": "math:calculator", }, - { - "type": "web:search", - }, ]) if res is not None: # handle response diff --git a/docs/models/completionsstreambody.md b/docs/models/completionsstreambody.md deleted file mode 100644 index 65aaf9b..0000000 --- a/docs/models/completionsstreambody.md +++ /dev/null @@ -1,17 +0,0 @@ -# CompletionsStreamBody - - -## Supported Types - -### `models.CompletionsStreamBodyCompletionsBodyWithPrompt` - -```python -value: models.CompletionsStreamBodyCompletionsBodyWithPrompt = /* values here */ -``` - -### `models.CompletionsStreamBodyCompletionsBodyWithTokens` - -```python -value: models.CompletionsStreamBodyCompletionsBodyWithTokens = /* values here */ -``` - diff --git a/docs/models/dedicatedchatcompletebody.md b/docs/models/dedicatedchatcompletebody.md new file mode 100644 index 0000000..7830b39 --- /dev/null +++ b/docs/models/dedicatedchatcompletebody.md @@ -0,0 +1,31 @@ +# DedicatedChatCompleteBody + + +## Fields + +| Field | Type | Required | Description | Example | +| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `model` | *str* | :heavy_check_mark: | ID of target endpoint. If you want to send request to specific adapter, using "ENDPOINT_ID:ADAPTER_ROUTE" format. | (endpoint-id):(adapter-route) | +| `messages` | List[[models.Message](../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
] | +| `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | +| `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | +| `logit_bias` | [OptionalNullable[models.DedicatedChatCompleteBodyLogitBias]](../models/dedicatedchatcompletebodylogitbias.md) | :heavy_minus_sign: | Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. | | +| `logprobs` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to return log probabilities of the output tokens or not. | | +| `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 | +| `min_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.

**This field is unsupported when `tools` are specified.**
| | +| `n` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument. | | +| `parallel_tool_calls` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to enable parallel function calling. | | +| `presence_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled at least once in the existing text. | | +| `repetition_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument. | | +| `response_format` | [OptionalNullable[models.ResponseFormat]](../models/responseformat.md) | :heavy_minus_sign: | The enforced format of the model's output.

Note that the content of the output message may be truncated if it exceeds the `max_tokens`.
You can check this by verifying that the `finish_reason` of the output message is `length`.

***Important***
You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`).
Otherwise, the model may produce an unending stream of whitespace or other characters.
| | +| `seed` | List[*int*] | :heavy_minus_sign: | Seed to control random procedure. If nothing is given, random seed is used for sampling, and return the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations. | | +| `stop` | List[*str*] | :heavy_minus_sign: | When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list. | | +| `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. | | +| `stream_options` | [OptionalNullable[models.DedicatedChatCompleteBodyStreamOptions]](../models/dedicatedchatcompletebodystreamoptions.md) | :heavy_minus_sign: | Options related to stream.
It can only be used when `stream: true`.
| | +| `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | +| `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | +| `tool_choice` | [Optional[models.DedicatedChatCompleteBodyToolChoice]](../models/dedicatedchatcompletebodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | +| `tools` | List[[models.Tool](../models/tool.md)] | :heavy_minus_sign: | A list of tools the model may call.
Currently, only functions are supported as tools.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.

**When `tools` are specified, the `min_tokens` field is unsupported.**
| | +| `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | | +| `top_logprobs` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used. | | +| `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument. | | \ No newline at end of file diff --git a/docs/models/dedicatedchatcompletebodylogitbias.md b/docs/models/dedicatedchatcompletebodylogitbias.md new file mode 100644 index 0000000..6748e6d --- /dev/null +++ b/docs/models/dedicatedchatcompletebodylogitbias.md @@ -0,0 +1,9 @@ +# DedicatedChatCompleteBodyLogitBias + +Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. + + +## Fields + +| Field | Type | Required | Description | +| ----------- | ----------- | ----------- | ----------- | \ No newline at end of file diff --git a/docs/models/dedicatedchatcompletebodystreamoptions.md b/docs/models/dedicatedchatcompletebodystreamoptions.md new file mode 100644 index 0000000..d5e62ef --- /dev/null +++ b/docs/models/dedicatedchatcompletebodystreamoptions.md @@ -0,0 +1,12 @@ +# DedicatedChatCompleteBodyStreamOptions + +Options related to stream. +It can only be used when `stream: true`. + + + +## Fields + +| Field | Type | Required | Description | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `include_usage` | *OptionalNullable[bool]* | :heavy_minus_sign: | When set to `true`,
the number of tokens used will be included at the end of the stream result in the form of
`"usage": {"completion_tokens": number, "prompt_tokens": number, "total_tokens": number}`.
| \ No newline at end of file diff --git a/docs/models/toolassistedchatstreambodytoolchoice.md b/docs/models/dedicatedchatcompletebodytoolchoice.md similarity index 76% rename from docs/models/toolassistedchatstreambodytoolchoice.md rename to docs/models/dedicatedchatcompletebodytoolchoice.md index 42cc686..f05d048 100644 --- a/docs/models/toolassistedchatstreambodytoolchoice.md +++ b/docs/models/dedicatedchatcompletebodytoolchoice.md @@ -1,4 +1,4 @@ -# ToolAssistedChatStreamBodyToolChoice +# DedicatedChatCompleteBodyToolChoice Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. @@ -16,9 +16,9 @@ You can also specify a particular tool by `{"type": "function", "function": {"na value: str = /* values here */ ``` -### `models.ToolAssistedChatStreamBodyToolChoiceObject` +### `models.DedicatedChatCompleteBodyToolChoiceObject` ```python -value: models.ToolAssistedChatStreamBodyToolChoiceObject = /* values here */ +value: models.DedicatedChatCompleteBodyToolChoiceObject = /* values here */ ``` diff --git a/docs/models/toolassistedchatstreambodytoolchoicefunction.md b/docs/models/dedicatedchatcompletebodytoolchoicefunction.md similarity index 97% rename from docs/models/toolassistedchatstreambodytoolchoicefunction.md rename to docs/models/dedicatedchatcompletebodytoolchoicefunction.md index 9f7ff02..f5d8ce3 100644 --- a/docs/models/toolassistedchatstreambodytoolchoicefunction.md +++ b/docs/models/dedicatedchatcompletebodytoolchoicefunction.md @@ -1,4 +1,4 @@ -# ToolAssistedChatStreamBodyToolChoiceFunction +# DedicatedChatCompleteBodyToolChoiceFunction ## Fields diff --git a/docs/models/dedicatedchatcompletebodytoolchoiceobject.md b/docs/models/dedicatedchatcompletebodytoolchoiceobject.md new file mode 100644 index 0000000..55736d4 --- /dev/null +++ b/docs/models/dedicatedchatcompletebodytoolchoiceobject.md @@ -0,0 +1,9 @@ +# DedicatedChatCompleteBodyToolChoiceObject + + +## Fields + +| Field | Type | Required | Description | +| -------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------- | +| `type` | [models.DedicatedChatCompleteBodyToolChoiceType](../models/dedicatedchatcompletebodytoolchoicetype.md) | :heavy_check_mark: | The type of the tool. Currently, only `function` is supported. | +| `function` | [models.DedicatedChatCompleteBodyToolChoiceFunction](../models/dedicatedchatcompletebodytoolchoicefunction.md) | :heavy_check_mark: | N/A | \ No newline at end of file diff --git a/docs/models/toolassistedchatstreambodytoolchoicetype.md b/docs/models/dedicatedchatcompletebodytoolchoicetype.md similarity index 78% rename from docs/models/toolassistedchatstreambodytoolchoicetype.md rename to docs/models/dedicatedchatcompletebodytoolchoicetype.md index f711aa7..2f4f3d8 100644 --- a/docs/models/toolassistedchatstreambodytoolchoicetype.md +++ b/docs/models/dedicatedchatcompletebodytoolchoicetype.md @@ -1,4 +1,4 @@ -# ToolAssistedChatStreamBodyToolChoiceType +# DedicatedChatCompleteBodyToolChoiceType The type of the tool. Currently, only `function` is supported. 
diff --git a/docs/models/dedicatedchatcompleterequest.md b/docs/models/dedicatedchatcompleterequest.md index 01a1685..ea83624 100644 --- a/docs/models/dedicatedchatcompleterequest.md +++ b/docs/models/dedicatedchatcompleterequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- | -| `chat_complete_body` | [models.ChatCompleteBody](../models/chatcompletebody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file +| Field | Type | Required | Description | +| -------------------------------------------------------------------------- | -------------------------------------------------------------------------- | -------------------------------------------------------------------------- | -------------------------------------------------------------------------- | +| `dedicated_chat_complete_body` | [models.DedicatedChatCompleteBody](../models/dedicatedchatcompletebody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file diff --git a/docs/models/dedicatedchatstreambody.md b/docs/models/dedicatedchatstreambody.md new file mode 100644 index 0000000..b3325a6 --- /dev/null +++ b/docs/models/dedicatedchatstreambody.md @@ -0,0 +1,31 @@ +# DedicatedChatStreamBody + + +## Fields + +| Field | Type | Required | Description | Example | +| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `model` | *str* | :heavy_check_mark: | ID of target endpoint. If you want to send request to specific adapter, using "ENDPOINT_ID:ADAPTER_ROUTE" format. | (endpoint-id):(adapter-route) | +| `messages` | List[[models.Message](../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
] | +| `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | +| `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | +| `logit_bias` | [OptionalNullable[models.DedicatedChatStreamBodyLogitBias]](../models/dedicatedchatstreambodylogitbias.md) | :heavy_minus_sign: | Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. | | +| `logprobs` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to return log probabilities of the output tokens or not. | | +| `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 | +| `min_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.

**This field is unsupported when `tools` are specified.**
| | +| `n` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument. | | +| `parallel_tool_calls` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to enable parallel function calling. | | +| `presence_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled at least once in the existing text. | | +| `repetition_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument. | | +| `response_format` | [OptionalNullable[models.ResponseFormat]](../models/responseformat.md) | :heavy_minus_sign: | The enforced format of the model's output.

Note that the content of the output message may be truncated if it exceeds the `max_tokens`.
You can check this by verifying that the `finish_reason` of the output message is `length`.

***Important***
You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`).
Otherwise, the model may produce an unending stream of whitespace or other characters.
| | +| `seed` | List[*int*] | :heavy_minus_sign: | Seed to control random procedure. If nothing is given, random seed is used for sampling, and return the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations. | | +| `stop` | List[*str*] | :heavy_minus_sign: | When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list. | | +| `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. | | +| `stream_options` | [OptionalNullable[models.DedicatedChatStreamBodyStreamOptions]](../models/dedicatedchatstreambodystreamoptions.md) | :heavy_minus_sign: | Options related to stream.
It can only be used when `stream: true`.
| | +| `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | +| `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | +| `tool_choice` | [Optional[models.DedicatedChatStreamBodyToolChoice]](../models/dedicatedchatstreambodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | +| `tools` | List[[models.Tool](../models/tool.md)] | :heavy_minus_sign: | A list of tools the model may call.
Currently, only functions are supported as a tool.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.

**When `tools` are specified, the `min_tokens` field is unsupported.**
| | +| `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | | +| `top_logprobs` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used. | | +| `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument. | | \ No newline at end of file diff --git a/docs/models/chatstreambodylogitbias.md b/docs/models/dedicatedchatstreambodylogitbias.md similarity index 90% rename from docs/models/chatstreambodylogitbias.md rename to docs/models/dedicatedchatstreambodylogitbias.md index 0819d1b..8b29ef5 100644 --- a/docs/models/chatstreambodylogitbias.md +++ b/docs/models/dedicatedchatstreambodylogitbias.md @@ -1,4 +1,4 @@ -# ChatStreamBodyLogitBias +# DedicatedChatStreamBodyLogitBias Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. diff --git a/docs/models/chatstreambodystreamoptions.md b/docs/models/dedicatedchatstreambodystreamoptions.md similarity index 98% rename from docs/models/chatstreambodystreamoptions.md rename to docs/models/dedicatedchatstreambodystreamoptions.md index 4a7403e..a2ad79e 100644 --- a/docs/models/chatstreambodystreamoptions.md +++ b/docs/models/dedicatedchatstreambodystreamoptions.md @@ -1,4 +1,4 @@ -# ChatStreamBodyStreamOptions +# DedicatedChatStreamBodyStreamOptions Options related to stream. It can only be used when `stream: true`. diff --git a/docs/models/toolassistedchatcompletebodytoolchoice.md b/docs/models/dedicatedchatstreambodytoolchoice.md similarity index 76% rename from docs/models/toolassistedchatcompletebodytoolchoice.md rename to docs/models/dedicatedchatstreambodytoolchoice.md index e71193b..23da632 100644 --- a/docs/models/toolassistedchatcompletebodytoolchoice.md +++ b/docs/models/dedicatedchatstreambodytoolchoice.md @@ -1,4 +1,4 @@ -# ToolAssistedChatCompleteBodyToolChoice +# DedicatedChatStreamBodyToolChoice Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. 
@@ -16,9 +16,9 @@ You can also specify a particular tool by `{"type": "function", "function": {"na value: str = /* values here */ ``` -### `models.ToolAssistedChatCompleteBodyToolChoiceObject` +### `models.DedicatedChatStreamBodyToolChoiceObject` ```python -value: models.ToolAssistedChatCompleteBodyToolChoiceObject = /* values here */ +value: models.DedicatedChatStreamBodyToolChoiceObject = /* values here */ ``` diff --git a/docs/models/chatstreambodytoolchoicefunction.md b/docs/models/dedicatedchatstreambodytoolchoicefunction.md similarity index 97% rename from docs/models/chatstreambodytoolchoicefunction.md rename to docs/models/dedicatedchatstreambodytoolchoicefunction.md index bb2985b..6b8f977 100644 --- a/docs/models/chatstreambodytoolchoicefunction.md +++ b/docs/models/dedicatedchatstreambodytoolchoicefunction.md @@ -1,4 +1,4 @@ -# ChatStreamBodyToolChoiceFunction +# DedicatedChatStreamBodyToolChoiceFunction ## Fields diff --git a/docs/models/dedicatedchatstreambodytoolchoiceobject.md b/docs/models/dedicatedchatstreambodytoolchoiceobject.md new file mode 100644 index 0000000..d24d630 --- /dev/null +++ b/docs/models/dedicatedchatstreambodytoolchoiceobject.md @@ -0,0 +1,9 @@ +# DedicatedChatStreamBodyToolChoiceObject + + +## Fields + +| Field | Type | Required | Description | +| ---------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------- | +| `type` | [models.DedicatedChatStreamBodyToolChoiceType](../models/dedicatedchatstreambodytoolchoicetype.md) | :heavy_check_mark: | The type of the tool. Currently, only `function` is supported. | +| `function` | [models.DedicatedChatStreamBodyToolChoiceFunction](../models/dedicatedchatstreambodytoolchoicefunction.md) | :heavy_check_mark: | N/A | \ No newline at end of file diff --git a/docs/models/chatstreambodytoolchoicetype.md b/docs/models/dedicatedchatstreambodytoolchoicetype.md similarity index 80% rename from docs/models/chatstreambodytoolchoicetype.md rename to docs/models/dedicatedchatstreambodytoolchoicetype.md index 873fb23..1b4ad1c 100644 --- a/docs/models/chatstreambodytoolchoicetype.md +++ b/docs/models/dedicatedchatstreambodytoolchoicetype.md @@ -1,4 +1,4 @@ -# ChatStreamBodyToolChoiceType +# DedicatedChatStreamBodyToolChoiceType The type of the tool. Currently, only `function` is supported. diff --git a/docs/models/dedicatedchatstreamrequest.md b/docs/models/dedicatedchatstreamrequest.md index 3c1ad58..1b69ec6 100644 --- a/docs/models/dedicatedchatstreamrequest.md +++ b/docs/models/dedicatedchatstreamrequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| ---------------------------------------------------- | ---------------------------------------------------- | ---------------------------------------------------- | ---------------------------------------------------- | -| `chat_stream_body` | [models.ChatStreamBody](../models/chatstreambody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). 
| \ No newline at end of file +| Field | Type | Required | Description | +| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | +| `dedicated_chat_stream_body` | [models.DedicatedChatStreamBody](../models/dedicatedchatstreambody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file diff --git a/docs/models/dedicatedcompletionscompletebody.md b/docs/models/dedicatedcompletionscompletebody.md new file mode 100644 index 0000000..21b16b7 --- /dev/null +++ b/docs/models/dedicatedcompletionscompletebody.md @@ -0,0 +1,17 @@ +# DedicatedCompletionsCompleteBody + + +## Supported Types + +### `models.DedicatedCompletionsCompleteBodyCompletionsBodyWithPrompt` + +```python +value: models.DedicatedCompletionsCompleteBodyCompletionsBodyWithPrompt = /* values here */ +``` + +### `models.DedicatedCompletionsCompleteBodyCompletionsBodyWithTokens` + +```python +value: models.DedicatedCompletionsCompleteBodyCompletionsBodyWithTokens = /* values here */ +``` + diff --git a/docs/models/dedicatedcompletionscompletebodycompletionsbodywithprompt.md b/docs/models/dedicatedcompletionscompletebodycompletionsbodywithprompt.md new file mode 100644 index 0000000..c329d0a --- /dev/null +++ b/docs/models/dedicatedcompletionscompletebodycompletionsbodywithprompt.md @@ -0,0 +1,43 @@ +# DedicatedCompletionsCompleteBodyCompletionsBodyWithPrompt + + +## Fields + +| Field | Type | Required | Description | Example | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `prompt` | *str* | :heavy_check_mark: | The prompt (i.e., input text) to generate completions for. Either `prompt` or `tokens` field is required. | Say this is a test! | +| `model` | *str* | :heavy_check_mark: | ID of target endpoint. If you want to send request to specific adapter, using "ENDPOINT_ID:ADAPTER_ROUTE" format. | (endpoint-id):(adapter-route) | +| `bad_word_tokens` | List[[models.TokenSequence](../models/tokensequence.md)] | :heavy_minus_sign: | Same as the above `bad_words` field, but receives token sequences instead of text phrases. This is similar to Hugging Face's [`bad_word_ids`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.bad_words_ids) argument. | | +| `bad_words` | List[*str*] | :heavy_minus_sign: | Text phrases that should not be generated.
For a bad word phrase that contains N tokens, if the first N-1 tokens appear at the end of the generated result, the logit for the last token of the phrase is set to -inf.
Before checking whether a bad word is included in the result, the word is converted into tokens.
We recommend using `bad_word_tokens` because it is clearer.
For example, after tokenization, phrases "clear" and " clear" can result in different token sequences due to the prepended space character.
Defaults to empty list.
| | +| `beam_compat_no_post_normalization` | *OptionalNullable[bool]* | :heavy_minus_sign: | N/A | | +| `beam_compat_pre_normalization` | *OptionalNullable[bool]* | :heavy_minus_sign: | N/A | | +| `beam_search_type` | *OptionalNullable[str]* | :heavy_minus_sign: | One of `DETERMINISTIC`, `NAIVE_SAMPLING`, and `STOCHASTIC`. Which beam search type to use. `DETERMINISTIC` means the standard, deterministic beam search, which is similar to Hugging Face's [`beam_search`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_search). Argmuents for controlling random sampling such as `top_k` and `top_p` are not allowed for this option. `NAIVE_SAMPLING` is similar to Hugging Face's [`beam_sample`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_sample). `STOCHASTIC` means stochastic beam search (more details in [Kool et al. (2019)](https://proceedings.mlr.press/v97/kool19a.html)). This option is ignored if `num_beams` is not provided. Defaults to `DETERMINISTIC`. | | +| `early_stopping` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stop the beam search when at least `num_beams` beams are finished with the EOS token. Only allowed for beam search. Defaults to false. This is similar to Hugging Face's [`early_stopping`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.early_stopping) argument. | | +| `embedding_to_replace` | List[*float*] | :heavy_minus_sign: | A list of flattened embedding vectors used for replacing the tokens at the specified indices provided via `token_index_to_replace`. | | +| `encoder_no_repeat_ngram` | *OptionalNullable[int]* | :heavy_minus_sign: | If this exceeds 1, every ngram of that size occurring in the input token sequence cannot appear in the generated result. 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Only allowed for encoder-decoder models. Defaults to 1. This is similar to Hugging Face's [`encoder_no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_no_repeat_ngram_size) argument. | | +| `encoder_repetition_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Penalizes tokens that have already appeared in the input tokens. Should be greater than or equal to 1.0. 1.0 means no penalty. Only allowed for encoder-decoder models. See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`encoder_repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_repetition_penalty) argument. | | +| `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | +| `forced_output_tokens` | List[*int*] | :heavy_minus_sign: | A token sequence that is enforced as a generation output. This option can be used when evaluating the model for the datasets with multi-choice problems (e.g., [HellaSwag](https://huggingface.co/datasets/hellaswag), [MMLU](https://huggingface.co/datasets/cais/mmlu)). Use this option with `include_output_logprobs` to get logprobs for the evaluation. | | +| `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. 
Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | +| `include_output_logits` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to include the output logits to the generation output. | | +| `include_output_logprobs` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to include the output logprobs to the generation output. | | +| `length_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Coefficient for exponential length penalty that is used with beam search. Only allowed for beam search. Defaults to 1.0. This is similar to Hugging Face's [`length_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.length_penalty) argument. | | +| `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 | +| `max_total_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `max_tokens` and `max_total_tokens` is allowed. Default value is the model's maximum length. This is similar to Hugging Face's [`max_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_length) argument. | | +| `min_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument. | | +| `min_total_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `min_tokens` and `min_total_tokens` is allowed. This is similar to Hugging Face's [`min_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.min_length) argument. | | +| `n` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument. | | +| `no_repeat_ngram` | *OptionalNullable[int]* | :heavy_minus_sign: | If this exceeds 1, every ngram of that size can only occur once among the generated result (plus the input tokens for decoder-only models). 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Defaults to 1. 
This is similar to Hugging Face's [`no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.no_repeat_ngram_size) argument. | | +| `num_beams` | *OptionalNullable[int]* | :heavy_minus_sign: | Number of beams for beam search. Numbers between 1 and 31 (both inclusive) are allowed. Default behavior is no beam search. This is similar to Hugging Face's [`num_beams`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_beams) argument. | | +| `presence_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled at least once in the existing text. | | +| `repetition_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument. | | +| `response_format` | [OptionalNullable[models.ResponseFormat]](../models/responseformat.md) | :heavy_minus_sign: | The enforced format of the model's output.

Note that the content of the output message may be truncated if it exceeds the `max_tokens`.
You can check this by verifying that the `finish_reason` of the output message is `length`.

***Important***
You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`).
Otherwise, the model may produce an unending stream of whitespace or other characters.
| | +| `seed` | List[*int*] | :heavy_minus_sign: | Seed to control the random procedure. If nothing is given, the API generates the seed randomly, uses it for sampling, and returns the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations. | | +| `stop` | List[*str*] | :heavy_minus_sign: | When one of the stop phrases appears in the generation result, the API will stop generation.
The stop phrases are excluded from the result.
This option is incompatible with beam search (specified by `num_beams`); use `stop_tokens` for that case instead.
Defaults to empty list.
| | +| `stop_tokens` | List[[models.TokenSequence](../models/tokensequence.md)] | :heavy_minus_sign: | Stop generating further tokens when a generated token corresponds to any of the tokens in the sequence.
If beam search is enabled, all of the active beams should contain the stop token to terminate generation.
| | +| `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. Not supported when using beam search. | | +| `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | +| `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | +| `token_index_to_replace` | List[*int*] | :heavy_minus_sign: | A list of token indices where to replace the embeddings of input tokens provided via either `tokens` or `prompt`. | | +| `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | 1 | +| `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument. 
| | \ No newline at end of file diff --git a/docs/models/dedicatedcompletionscompletebodycompletionsbodywithtokens.md b/docs/models/dedicatedcompletionscompletebodycompletionsbodywithtokens.md new file mode 100644 index 0000000..aa2009b --- /dev/null +++ b/docs/models/dedicatedcompletionscompletebodycompletionsbodywithtokens.md @@ -0,0 +1,43 @@ +# DedicatedCompletionsCompleteBodyCompletionsBodyWithTokens + + +## Fields + +| Field | Type | Required | Description | Example | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `tokens` | List[*int*] | :heavy_check_mark: | The tokenized prompt (i.e., input tokens). Either `prompt` or `tokens` field is required. | | +| `model` | *str* | :heavy_check_mark: | ID of target endpoint. If you want to send request to specific adapter, using "ENDPOINT_ID:ADAPTER_ROUTE" format. | (endpoint-id):(adapter-route) | +| `bad_word_tokens` | List[[models.TokenSequence](../models/tokensequence.md)] | :heavy_minus_sign: | Same as the above `bad_words` field, but receives token sequences instead of text phrases. This is similar to Hugging Face's [`bad_word_ids`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.bad_words_ids) argument. | | +| `bad_words` | List[*str*] | :heavy_minus_sign: | Text phrases that should not be generated.
For a bad word phrase that contains N tokens, if the first N-1 tokens appear at the end of the generated result, the logit for the last token of the phrase is set to -inf.
Before checking whether a bad word is included in the result, the word is converted into tokens.
We recommend using `bad_word_tokens` because it is clearer.
For example, after tokenization, phrases "clear" and " clear" can result in different token sequences due to the prepended space character.
Defaults to empty list.
| | +| `beam_compat_no_post_normalization` | *OptionalNullable[bool]* | :heavy_minus_sign: | N/A | | +| `beam_compat_pre_normalization` | *OptionalNullable[bool]* | :heavy_minus_sign: | N/A | | +| `beam_search_type` | *OptionalNullable[str]* | :heavy_minus_sign: | One of `DETERMINISTIC`, `NAIVE_SAMPLING`, and `STOCHASTIC`. Which beam search type to use. `DETERMINISTIC` means the standard, deterministic beam search, which is similar to Hugging Face's [`beam_search`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_search). Argmuents for controlling random sampling such as `top_k` and `top_p` are not allowed for this option. `NAIVE_SAMPLING` is similar to Hugging Face's [`beam_sample`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_sample). `STOCHASTIC` means stochastic beam search (more details in [Kool et al. (2019)](https://proceedings.mlr.press/v97/kool19a.html)). This option is ignored if `num_beams` is not provided. Defaults to `DETERMINISTIC`. | | +| `early_stopping` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stop the beam search when at least `num_beams` beams are finished with the EOS token. Only allowed for beam search. Defaults to false. This is similar to Hugging Face's [`early_stopping`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.early_stopping) argument. | | +| `embedding_to_replace` | List[*float*] | :heavy_minus_sign: | A list of flattened embedding vectors used for replacing the tokens at the specified indices provided via `token_index_to_replace`. | | +| `encoder_no_repeat_ngram` | *OptionalNullable[int]* | :heavy_minus_sign: | If this exceeds 1, every ngram of that size occurring in the input token sequence cannot appear in the generated result. 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Only allowed for encoder-decoder models. Defaults to 1. This is similar to Hugging Face's [`encoder_no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_no_repeat_ngram_size) argument. | | +| `encoder_repetition_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Penalizes tokens that have already appeared in the input tokens. Should be greater than or equal to 1.0. 1.0 means no penalty. Only allowed for encoder-decoder models. See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`encoder_repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_repetition_penalty) argument. | | +| `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | +| `forced_output_tokens` | List[*int*] | :heavy_minus_sign: | A token sequence that is enforced as a generation output. This option can be used when evaluating the model for the datasets with multi-choice problems (e.g., [HellaSwag](https://huggingface.co/datasets/hellaswag), [MMLU](https://huggingface.co/datasets/cais/mmlu)). Use this option with `include_output_logprobs` to get logprobs for the evaluation. | | +| `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. 
Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | +| `include_output_logits` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to include the output logits to the generation output. | | +| `include_output_logprobs` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to include the output logprobs to the generation output. | | +| `length_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Coefficient for exponential length penalty that is used with beam search. Only allowed for beam search. Defaults to 1.0. This is similar to Hugging Face's [`length_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.length_penalty) argument. | | +| `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 | +| `max_total_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `max_tokens` and `max_total_tokens` is allowed. Default value is the model's maximum length. This is similar to Hugging Face's [`max_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_length) argument. | | +| `min_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument. | | +| `min_total_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `min_tokens` and `min_total_tokens` is allowed. This is similar to Hugging Face's [`min_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.min_length) argument. | | +| `n` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument. | | +| `no_repeat_ngram` | *OptionalNullable[int]* | :heavy_minus_sign: | If this exceeds 1, every ngram of that size can only occur once among the generated result (plus the input tokens for decoder-only models). 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Defaults to 1. 
This is similar to Hugging Face's [`no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.no_repeat_ngram_size) argument. | | +| `num_beams` | *OptionalNullable[int]* | :heavy_minus_sign: | Number of beams for beam search. Numbers between 1 and 31 (both inclusive) are allowed. Default behavior is no beam search. This is similar to Hugging Face's [`num_beams`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_beams) argument. | | +| `presence_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled at least once in the existing text. | | +| `repetition_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument. | | +| `response_format` | [OptionalNullable[models.ResponseFormat]](../models/responseformat.md) | :heavy_minus_sign: | The enforced format of the model's output.

Note that the content of the output message may be truncated if it exceeds the `max_tokens`.
You can check this by verifying that the `finish_reason` of the output message is `length`.

***Important***
You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`).
Otherwise, the model may produce an unending stream of whitespace or other characters.
| | +| `seed` | List[*int*] | :heavy_minus_sign: | Seed to control the random procedure. If nothing is given, the API generates the seed randomly, uses it for sampling, and returns the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations. | | +| `stop` | List[*str*] | :heavy_minus_sign: | When one of the stop phrases appears in the generation result, the API will stop generation.
The stop phrases are excluded from the result.
This option is incompatible with beam search (specified by `num_beams`); use `stop_tokens` for that case instead.
Defaults to empty list.
| | +| `stop_tokens` | List[[models.TokenSequence](../models/tokensequence.md)] | :heavy_minus_sign: | Stop generating further tokens when a generated token corresponds to any of the tokens in the sequence.
If beam search is enabled, all of the active beams should contain the stop token to terminate generation.
| | +| `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. Not supported when using beam search. | | +| `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | +| `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | +| `token_index_to_replace` | List[*int*] | :heavy_minus_sign: | A list of token indices where to replace the embeddings of input tokens provided via either `tokens` or `prompt`. | | +| `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | 1 | +| `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument. | | \ No newline at end of file diff --git a/docs/models/dedicatedcompletionscompleterequest.md b/docs/models/dedicatedcompletionscompleterequest.md index d396a98..dd2205b 100644 --- a/docs/models/dedicatedcompletionscompleterequest.md +++ b/docs/models/dedicatedcompletionscompleterequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | -| `completions_complete_body` | [models.CompletionsCompleteBody](../models/completionscompletebody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). 
| \ No newline at end of file +| Field | Type | Required | Description | +| ---------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------- | +| `dedicated_completions_complete_body` | [models.DedicatedCompletionsCompleteBody](../models/dedicatedcompletionscompletebody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file diff --git a/docs/models/dedicatedcompletionsstreambody.md b/docs/models/dedicatedcompletionsstreambody.md new file mode 100644 index 0000000..a9b7b80 --- /dev/null +++ b/docs/models/dedicatedcompletionsstreambody.md @@ -0,0 +1,17 @@ +# DedicatedCompletionsStreamBody + + +## Supported Types + +### `models.DedicatedCompletionsStreamBodyCompletionsBodyWithPrompt` + +```python +value: models.DedicatedCompletionsStreamBodyCompletionsBodyWithPrompt = /* values here */ +``` + +### `models.DedicatedCompletionsStreamBodyCompletionsBodyWithTokens` + +```python +value: models.DedicatedCompletionsStreamBodyCompletionsBodyWithTokens = /* values here */ +``` + diff --git a/docs/models/dedicatedcompletionsstreambodycompletionsbodywithprompt.md b/docs/models/dedicatedcompletionsstreambodycompletionsbodywithprompt.md new file mode 100644 index 0000000..54f7428 --- /dev/null +++ b/docs/models/dedicatedcompletionsstreambodycompletionsbodywithprompt.md @@ -0,0 +1,43 @@ +# DedicatedCompletionsStreamBodyCompletionsBodyWithPrompt + + +## Fields + +| Field | Type | Required | Description | Example | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `prompt` | *str* | :heavy_check_mark: | The prompt (i.e., input text) to generate completions for. Either `prompt` or `tokens` field is required. | Say this is a test! | +| `model` | *str* | :heavy_check_mark: | ID of target endpoint. If you want to send request to specific adapter, using "ENDPOINT_ID:ADAPTER_ROUTE" format. | (endpoint-id):(adapter-route) | +| `bad_word_tokens` | List[[models.TokenSequence](../models/tokensequence.md)] | :heavy_minus_sign: | Same as the above `bad_words` field, but receives token sequences instead of text phrases. This is similar to Hugging Face's [`bad_word_ids`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.bad_words_ids) argument. | | +| `bad_words` | List[*str*] | :heavy_minus_sign: | Text phrases that should not be generated.
For a bad word phrase that contains N tokens, if the first N-1 tokens appear at the end of the generated result, the logit for the last token of the phrase is set to -inf.
Before checking whether a bad word is included in the result, the word is converted into tokens.
We recommend using `bad_word_tokens` because it is clearer.
For example, after tokenization, phrases "clear" and " clear" can result in different token sequences due to the prepended space character.
Defaults to empty list.
| | +| `beam_compat_no_post_normalization` | *OptionalNullable[bool]* | :heavy_minus_sign: | N/A | | +| `beam_compat_pre_normalization` | *OptionalNullable[bool]* | :heavy_minus_sign: | N/A | | +| `beam_search_type` | *OptionalNullable[str]* | :heavy_minus_sign: | One of `DETERMINISTIC`, `NAIVE_SAMPLING`, and `STOCHASTIC`. Which beam search type to use. `DETERMINISTIC` means the standard, deterministic beam search, which is similar to Hugging Face's [`beam_search`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_search). Argmuents for controlling random sampling such as `top_k` and `top_p` are not allowed for this option. `NAIVE_SAMPLING` is similar to Hugging Face's [`beam_sample`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_sample). `STOCHASTIC` means stochastic beam search (more details in [Kool et al. (2019)](https://proceedings.mlr.press/v97/kool19a.html)). This option is ignored if `num_beams` is not provided. Defaults to `DETERMINISTIC`. | | +| `early_stopping` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stop the beam search when at least `num_beams` beams are finished with the EOS token. Only allowed for beam search. Defaults to false. This is similar to Hugging Face's [`early_stopping`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.early_stopping) argument. | | +| `embedding_to_replace` | List[*float*] | :heavy_minus_sign: | A list of flattened embedding vectors used for replacing the tokens at the specified indices provided via `token_index_to_replace`. | | +| `encoder_no_repeat_ngram` | *OptionalNullable[int]* | :heavy_minus_sign: | If this exceeds 1, every ngram of that size occurring in the input token sequence cannot appear in the generated result. 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Only allowed for encoder-decoder models. Defaults to 1. This is similar to Hugging Face's [`encoder_no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_no_repeat_ngram_size) argument. | | +| `encoder_repetition_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Penalizes tokens that have already appeared in the input tokens. Should be greater than or equal to 1.0. 1.0 means no penalty. Only allowed for encoder-decoder models. See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`encoder_repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_repetition_penalty) argument. | | +| `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | +| `forced_output_tokens` | List[*int*] | :heavy_minus_sign: | A token sequence that is enforced as a generation output. This option can be used when evaluating the model for the datasets with multi-choice problems (e.g., [HellaSwag](https://huggingface.co/datasets/hellaswag), [MMLU](https://huggingface.co/datasets/cais/mmlu)). Use this option with `include_output_logprobs` to get logprobs for the evaluation. | | +| `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. 
Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | +| `include_output_logits` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to include the output logits to the generation output. | | +| `include_output_logprobs` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to include the output logprobs to the generation output. | | +| `length_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Coefficient for exponential length penalty that is used with beam search. Only allowed for beam search. Defaults to 1.0. This is similar to Hugging Face's [`length_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.length_penalty) argument. | | +| `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 | +| `max_total_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `max_tokens` and `max_total_tokens` is allowed. Default value is the model's maximum length. This is similar to Hugging Face's [`max_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_length) argument. | | +| `min_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument. | | +| `min_total_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `min_tokens` and `min_total_tokens` is allowed. This is similar to Hugging Face's [`min_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.min_length) argument. | | +| `n` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument. | | +| `no_repeat_ngram` | *OptionalNullable[int]* | :heavy_minus_sign: | If this exceeds 1, every ngram of that size can only occur once among the generated result (plus the input tokens for decoder-only models). 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Defaults to 1. 
This is similar to Hugging Face's [`no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.no_repeat_ngram_size) argument. | | +| `num_beams` | *OptionalNullable[int]* | :heavy_minus_sign: | Number of beams for beam search. Numbers between 1 and 31 (both inclusive) are allowed. Default behavior is no beam search. This is similar to Hugging Face's [`num_beams`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_beams) argument. | | +| `presence_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled at least once in the existing text. | | +| `repetition_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument. | | +| `response_format` | [OptionalNullable[models.ResponseFormat]](../models/responseformat.md) | :heavy_minus_sign: | The enforced format of the model's output.

Note that the content of the output message may be truncated if it exceeds the `max_tokens` limit.
You can check this by verifying that the `finish_reason` of the output message is `length`.

***Important***
You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`).
Otherwise, the model may produce an unending stream of whitespace or other characters.
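As a rough sketch of the note above, a request that enforces structured output pairs an explicit instruction in the prompt with `response_format`. This is an illustration only: it assumes the table above belongs to `models.DedicatedCompletionsStreamBodyCompletionsBodyWithPrompt`, that the generated models take keyword arguments, and that `models.ResponseFormat` exposes a `type` field such as `"json_object"`; none of these are confirmed by this diff.

```python
from friendli import models  # generated models package referenced throughout these docs

# Hypothetical sketch: top-level field names follow the table above; the
# constructor style and the ResponseFormat fields are assumptions.
body = models.DedicatedCompletionsStreamBodyCompletionsBodyWithPrompt(
    model="(endpoint-id):(adapter-route)",
    # Explicitly instruct the model to emit the desired format, as the note
    # above recommends, to avoid an unending stream of whitespace.
    prompt="You are an API generating a valid JSON as output.\nList three colors as JSON.",
    response_format=models.ResponseFormat(type="json_object"),  # assumed field
    max_tokens=200,
)
```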
| | +| `seed` | List[*int*] | :heavy_minus_sign: | Seed to control the random procedure. If nothing is given, the API generates the seed randomly, uses it for sampling, and returns the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations. | | +| `stop` | List[*str*] | :heavy_minus_sign: | When one of the stop phrases appears in the generation result, the API will stop generation.
The stop phrases are excluded from the result.
This option is incompatible with beam search (specified by `num_beams`); use `stop_tokens` for that case instead.
Defaults to empty list.
| | +| `stop_tokens` | List[[models.TokenSequence](../models/tokensequence.md)] | :heavy_minus_sign: | Stop generating further tokens when a generated token corresponds to any of the tokens in the sequence.
If beam search is enabled, all of the active beams should contain the stop token to terminate generation.
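To make the difference between `stop` and `stop_tokens` concrete, here is a minimal, hypothetical sketch; the keyword constructor and the `tokens` field name on `models.TokenSequence` are assumptions.

```python
from friendli import models  # generated models package referenced throughout these docs

# Text-level stop phrases: generation halts when either phrase appears, the
# phrase is excluded from the result, and this cannot be combined with beam
# search (num_beams).
stop = ["\n\n", "Observation:"]

# Token-level stop sequences: also usable with beam search; all active beams
# must contain the stop token before generation terminates. The `tokens`
# field name is an assumption for illustration.
stop_tokens = [models.TokenSequence(tokens=[128009])]
```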
| | +| `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. | | +| `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | +| `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | +| `token_index_to_replace` | List[*int*] | :heavy_minus_sign: | A list of token indices where to replace the embeddings of input tokens provided via either `tokens` or `prompt`. | | +| `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | 1 | +| `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument. 
| | \ No newline at end of file diff --git a/docs/models/dedicatedcompletionsstreambodycompletionsbodywithtokens.md b/docs/models/dedicatedcompletionsstreambodycompletionsbodywithtokens.md new file mode 100644 index 0000000..4c57dbc --- /dev/null +++ b/docs/models/dedicatedcompletionsstreambodycompletionsbodywithtokens.md @@ -0,0 +1,43 @@ +# DedicatedCompletionsStreamBodyCompletionsBodyWithTokens + + +## Fields + +| Field | Type | Required | Description | Example | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `tokens` | List[*int*] | :heavy_check_mark: | The tokenized prompt (i.e., input tokens). Either `prompt` or `tokens` field is required. | | +| `model` | *str* | :heavy_check_mark: | ID of target endpoint. If you want to send request to specific adapter, using "ENDPOINT_ID:ADAPTER_ROUTE" format. | (endpoint-id):(adapter-route) | +| `bad_word_tokens` | List[[models.TokenSequence](../models/tokensequence.md)] | :heavy_minus_sign: | Same as the above `bad_words` field, but receives token sequences instead of text phrases. This is similar to Hugging Face's [`bad_word_ids`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.bad_words_ids) argument. | | +| `bad_words` | List[*str*] | :heavy_minus_sign: | Text phrases that should not be generated.
For a bad word phrase that contains N tokens, if the first N-1 tokens appear at the end of the generated result, the logit for the last token of the phrase is set to -inf.
Before checking whether a bad word is included in the result, the word is converted into tokens.
We recommend using `bad_word_tokens` because it avoids ambiguity in tokenization.
For example, after tokenization, phrases "clear" and " clear" can result in different token sequences due to the prepended space character.
Defaults to empty list.
| | +| `beam_compat_no_post_normalization` | *OptionalNullable[bool]* | :heavy_minus_sign: | N/A | | +| `beam_compat_pre_normalization` | *OptionalNullable[bool]* | :heavy_minus_sign: | N/A | | +| `beam_search_type` | *OptionalNullable[str]* | :heavy_minus_sign: | One of `DETERMINISTIC`, `NAIVE_SAMPLING`, and `STOCHASTIC`. Which beam search type to use. `DETERMINISTIC` means the standard, deterministic beam search, which is similar to Hugging Face's [`beam_search`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_search). Argmuents for controlling random sampling such as `top_k` and `top_p` are not allowed for this option. `NAIVE_SAMPLING` is similar to Hugging Face's [`beam_sample`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_sample). `STOCHASTIC` means stochastic beam search (more details in [Kool et al. (2019)](https://proceedings.mlr.press/v97/kool19a.html)). This option is ignored if `num_beams` is not provided. Defaults to `DETERMINISTIC`. | | +| `early_stopping` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stop the beam search when at least `num_beams` beams are finished with the EOS token. Only allowed for beam search. Defaults to false. This is similar to Hugging Face's [`early_stopping`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.early_stopping) argument. | | +| `embedding_to_replace` | List[*float*] | :heavy_minus_sign: | A list of flattened embedding vectors used for replacing the tokens at the specified indices provided via `token_index_to_replace`. | | +| `encoder_no_repeat_ngram` | *OptionalNullable[int]* | :heavy_minus_sign: | If this exceeds 1, every ngram of that size occurring in the input token sequence cannot appear in the generated result. 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Only allowed for encoder-decoder models. Defaults to 1. This is similar to Hugging Face's [`encoder_no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_no_repeat_ngram_size) argument. | | +| `encoder_repetition_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Penalizes tokens that have already appeared in the input tokens. Should be greater than or equal to 1.0. 1.0 means no penalty. Only allowed for encoder-decoder models. See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`encoder_repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_repetition_penalty) argument. | | +| `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | +| `forced_output_tokens` | List[*int*] | :heavy_minus_sign: | A token sequence that is enforced as a generation output. This option can be used when evaluating the model for the datasets with multi-choice problems (e.g., [HellaSwag](https://huggingface.co/datasets/hellaswag), [MMLU](https://huggingface.co/datasets/cais/mmlu)). Use this option with `include_output_logprobs` to get logprobs for the evaluation. | | +| `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. 
Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | +| `include_output_logits` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to include the output logits to the generation output. | | +| `include_output_logprobs` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to include the output logprobs to the generation output. | | +| `length_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Coefficient for exponential length penalty that is used with beam search. Only allowed for beam search. Defaults to 1.0. This is similar to Hugging Face's [`length_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.length_penalty) argument. | | +| `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 | +| `max_total_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `max_tokens` and `max_total_tokens` is allowed. Default value is the model's maximum length. This is similar to Hugging Face's [`max_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_length) argument. | | +| `min_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument. | | +| `min_total_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `min_tokens` and `min_total_tokens` is allowed. This is similar to Hugging Face's [`min_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.min_length) argument. | | +| `n` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument. | | +| `no_repeat_ngram` | *OptionalNullable[int]* | :heavy_minus_sign: | If this exceeds 1, every ngram of that size can only occur once among the generated result (plus the input tokens for decoder-only models). 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Defaults to 1. 
This is similar to Hugging Face's [`no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.no_repeat_ngram_size) argument. | | +| `num_beams` | *OptionalNullable[int]* | :heavy_minus_sign: | Number of beams for beam search. Numbers between 1 and 31 (both inclusive) are allowed. Default behavior is no beam search. This is similar to Hugging Face's [`num_beams`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_beams) argument. | | +| `presence_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled at least once in the existing text. | | +| `repetition_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument. | | +| `response_format` | [OptionalNullable[models.ResponseFormat]](../models/responseformat.md) | :heavy_minus_sign: | The enforced format of the model's output.

Note that the content of the output message may be truncated if it exceeds the `max_tokens` limit.
You can check this by verifying that the `finish_reason` of the output message is `length`.

***Important***
You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`).
Otherwise, the model may produce an unending stream of whitespace or other characters.
| | +| `seed` | List[*int*] | :heavy_minus_sign: | Seed to control the random procedure. If nothing is given, the API generates the seed randomly, uses it for sampling, and returns the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations. | | +| `stop` | List[*str*] | :heavy_minus_sign: | When one of the stop phrases appears in the generation result, the API will stop generation.
The stop phrases are excluded from the result.
This option is incompatible with beam search (specified by `num_beams`); use `stop_tokens` for that case instead.
Defaults to empty list.
| | +| `stop_tokens` | List[[models.TokenSequence](../models/tokensequence.md)] | :heavy_minus_sign: | Stop generating further tokens when a generated token corresponds to any of the tokens in the sequence.
If beam search is enabled, all of the active beams should contain the stop token to terminate generation.
| | +| `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. | | +| `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | +| `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | +| `token_index_to_replace` | List[*int*] | :heavy_minus_sign: | A list of token indices where to replace the embeddings of input tokens provided via either `tokens` or `prompt`. | | +| `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | 1 | +| `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument. | | \ No newline at end of file diff --git a/docs/models/dedicatedcompletionsstreamrequest.md b/docs/models/dedicatedcompletionsstreamrequest.md index 2ddc945..95dd1e6 100644 --- a/docs/models/dedicatedcompletionsstreamrequest.md +++ b/docs/models/dedicatedcompletionsstreamrequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| ------------------------------------------------------------------ | ------------------------------------------------------------------ | ------------------------------------------------------------------ | ------------------------------------------------------------------ | -| `completions_stream_body` | [models.CompletionsStreamBody](../models/completionsstreambody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file +| Field | Type | Required | Description | +| ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | +| `dedicated_completions_stream_body` | [models.DedicatedCompletionsStreamBody](../models/dedicatedcompletionsstreambody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). 
| \ No newline at end of file diff --git a/docs/models/dedicateddetokenizationbody.md b/docs/models/dedicateddetokenizationbody.md new file mode 100644 index 0000000..75a8bc1 --- /dev/null +++ b/docs/models/dedicateddetokenizationbody.md @@ -0,0 +1,9 @@ +# DedicatedDetokenizationBody + + +## Fields + +| Field | Type | Required | Description | Example | +| ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | +| `model` | *str* | :heavy_check_mark: | ID of target endpoint. If you want to send request to specific adapter, using "ENDPOINT_ID:ADAPTER_ROUTE" format. | (endpoint-id):(adapter-route) | +| `tokens` | List[*int*] | :heavy_minus_sign: | A token sequence to detokenize. | [
128000,
3923,
374,
1803,
1413,
15592,
30
] | \ No newline at end of file diff --git a/docs/models/dedicateddetokenizationrequest.md b/docs/models/dedicateddetokenizationrequest.md index 0cf894b..3ccf31f 100644 --- a/docs/models/dedicateddetokenizationrequest.md +++ b/docs/models/dedicateddetokenizationrequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -| `detokenization_body` | [models.DetokenizationBody](../models/detokenizationbody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file +| Field | Type | Required | Description | +| ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------ | +| `dedicated_detokenization_body` | [models.DedicatedDetokenizationBody](../models/dedicateddetokenizationbody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file diff --git a/docs/models/dedicatedtokenizationbody.md b/docs/models/dedicatedtokenizationbody.md new file mode 100644 index 0000000..97e87ce --- /dev/null +++ b/docs/models/dedicatedtokenizationbody.md @@ -0,0 +1,9 @@ +# DedicatedTokenizationBody + + +## Fields + +| Field | Type | Required | Description | Example | +| ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | +| `model` | *str* | :heavy_check_mark: | ID of target endpoint. If you want to send request to specific adapter, using "ENDPOINT_ID:ADAPTER_ROUTE" format. | (endpoint-id):(adapter-route) | +| `prompt` | *str* | :heavy_check_mark: | Input text prompt to tokenize. | What is generative AI? | \ No newline at end of file diff --git a/docs/models/dedicatedtokenizationrequest.md b/docs/models/dedicatedtokenizationrequest.md index 51e751a..df5db32 100644 --- a/docs/models/dedicatedtokenizationrequest.md +++ b/docs/models/dedicatedtokenizationrequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- | -| `tokenization_body` | [models.TokenizationBody](../models/tokenizationbody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). 
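Read together, the tokenization and detokenization bodies above are mirror images of each other. A minimal sketch using the example values from the tables (the keyword-argument constructor style is an assumption):

```python
from friendli import models  # generated models package referenced throughout these docs

# Tokenization: text prompt in, token IDs out.
tokenization_body = models.DedicatedTokenizationBody(
    model="(endpoint-id):(adapter-route)",
    prompt="What is generative AI?",
)

# Detokenization: token IDs in, text out. The token list mirrors the example
# shown in the DedicatedDetokenizationBody table above.
detokenization_body = models.DedicatedDetokenizationBody(
    model="(endpoint-id):(adapter-route)",
    tokens=[128000, 3923, 374, 1803, 1413, 15592, 30],
)
```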
| \ No newline at end of file +| Field | Type | Required | Description | +| -------------------------------------------------------------------------- | -------------------------------------------------------------------------- | -------------------------------------------------------------------------- | -------------------------------------------------------------------------- | +| `dedicated_tokenization_body` | [models.DedicatedTokenizationBody](../models/dedicatedtokenizationbody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file diff --git a/docs/models/chatcompletebody.md b/docs/models/serverlesschatcompletebody.md similarity index 99% rename from docs/models/chatcompletebody.md rename to docs/models/serverlesschatcompletebody.md index 9743d4a..33c617d 100644 --- a/docs/models/chatcompletebody.md +++ b/docs/models/serverlesschatcompletebody.md @@ -1,4 +1,4 @@ -# ChatCompleteBody +# ServerlessChatCompleteBody ## Fields diff --git a/docs/models/serverlesschatcompleterequest.md b/docs/models/serverlesschatcompleterequest.md index 781fc42..62e803c 100644 --- a/docs/models/serverlesschatcompleterequest.md +++ b/docs/models/serverlesschatcompleterequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- | -| `chat_complete_body` | [models.ChatCompleteBody](../models/chatcompletebody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file +| Field | Type | Required | Description | +| ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | +| `serverless_chat_complete_body` | [models.ServerlessChatCompleteBody](../models/serverlesschatcompletebody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file diff --git a/docs/models/chatstreambody.md b/docs/models/serverlesschatstreambody.md similarity index 99% rename from docs/models/chatstreambody.md rename to docs/models/serverlesschatstreambody.md index e462622..c40d009 100644 --- a/docs/models/chatstreambody.md +++ b/docs/models/serverlesschatstreambody.md @@ -1,4 +1,4 @@ -# ChatStreamBody +# ServerlessChatStreamBody ## Fields @@ -9,7 +9,7 @@ | `messages` | List[[models.Message](../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
] | | `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | | `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | -| `logit_bias` | [OptionalNullable[models.ChatStreamBodyLogitBias]](../models/chatstreambodylogitbias.md) | :heavy_minus_sign: | Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. | | +| `logit_bias` | [OptionalNullable[models.ServerlessChatStreamBodyLogitBias]](../models/serverlesschatstreambodylogitbias.md) | :heavy_minus_sign: | Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. | | | `logprobs` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to return log probabilities of the output tokens or not. | | | `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 | | `min_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.

**This field is unsupported when `tools` are specified.**
| | @@ -21,10 +21,10 @@ | `seed` | List[*int*] | :heavy_minus_sign: | Seed to control random procedure. If nothing is given, random seed is used for sampling, and return the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations. | | | `stop` | List[*str*] | :heavy_minus_sign: | When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list. | | | `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. | | -| `stream_options` | [OptionalNullable[models.ChatStreamBodyStreamOptions]](../models/chatstreambodystreamoptions.md) | :heavy_minus_sign: | Options related to stream.
It can only be used when `stream: true`.
| | +| `stream_options` | [OptionalNullable[models.ServerlessChatStreamBodyStreamOptions]](../models/serverlesschatstreambodystreamoptions.md) | :heavy_minus_sign: | Options related to stream.
It can only be used when `stream: true`.
| | | `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | | `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | -| `tool_choice` | [Optional[models.ChatStreamBodyToolChoice]](../models/chatstreambodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | +| `tool_choice` | [Optional[models.ServerlessChatStreamBodyToolChoice]](../models/serverlesschatstreambodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | | `tools` | List[[models.Tool](../models/tool.md)] | :heavy_minus_sign: | A list of tools the model may call.
Currently, only functions are supported as a tool.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.

**When `tools` are specified, `min_tokens` field is unsupported.**
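Putting `tools` and `tool_choice` together, a hedged sketch of a request payload that forces a particular function call could look like the following. Only the `tool_choice` object shape is quoted from the description above; the dict-style payload, the function name, and the JSON-Schema details inside `tools` are illustrative assumptions.

```python
# Hypothetical payload sketch; top-level field names follow the
# ServerlessChatStreamBody table above, the tool schema is an assumption.
chat_stream_body = {
    "model": "meta-llama-3.1-8b-instruct",  # illustrative model code
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the weather in Paris?"},
    ],
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "get_weather",  # illustrative function
                "parameters": {  # assumed JSON-Schema shape
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ],
    # Shape quoted verbatim from the tool_choice description above.
    "tool_choice": {"type": "function", "function": {"name": "get_weather"}},
    # min_tokens is intentionally omitted: it is unsupported when tools are set.
    "stream": True,
}
```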
| | | `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | | | `top_logprobs` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used. | | diff --git a/docs/models/serverlesschatstreambodylogitbias.md b/docs/models/serverlesschatstreambodylogitbias.md new file mode 100644 index 0000000..73e0ccb --- /dev/null +++ b/docs/models/serverlesschatstreambodylogitbias.md @@ -0,0 +1,9 @@ +# ServerlessChatStreamBodyLogitBias + +Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. + + +## Fields + +| Field | Type | Required | Description | +| ----------- | ----------- | ----------- | ----------- | \ No newline at end of file diff --git a/docs/models/serverlesschatstreambodystreamoptions.md b/docs/models/serverlesschatstreambodystreamoptions.md new file mode 100644 index 0000000..4389c00 --- /dev/null +++ b/docs/models/serverlesschatstreambodystreamoptions.md @@ -0,0 +1,12 @@ +# ServerlessChatStreamBodyStreamOptions + +Options related to stream. +It can only be used when `stream: true`. + + + +## Fields + +| Field | Type | Required | Description | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `include_usage` | *OptionalNullable[bool]* | :heavy_minus_sign: | When set to `true`,
the number of tokens used will be included at the end of the stream result in the form of
`"usage": {"completion_tokens": number, "prompt_tokens": number, "total_tokens": number}`.
| \ No newline at end of file diff --git a/docs/models/chatstreambodytoolchoice.md b/docs/models/serverlesschatstreambodytoolchoice.md similarity index 94% rename from docs/models/chatstreambodytoolchoice.md rename to docs/models/serverlesschatstreambodytoolchoice.md index b2323a5..d92b4e5 100644 --- a/docs/models/chatstreambodytoolchoice.md +++ b/docs/models/serverlesschatstreambodytoolchoice.md @@ -1,4 +1,4 @@ -# ChatStreamBodyToolChoice +# ServerlessChatStreamBodyToolChoice Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. diff --git a/docs/models/toolassistedchatcompletebodytoolchoicefunction.md b/docs/models/serverlesschatstreambodytoolchoicefunction.md similarity index 96% rename from docs/models/toolassistedchatcompletebodytoolchoicefunction.md rename to docs/models/serverlesschatstreambodytoolchoicefunction.md index 44659d8..d21439b 100644 --- a/docs/models/toolassistedchatcompletebodytoolchoicefunction.md +++ b/docs/models/serverlesschatstreambodytoolchoicefunction.md @@ -1,4 +1,4 @@ -# ToolAssistedChatCompleteBodyToolChoiceFunction +# ServerlessChatStreamBodyToolChoiceFunction ## Fields diff --git a/docs/models/toolassistedchatcompletebodytoolchoicetype.md b/docs/models/serverlesschatstreambodytoolchoicetype.md similarity index 78% rename from docs/models/toolassistedchatcompletebodytoolchoicetype.md rename to docs/models/serverlesschatstreambodytoolchoicetype.md index 8eae86c..d11bcb7 100644 --- a/docs/models/toolassistedchatcompletebodytoolchoicetype.md +++ b/docs/models/serverlesschatstreambodytoolchoicetype.md @@ -1,4 +1,4 @@ -# ToolAssistedChatCompleteBodyToolChoiceType +# ServerlessChatStreamBodyToolChoiceType The type of the tool. Currently, only `function` is supported. diff --git a/docs/models/serverlesschatstreamrequest.md b/docs/models/serverlesschatstreamrequest.md index 7385fae..21c4b01 100644 --- a/docs/models/serverlesschatstreamrequest.md +++ b/docs/models/serverlesschatstreamrequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| ---------------------------------------------------- | ---------------------------------------------------- | ---------------------------------------------------- | ---------------------------------------------------- | -| `chat_stream_body` | [models.ChatStreamBody](../models/chatstreambody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file +| Field | Type | Required | Description | +| ------------------------------------------------------------------------ | ------------------------------------------------------------------------ | ------------------------------------------------------------------------ | ------------------------------------------------------------------------ | +| `serverless_chat_stream_body` | [models.ServerlessChatStreamBody](../models/serverlesschatstreambody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). 
| \ No newline at end of file diff --git a/docs/models/completionscompletebody.md b/docs/models/serverlesscompletionscompletebody.md similarity index 87% rename from docs/models/completionscompletebody.md rename to docs/models/serverlesscompletionscompletebody.md index 28a811e..c054bbe 100644 --- a/docs/models/completionscompletebody.md +++ b/docs/models/serverlesscompletionscompletebody.md @@ -1,4 +1,4 @@ -# CompletionsCompleteBody +# ServerlessCompletionsCompleteBody ## Supported Types diff --git a/docs/models/serverlesscompletionscompleterequest.md b/docs/models/serverlesscompletionscompleterequest.md index 86fa0f2..13d968e 100644 --- a/docs/models/serverlesscompletionscompleterequest.md +++ b/docs/models/serverlesscompletionscompleterequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | -| `completions_complete_body` | [models.CompletionsCompleteBody](../models/completionscompletebody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file +| Field | Type | Required | Description | +| ------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------ | +| `serverless_completions_complete_body` | [models.ServerlessCompletionsCompleteBody](../models/serverlesscompletionscompletebody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). 
| \ No newline at end of file diff --git a/docs/models/serverlesscompletionsstreambody.md b/docs/models/serverlesscompletionsstreambody.md new file mode 100644 index 0000000..d34ffb7 --- /dev/null +++ b/docs/models/serverlesscompletionsstreambody.md @@ -0,0 +1,17 @@ +# ServerlessCompletionsStreamBody + + +## Supported Types + +### `models.ServerlessCompletionsStreamBodyCompletionsBodyWithPrompt` + +```python +value: models.ServerlessCompletionsStreamBodyCompletionsBodyWithPrompt = /* values here */ +``` + +### `models.ServerlessCompletionsStreamBodyCompletionsBodyWithTokens` + +```python +value: models.ServerlessCompletionsStreamBodyCompletionsBodyWithTokens = /* values here */ +``` + diff --git a/docs/models/completionsstreambodycompletionsbodywithprompt.md b/docs/models/serverlesscompletionsstreambodycompletionsbodywithprompt.md similarity index 99% rename from docs/models/completionsstreambodycompletionsbodywithprompt.md rename to docs/models/serverlesscompletionsstreambodycompletionsbodywithprompt.md index d253cef..1e54b3b 100644 --- a/docs/models/completionsstreambodycompletionsbodywithprompt.md +++ b/docs/models/serverlesscompletionsstreambodycompletionsbodywithprompt.md @@ -1,4 +1,4 @@ -# CompletionsStreamBodyCompletionsBodyWithPrompt +# ServerlessCompletionsStreamBodyCompletionsBodyWithPrompt ## Fields diff --git a/docs/models/completionsstreambodycompletionsbodywithtokens.md b/docs/models/serverlesscompletionsstreambodycompletionsbodywithtokens.md similarity index 99% rename from docs/models/completionsstreambodycompletionsbodywithtokens.md rename to docs/models/serverlesscompletionsstreambodycompletionsbodywithtokens.md index 1423cfc..60f3d5b 100644 --- a/docs/models/completionsstreambodycompletionsbodywithtokens.md +++ b/docs/models/serverlesscompletionsstreambodycompletionsbodywithtokens.md @@ -1,4 +1,4 @@ -# CompletionsStreamBodyCompletionsBodyWithTokens +# ServerlessCompletionsStreamBodyCompletionsBodyWithTokens ## Fields diff --git a/docs/models/serverlesscompletionsstreamrequest.md b/docs/models/serverlesscompletionsstreamrequest.md index 96dbf69..eb02b9d 100644 --- a/docs/models/serverlesscompletionsstreamrequest.md +++ b/docs/models/serverlesscompletionsstreamrequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| ------------------------------------------------------------------ | ------------------------------------------------------------------ | ------------------------------------------------------------------ | ------------------------------------------------------------------ | -| `completions_stream_body` | [models.CompletionsStreamBody](../models/completionsstreambody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file +| Field | Type | Required | Description | +| -------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------- | +| `serverless_completions_stream_body` | [models.ServerlessCompletionsStreamBody](../models/serverlesscompletionsstreambody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). 
| \ No newline at end of file diff --git a/docs/models/detokenizationbody.md b/docs/models/serverlessdetokenizationbody.md similarity index 98% rename from docs/models/detokenizationbody.md rename to docs/models/serverlessdetokenizationbody.md index c3dbf1d..bf1f099 100644 --- a/docs/models/detokenizationbody.md +++ b/docs/models/serverlessdetokenizationbody.md @@ -1,4 +1,4 @@ -# DetokenizationBody +# ServerlessDetokenizationBody ## Fields diff --git a/docs/models/serverlessdetokenizationrequest.md b/docs/models/serverlessdetokenizationrequest.md index 0565d3c..633b67e 100644 --- a/docs/models/serverlessdetokenizationrequest.md +++ b/docs/models/serverlessdetokenizationrequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -| `detokenization_body` | [models.DetokenizationBody](../models/detokenizationbody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file +| Field | Type | Required | Description | +| -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | +| `serverless_detokenization_body` | [models.ServerlessDetokenizationBody](../models/serverlessdetokenizationbody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file diff --git a/docs/models/tokenizationbody.md b/docs/models/serverlesstokenizationbody.md similarity index 98% rename from docs/models/tokenizationbody.md rename to docs/models/serverlesstokenizationbody.md index 7912c74..e69bffd 100644 --- a/docs/models/tokenizationbody.md +++ b/docs/models/serverlesstokenizationbody.md @@ -1,4 +1,4 @@ -# TokenizationBody +# ServerlessTokenizationBody ## Fields diff --git a/docs/models/serverlesstokenizationrequest.md b/docs/models/serverlesstokenizationrequest.md index 00ae3ec..f39b62d 100644 --- a/docs/models/serverlesstokenizationrequest.md +++ b/docs/models/serverlesstokenizationrequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- | -| `tokenization_body` | [models.TokenizationBody](../models/tokenizationbody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). 
| \ No newline at end of file +| Field | Type | Required | Description | +| ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | +| `serverless_tokenization_body` | [models.ServerlessTokenizationBody](../models/serverlesstokenizationbody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file diff --git a/docs/models/toolassistedchatstreambody.md b/docs/models/serverlesstoolassistedchatcompletebody.md similarity index 99% rename from docs/models/toolassistedchatstreambody.md rename to docs/models/serverlesstoolassistedchatcompletebody.md index 226367f..817c770 100644 --- a/docs/models/toolassistedchatstreambody.md +++ b/docs/models/serverlesstoolassistedchatcompletebody.md @@ -1,4 +1,4 @@ -# ToolAssistedChatStreamBody +# ServerlessToolAssistedChatCompleteBody ## Fields @@ -6,7 +6,7 @@ | Field | Type | Required | Description | Example | | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `model` | *str* | :heavy_check_mark: | Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). | meta-llama-3.1-8b-instruct | -| `messages` | List[[models.Message](../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
] | +| `messages` | List[[models.Message](../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is 3 + 6?"
}
] | | `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | | `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | | `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 | @@ -22,7 +22,7 @@ | `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. | | | `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | | `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | -| `tool_choice` | [Optional[models.ToolAssistedChatStreamBodyToolChoice]](../models/toolassistedchatstreambodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | -| `tools` | List[[models.ToolAssistedChatTool](../models/toolassistedchattool.md)] | :heavy_minus_sign: | A list of tools the model may call.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.
For more detailed information about each tool, please refer [here](https://friendli.ai/docs/guides/serverless_endpoints/tools/built_in_tools).

**When `tools` are specified, `min_tokens` field is unsupported.**
| | +| `tool_choice` | [Optional[models.ServerlessToolAssistedChatCompleteBodyToolChoice]](../models/serverlesstoolassistedchatcompletebodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | +| `tools` | List[[models.ToolAssistedChatTool](../models/toolassistedchattool.md)] | :heavy_minus_sign: | A list of tools the model may call.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.
For more detailed information about each tool, refer to the [built-in tools guide](https://friendli.ai/docs/guides/serverless_endpoints/tools/built_in_tools).

**When `tools` are specified, `min_tokens` field is unsupported.**
| math:calculator | | `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | | | `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument. | | \ No newline at end of file diff --git a/docs/models/serverlesstoolassistedchatcompletebodytoolchoice.md b/docs/models/serverlesstoolassistedchatcompletebodytoolchoice.md new file mode 100644 index 0000000..ea1a4d1 --- /dev/null +++ b/docs/models/serverlesstoolassistedchatcompletebodytoolchoice.md @@ -0,0 +1,24 @@ +# ServerlessToolAssistedChatCompleteBodyToolChoice + +Determines the tool calling behavior of the model. +When set to `none`, the model will bypass tool execution and generate a response directly. +In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. +Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. +You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`. + + + +## Supported Types + +### `str` + +```python +value: str = /* values here */ +``` + +### `models.ServerlessToolAssistedChatCompleteBodyToolChoiceObject` + +```python +value: models.ServerlessToolAssistedChatCompleteBodyToolChoiceObject = /* values here */ +``` + diff --git a/docs/models/serverlesstoolassistedchatcompletebodytoolchoicefunction.md b/docs/models/serverlesstoolassistedchatcompletebodytoolchoicefunction.md new file mode 100644 index 0000000..6e4439c --- /dev/null +++ b/docs/models/serverlesstoolassistedchatcompletebodytoolchoicefunction.md @@ -0,0 +1,8 @@ +# ServerlessToolAssistedChatCompleteBodyToolChoiceFunction + + +## Fields + +| Field | Type | Required | Description | +| ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | +| `name` | *str* | :heavy_check_mark: | The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. 
| \ No newline at end of file diff --git a/docs/models/serverlesstoolassistedchatcompletebodytoolchoiceobject.md b/docs/models/serverlesstoolassistedchatcompletebodytoolchoiceobject.md new file mode 100644 index 0000000..ef3957d --- /dev/null +++ b/docs/models/serverlesstoolassistedchatcompletebodytoolchoiceobject.md @@ -0,0 +1,9 @@ +# ServerlessToolAssistedChatCompleteBodyToolChoiceObject + + +## Fields + +| Field | Type | Required | Description | +| ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | +| `type` | [models.ServerlessToolAssistedChatCompleteBodyToolChoiceType](../models/serverlesstoolassistedchatcompletebodytoolchoicetype.md) | :heavy_check_mark: | The type of the tool. Currently, only `function` is supported. | +| `function` | [models.ServerlessToolAssistedChatCompleteBodyToolChoiceFunction](../models/serverlesstoolassistedchatcompletebodytoolchoicefunction.md) | :heavy_check_mark: | N/A | \ No newline at end of file diff --git a/docs/models/serverlesstoolassistedchatcompletebodytoolchoicetype.md b/docs/models/serverlesstoolassistedchatcompletebodytoolchoicetype.md new file mode 100644 index 0000000..0ed91a7 --- /dev/null +++ b/docs/models/serverlesstoolassistedchatcompletebodytoolchoicetype.md @@ -0,0 +1,10 @@ +# ServerlessToolAssistedChatCompleteBodyToolChoiceType + +The type of the tool. Currently, only `function` is supported. + + +## Values + +| Name | Value | +| ---------- | ---------- | +| `FUNCTION` | function | \ No newline at end of file diff --git a/docs/models/serverlesstoolassistedchatcompleterequest.md b/docs/models/serverlesstoolassistedchatcompleterequest.md index 131fd06..2b3b8b9 100644 --- a/docs/models/serverlesstoolassistedchatcompleterequest.md +++ b/docs/models/serverlesstoolassistedchatcompleterequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -| `tool_assisted_chat_complete_body` | [models.ToolAssistedChatCompleteBody](../models/toolassistedchatcompletebody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). 
| \ No newline at end of file +| Field | Type | Required | Description | +| ---------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | +| `serverless_tool_assisted_chat_complete_body` | [models.ServerlessToolAssistedChatCompleteBody](../models/serverlesstoolassistedchatcompletebody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file diff --git a/docs/models/toolassistedchatcompletebody.md b/docs/models/serverlesstoolassistedchatstreambody.md similarity index 99% rename from docs/models/toolassistedchatcompletebody.md rename to docs/models/serverlesstoolassistedchatstreambody.md index 91056e7..57f4701 100644 --- a/docs/models/toolassistedchatcompletebody.md +++ b/docs/models/serverlesstoolassistedchatstreambody.md @@ -1,4 +1,4 @@ -# ToolAssistedChatCompleteBody +# ServerlessToolAssistedChatStreamBody ## Fields @@ -6,7 +6,7 @@ | Field | Type | Required | Description | Example | | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `model` | *str* | :heavy_check_mark: | Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). | meta-llama-3.1-8b-instruct | -| `messages` | List[[models.Message](../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
] | +| `messages` | List[[models.Message](../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is 3 + 6?"
}
] | | `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | | `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | | `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 | @@ -22,7 +22,7 @@ | `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. | | | `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | | `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | -| `tool_choice` | [Optional[models.ToolAssistedChatCompleteBodyToolChoice]](../models/toolassistedchatcompletebodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | -| `tools` | List[[models.ToolAssistedChatTool](../models/toolassistedchattool.md)] | :heavy_minus_sign: | A list of tools the model may call.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.
For more detailed information about each tool, please refer [here](https://friendli.ai/docs/guides/serverless_endpoints/tools/built_in_tools).

**When `tools` are specified, `min_tokens` field is unsupported.**
| | +| `tool_choice` | [Optional[models.ServerlessToolAssistedChatStreamBodyToolChoice]](../models/serverlesstoolassistedchatstreambodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | +| `tools` | List[[models.ToolAssistedChatTool](../models/toolassistedchattool.md)] | :heavy_minus_sign: | A list of tools the model may call.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.
For more detailed information about each tool, refer to the [built-in tools guide](https://friendli.ai/docs/guides/serverless_endpoints/tools/built_in_tools).

**When `tools` are specified, `min_tokens` field is unsupported.**
| math:calculator | | `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | | | `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument. | | \ No newline at end of file diff --git a/docs/models/serverlesstoolassistedchatstreambodytoolchoice.md b/docs/models/serverlesstoolassistedchatstreambodytoolchoice.md new file mode 100644 index 0000000..f98a813 --- /dev/null +++ b/docs/models/serverlesstoolassistedchatstreambodytoolchoice.md @@ -0,0 +1,24 @@ +# ServerlessToolAssistedChatStreamBodyToolChoice + +Determines the tool calling behavior of the model. +When set to `none`, the model will bypass tool execution and generate a response directly. +In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. +Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. +You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`. + + + +## Supported Types + +### `str` + +```python +value: str = /* values here */ +``` + +### `models.ServerlessToolAssistedChatStreamBodyToolChoiceObject` + +```python +value: models.ServerlessToolAssistedChatStreamBodyToolChoiceObject = /* values here */ +``` + diff --git a/docs/models/serverlesstoolassistedchatstreambodytoolchoicefunction.md b/docs/models/serverlesstoolassistedchatstreambodytoolchoicefunction.md new file mode 100644 index 0000000..3b30865 --- /dev/null +++ b/docs/models/serverlesstoolassistedchatstreambodytoolchoicefunction.md @@ -0,0 +1,8 @@ +# ServerlessToolAssistedChatStreamBodyToolChoiceFunction + + +## Fields + +| Field | Type | Required | Description | +| ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | +| `name` | *str* | :heavy_check_mark: | The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. 
| \ No newline at end of file diff --git a/docs/models/serverlesstoolassistedchatstreambodytoolchoiceobject.md b/docs/models/serverlesstoolassistedchatstreambodytoolchoiceobject.md new file mode 100644 index 0000000..81a3d4d --- /dev/null +++ b/docs/models/serverlesstoolassistedchatstreambodytoolchoiceobject.md @@ -0,0 +1,9 @@ +# ServerlessToolAssistedChatStreamBodyToolChoiceObject + + +## Fields + +| Field | Type | Required | Description | +| ------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------ | +| `type` | [models.ServerlessToolAssistedChatStreamBodyToolChoiceType](../models/serverlesstoolassistedchatstreambodytoolchoicetype.md) | :heavy_check_mark: | The type of the tool. Currently, only `function` is supported. | +| `function` | [models.ServerlessToolAssistedChatStreamBodyToolChoiceFunction](../models/serverlesstoolassistedchatstreambodytoolchoicefunction.md) | :heavy_check_mark: | N/A | \ No newline at end of file diff --git a/docs/models/serverlesstoolassistedchatstreambodytoolchoicetype.md b/docs/models/serverlesstoolassistedchatstreambodytoolchoicetype.md new file mode 100644 index 0000000..d4bf908 --- /dev/null +++ b/docs/models/serverlesstoolassistedchatstreambodytoolchoicetype.md @@ -0,0 +1,10 @@ +# ServerlessToolAssistedChatStreamBodyToolChoiceType + +The type of the tool. Currently, only `function` is supported. + + +## Values + +| Name | Value | +| ---------- | ---------- | +| `FUNCTION` | function | \ No newline at end of file diff --git a/docs/models/serverlesstoolassistedchatstreamrequest.md b/docs/models/serverlesstoolassistedchatstreamrequest.md index d9108c9..5a0a701 100644 --- a/docs/models/serverlesstoolassistedchatstreamrequest.md +++ b/docs/models/serverlesstoolassistedchatstreamrequest.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | -| `tool_assisted_chat_stream_body` | [models.ToolAssistedChatStreamBody](../models/toolassistedchatstreambody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). 
| \ No newline at end of file +| Field | Type | Required | Description | +| ------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------ | +| `serverless_tool_assisted_chat_stream_body` | [models.ServerlessToolAssistedChatStreamBody](../models/serverlesstoolassistedchatstreambody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | \ No newline at end of file diff --git a/docs/models/toolassistedchatcompletebodytoolchoiceobject.md b/docs/models/toolassistedchatcompletebodytoolchoiceobject.md deleted file mode 100644 index 542ed7e..0000000 --- a/docs/models/toolassistedchatcompletebodytoolchoiceobject.md +++ /dev/null @@ -1,9 +0,0 @@ -# ToolAssistedChatCompleteBodyToolChoiceObject - - -## Fields - -| Field | Type | Required | Description | -| -------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- | -| `type` | [models.ToolAssistedChatCompleteBodyToolChoiceType](../models/toolassistedchatcompletebodytoolchoicetype.md) | :heavy_check_mark: | The type of the tool. Currently, only `function` is supported. | -| `function` | [models.ToolAssistedChatCompleteBodyToolChoiceFunction](../models/toolassistedchatcompletebodytoolchoicefunction.md) | :heavy_check_mark: | N/A | \ No newline at end of file diff --git a/docs/models/toolassistedchatstreambodytoolchoiceobject.md b/docs/models/toolassistedchatstreambodytoolchoiceobject.md deleted file mode 100644 index 05cdd78..0000000 --- a/docs/models/toolassistedchatstreambodytoolchoiceobject.md +++ /dev/null @@ -1,9 +0,0 @@ -# ToolAssistedChatStreamBodyToolChoiceObject - - -## Fields - -| Field | Type | Required | Description | -| ---------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | -| `type` | [models.ToolAssistedChatStreamBodyToolChoiceType](../models/toolassistedchatstreambodytoolchoicetype.md) | :heavy_check_mark: | The type of the tool. Currently, only `function` is supported. 
| -| `function` | [models.ToolAssistedChatStreamBodyToolChoiceFunction](../models/toolassistedchatstreambodytoolchoicefunction.md) | :heavy_check_mark: | N/A | \ No newline at end of file diff --git a/docs/models/toolchoiceobject.md b/docs/models/toolchoiceobject.md index 7bebdbf..60fa8b8 100644 --- a/docs/models/toolchoiceobject.md +++ b/docs/models/toolchoiceobject.md @@ -3,7 +3,7 @@ ## Fields -| Field | Type | Required | Description | -| ---------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------- | -| `type` | [models.ChatStreamBodyToolChoiceType](../models/chatstreambodytoolchoicetype.md) | :heavy_check_mark: | The type of the tool. Currently, only `function` is supported. | -| `function` | [models.ChatStreamBodyToolChoiceFunction](../models/chatstreambodytoolchoicefunction.md) | :heavy_check_mark: | N/A | \ No newline at end of file +| Field | Type | Required | Description | +| ------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------ | +| `type` | [models.ServerlessChatStreamBodyToolChoiceType](../models/serverlesschatstreambodytoolchoicetype.md) | :heavy_check_mark: | The type of the tool. Currently, only `function` is supported. | +| `function` | [models.ServerlessChatStreamBodyToolChoiceFunction](../models/serverlesschatstreambodytoolchoicefunction.md) | :heavy_check_mark: | N/A | \ No newline at end of file diff --git a/docs/sdks/chat/README.md b/docs/sdks/chat/README.md index d3681d5..6f16496 100644 --- a/docs/sdks/chat/README.md +++ b/docs/sdks/chat/README.md @@ -121,7 +121,7 @@ if res is not None: | `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | | | `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | | `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | -| `logit_bias` | [OptionalNullable[models.ChatStreamBodyLogitBias]](../../models/chatstreambodylogitbias.md) | :heavy_minus_sign: | Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. | | +| `logit_bias` | [OptionalNullable[models.ServerlessChatStreamBodyLogitBias]](../../models/serverlesschatstreambodylogitbias.md) | :heavy_minus_sign: | Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. | | | `logprobs` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to return log probabilities of the output tokens or not. 
| | | `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 | | `min_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.

**This field is unsupported when `tools` are specified.**
| | @@ -133,10 +133,10 @@ if res is not None: | `seed` | List[*int*] | :heavy_minus_sign: | Seed to control random procedure. If nothing is given, random seed is used for sampling, and return the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations. | | | `stop` | List[*str*] | :heavy_minus_sign: | When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list. | | | `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. | | -| `stream_options` | [OptionalNullable[models.ChatStreamBodyStreamOptions]](../../models/chatstreambodystreamoptions.md) | :heavy_minus_sign: | Options related to stream.
It can only be used when `stream: true`.
| | +| `stream_options` | [OptionalNullable[models.ServerlessChatStreamBodyStreamOptions]](../../models/serverlesschatstreambodystreamoptions.md) | :heavy_minus_sign: | Options related to stream.
It can only be used when `stream: true`.
| | | `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | | `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | -| `tool_choice` | [Optional[models.ChatStreamBodyToolChoice]](../../models/chatstreambodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | +| `tool_choice` | [Optional[models.ServerlessChatStreamBodyToolChoice]](../../models/serverlesschatstreambodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | | `tools` | List[[models.Tool](../../models/tool.md)] | :heavy_minus_sign: | A list of tools the model may call.
Currently, only functions are supported as a tool.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.

**When `tools` are specified, `min_tokens` field is unsupported.**
| | | `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | | | `top_logprobs` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used. | | diff --git a/docs/sdks/completions/README.md b/docs/sdks/completions/README.md index 93c32c7..cffdb72 100644 --- a/docs/sdks/completions/README.md +++ b/docs/sdks/completions/README.md @@ -22,7 +22,7 @@ s = Friendli( token=os.getenv("FRIENDLI_TOKEN", ""), ) -res = s.serverless.completions.complete(completions_complete_body={ +res = s.serverless.completions.complete(serverless_completions_complete_body={ "prompt": "Say this is a test!", "model": "meta-llama-3.1-8b-instruct", "max_tokens": 200, @@ -37,11 +37,11 @@ if res is not None: ### Parameters -| Parameter | Type | Required | Description | -| ------------------------------------------------------------------------- | ------------------------------------------------------------------------- | ------------------------------------------------------------------------- | ------------------------------------------------------------------------- | -| `completions_complete_body` | [models.CompletionsCompleteBody](../../models/completionscompletebody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | -| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | +| Parameter | Type | Required | Description | +| --------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | +| `serverless_completions_complete_body` | [models.ServerlessCompletionsCompleteBody](../../models/serverlesscompletionscompletebody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | +| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. 
| ### Response @@ -67,7 +67,7 @@ s = Friendli( token=os.getenv("FRIENDLI_TOKEN", ""), ) -res = s.serverless.completions.stream(completions_stream_body={ +res = s.serverless.completions.stream(serverless_completions_stream_body={ "prompt": "Say this is a test!", "model": "meta-llama-3.1-8b-instruct", "max_tokens": 200, @@ -83,11 +83,11 @@ if res is not None: ### Parameters -| Parameter | Type | Required | Description | -| --------------------------------------------------------------------- | --------------------------------------------------------------------- | --------------------------------------------------------------------- | --------------------------------------------------------------------- | -| `completions_stream_body` | [models.CompletionsStreamBody](../../models/completionsstreambody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | -| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | +| Parameter | Type | Required | Description | +| ----------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------- | +| `serverless_completions_stream_body` | [models.ServerlessCompletionsStreamBody](../../models/serverlesscompletionsstreambody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | +| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. 
| ### Response diff --git a/docs/sdks/friendlichat/README.md b/docs/sdks/friendlichat/README.md index 056beda..7cfb518 100644 --- a/docs/sdks/friendlichat/README.md +++ b/docs/sdks/friendlichat/README.md @@ -22,7 +22,7 @@ s = Friendli( token=os.getenv("FRIENDLI_TOKEN", ""), ) -res = s.dedicated.chat.complete(model="meta-llama-3.1-8b-instruct", messages=[ +res = s.dedicated.chat.complete(model="(endpoint-id):(adapter-route)", messages=[ { "role": "system", "content": "You are a helpful assistant.", @@ -43,12 +43,12 @@ if res is not None: | Parameter | Type | Required | Description | Example | | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `model` | *str* | :heavy_check_mark: | Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). | meta-llama-3.1-8b-instruct | +| `model` | *str* | :heavy_check_mark: | ID of target endpoint. If you want to send request to specific adapter, using "ENDPOINT_ID:ADAPTER_ROUTE" format. 
| (endpoint-id):(adapter-route) | | `messages` | List[[models.Message](../../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
] | | `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | | | `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | | `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | -| `logit_bias` | [OptionalNullable[models.LogitBias]](../../models/logitbias.md) | :heavy_minus_sign: | Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. | | +| `logit_bias` | [OptionalNullable[models.DedicatedChatCompleteBodyLogitBias]](../../models/dedicatedchatcompletebodylogitbias.md) | :heavy_minus_sign: | Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. | | | `logprobs` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to return log probabilities of the output tokens or not. | | | `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 | | `min_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.

**This field is unsupported when `tools` are specified.**
| | @@ -60,10 +60,10 @@ if res is not None: | `seed` | List[*int*] | :heavy_minus_sign: | Seed to control random procedure. If nothing is given, random seed is used for sampling, and return the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations. | | | `stop` | List[*str*] | :heavy_minus_sign: | When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list. | | | `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. | | -| `stream_options` | [OptionalNullable[models.StreamOptions]](../../models/streamoptions.md) | :heavy_minus_sign: | Options related to stream.
It can only be used when `stream: true`.
| | +| `stream_options` | [OptionalNullable[models.DedicatedChatCompleteBodyStreamOptions]](../../models/dedicatedchatcompletebodystreamoptions.md) | :heavy_minus_sign: | Options related to stream.
It can only be used when `stream: true`.
| | | `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | | `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | -| `tool_choice` | [Optional[models.ToolChoice]](../../models/toolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | +| `tool_choice` | [Optional[models.DedicatedChatCompleteBodyToolChoice]](../../models/dedicatedchatcompletebodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | | `tools` | List[[models.Tool](../../models/tool.md)] | :heavy_minus_sign: | A list of tools the model may call.
Currently, only functions are supported as a tool.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.

**When `tools` are specified, `min_tokens` field is unsupported.**
| | | `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | | | `top_logprobs` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used. | | @@ -94,7 +94,7 @@ s = Friendli( token=os.getenv("FRIENDLI_TOKEN", ""), ) -res = s.dedicated.chat.stream(model="meta-llama-3.1-8b-instruct", messages=[ +res = s.dedicated.chat.stream(model="(endpoint-id):(adapter-route)", messages=[ { "role": "system", "content": "You are a helpful assistant.", @@ -116,12 +116,12 @@ if res is not None: | Parameter | Type | Required | Description | Example | | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `model` | *str* | :heavy_check_mark: | Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). | meta-llama-3.1-8b-instruct | +| `model` | *str* | :heavy_check_mark: | ID of target endpoint. If you want to send request to specific adapter, using "ENDPOINT_ID:ADAPTER_ROUTE" format. | (endpoint-id):(adapter-route) | | `messages` | List[[models.Message](../../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
] | | `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | | | `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | | `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | -| `logit_bias` | [OptionalNullable[models.ChatStreamBodyLogitBias]](../../models/chatstreambodylogitbias.md) | :heavy_minus_sign: | Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. | | +| `logit_bias` | [OptionalNullable[models.DedicatedChatStreamBodyLogitBias]](../../models/dedicatedchatstreambodylogitbias.md) | :heavy_minus_sign: | Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. | | | `logprobs` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to return log probabilities of the output tokens or not. | | | `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 | | `min_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.

**This field is unsupported when `tools` are specified.**
| | @@ -133,10 +133,10 @@ if res is not None: | `seed` | List[*int*] | :heavy_minus_sign: | Seed to control random procedure. If nothing is given, random seed is used for sampling, and return the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations. | | | `stop` | List[*str*] | :heavy_minus_sign: | When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list. | | | `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. | | -| `stream_options` | [OptionalNullable[models.ChatStreamBodyStreamOptions]](../../models/chatstreambodystreamoptions.md) | :heavy_minus_sign: | Options related to stream.
It can only be used when `stream: true`.
| | +| `stream_options` | [OptionalNullable[models.DedicatedChatStreamBodyStreamOptions]](../../models/dedicatedchatstreambodystreamoptions.md) | :heavy_minus_sign: | Options related to stream.
It can only be used when `stream: true`.
| | | `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | | `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | -| `tool_choice` | [Optional[models.ChatStreamBodyToolChoice]](../../models/chatstreambodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | +| `tool_choice` | [Optional[models.DedicatedChatStreamBodyToolChoice]](../../models/dedicatedchatstreambodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by passing `{"type": "function", "function": {"name": "my_function"}}`.<br/>
| | | `tools` | List[[models.Tool](../../models/tool.md)] | :heavy_minus_sign: | A list of tools the model may call.
Currently, only functions are supported as a tool.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.

**When `tools` are specified, `min_tokens` field is unsupported.**
| | | `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | | | `top_logprobs` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used. | | diff --git a/docs/sdks/friendlicompletions/README.md b/docs/sdks/friendlicompletions/README.md index 301687c..e435ca4 100644 --- a/docs/sdks/friendlicompletions/README.md +++ b/docs/sdks/friendlicompletions/README.md @@ -22,9 +22,9 @@ s = Friendli( token=os.getenv("FRIENDLI_TOKEN", ""), ) -res = s.dedicated.completions.complete(completions_complete_body={ +res = s.dedicated.completions.complete(dedicated_completions_complete_body={ "prompt": "Say this is a test!", - "model": "meta-llama-3.1-8b-instruct", + "model": "(endpoint-id):(adapter-route)", "max_tokens": 200, "top_k": 1, }) @@ -37,11 +37,11 @@ if res is not None: ### Parameters -| Parameter | Type | Required | Description | -| ------------------------------------------------------------------------- | ------------------------------------------------------------------------- | ------------------------------------------------------------------------- | ------------------------------------------------------------------------- | -| `completions_complete_body` | [models.CompletionsCompleteBody](../../models/completionscompletebody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | -| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | +| Parameter | Type | Required | Description | +| ------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------- | +| `dedicated_completions_complete_body` | [models.DedicatedCompletionsCompleteBody](../../models/dedicatedcompletionscompletebody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | +| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. 
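
The parameter table for `dedicated.chat.stream` above documents `tool_choice` and `tools`, but the snippet does not exercise them, so here is a minimal, illustrative sketch of how they might be combined. This is not part of the generated SDK docs: the `get_weather` tool, its JSON schema, and the event-iteration loop are assumptions for illustration only.

```python
import os

from friendli import Friendli

s = Friendli(
    token=os.getenv("FRIENDLI_TOKEN", ""),
)

# Hypothetical function tool; the {"type": "function", "function": {...}}
# shape follows the tool_choice example given in the parameter table.
get_weather = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

res = s.dedicated.chat.stream(
    model="(endpoint-id):(adapter-route)",
    messages=[{"role": "user", "content": "What is the weather in Seoul?"}],
    tools=[get_weather],
    # Force a call to this specific tool instead of a direct text answer.
    tool_choice={"type": "function", "function": {"name": "get_weather"}},
)

if res is not None:
    for event in res:
        # handle each server-sent event (iteration pattern assumed)
        print(event, flush=True)
```

Leaving `tool_choice` unset keeps the default `auto` behavior described in the table, where the model decides on its own whether to call the tool or answer directly.
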
| ### Response @@ -67,9 +67,9 @@ s = Friendli( token=os.getenv("FRIENDLI_TOKEN", ""), ) -res = s.dedicated.completions.stream(completions_stream_body={ +res = s.dedicated.completions.stream(dedicated_completions_stream_body={ "prompt": "Say this is a test!", - "model": "meta-llama-3.1-8b-instruct", + "model": "(endpoint-id):(adapter-route)", "max_tokens": 200, "top_k": 1, }) @@ -83,11 +83,11 @@ if res is not None: ### Parameters -| Parameter | Type | Required | Description | -| --------------------------------------------------------------------- | --------------------------------------------------------------------- | --------------------------------------------------------------------- | --------------------------------------------------------------------- | -| `completions_stream_body` | [models.CompletionsStreamBody](../../models/completionsstreambody.md) | :heavy_check_mark: | N/A | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | -| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | +| Parameter | Type | Required | Description | +| --------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | +| `dedicated_completions_stream_body` | [models.DedicatedCompletionsStreamBody](../../models/dedicatedcompletionsstreambody.md) | :heavy_check_mark: | N/A | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | +| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | ### Response diff --git a/docs/sdks/friendlitoken/README.md b/docs/sdks/friendlitoken/README.md index 3e2f048..580752a 100644 --- a/docs/sdks/friendlitoken/README.md +++ b/docs/sdks/friendlitoken/README.md @@ -22,7 +22,7 @@ s = Friendli( token=os.getenv("FRIENDLI_TOKEN", ""), ) -res = s.dedicated.token.tokenization(model="meta-llama-3.1-8b-instruct", prompt="What is generative AI?") +res = s.dedicated.token.tokenization(model="(endpoint-id):(adapter-route)", prompt="What is generative AI?") if res is not None: # handle response @@ -32,12 +32,12 @@ if res is not None: ### Parameters -| Parameter | Type | Required | Description | Example | -| ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | -| `model` | *str* | :heavy_check_mark: | Code of the model to use. 
See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). | meta-llama-3.1-8b-instruct | -| `prompt` | *str* | :heavy_check_mark: | Input text prompt to tokenize. | What is generative AI? | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | | -| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | | +| Parameter | Type | Required | Description | Example | +| ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | +| `model` | *str* | :heavy_check_mark: | ID of target endpoint. If you want to send request to specific adapter, using "ENDPOINT_ID:ADAPTER_ROUTE" format. | (endpoint-id):(adapter-route) | +| `prompt` | *str* | :heavy_check_mark: | Input text prompt to tokenize. | What is generative AI? | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | | +| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | | ### Response @@ -63,7 +63,7 @@ s = Friendli( token=os.getenv("FRIENDLI_TOKEN", ""), ) -res = s.dedicated.token.detokenization(model="meta-llama-3.1-8b-instruct", tokens=[ +res = s.dedicated.token.detokenization(model="(endpoint-id):(adapter-route)", tokens=[ 128000, 3923, 374, @@ -81,12 +81,12 @@ if res is not None: ### Parameters -| Parameter | Type | Required | Description | Example | -| ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | -| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | | -| `model` | *Optional[str]* | :heavy_minus_sign: | Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). | meta-llama-3.1-8b-instruct | -| `tokens` | List[*int*] | :heavy_minus_sign: | A token sequence to detokenize. | [
128000,
3923,
374,
1803,
1413,
15592,
30
] | -| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | | +| Parameter | Type | Required | Description | Example | +| ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | +| `model` | *str* | :heavy_check_mark: | ID of target endpoint. If you want to send request to specific adapter, using "ENDPOINT_ID:ADAPTER_ROUTE" format. | (endpoint-id):(adapter-route) | +| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | | +| `tokens` | List[*int*] | :heavy_minus_sign: | A token sequence to detokenize. | [
128000,
3923,
374,
1803,
1413,
15592,
30
] | +| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | | ### Response diff --git a/docs/sdks/toolassistedchat/README.md b/docs/sdks/toolassistedchat/README.md index 0b6993a..beca0d7 100644 --- a/docs/sdks/toolassistedchat/README.md +++ b/docs/sdks/toolassistedchat/README.md @@ -35,9 +35,6 @@ res = s.serverless.tool_assisted_chat.complete(model="meta-llama-3.1-8b-instruct { "type": "math:calculator", }, - { - "type": "web:search", - }, ]) if res is not None: @@ -51,7 +48,7 @@ if res is not None: | Parameter | Type | Required | Description | Example | | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `model` | *str* | :heavy_check_mark: | Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). 
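
As a hedged sketch (not generated documentation), the tokenization and detokenization calls shown above could be chained to round-trip a prompt; the `tokens` attribute on the tokenization response is an assumption here.

```python
import os

from friendli import Friendli

s = Friendli(
    token=os.getenv("FRIENDLI_TOKEN", ""),
)

# Tokenize a prompt on the target endpoint ...
tok = s.dedicated.token.tokenization(
    model="(endpoint-id):(adapter-route)",
    prompt="What is generative AI?",
)

# ... and feed the resulting token IDs straight back into detokenization.
if tok is not None:
    detok = s.dedicated.token.detokenization(
        model="(endpoint-id):(adapter-route)",
        tokens=tok.tokens,  # assumed response attribute holding List[int]
    )
    if detok is not None:
        # handle response
        print(detok)
```
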
| meta-llama-3.1-8b-instruct | -| `messages` | List[[models.Message](../../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
] | +| `messages` | List[[models.Message](../../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is 3 + 6?"
}
] | | `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | | | `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | | `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | @@ -68,8 +65,8 @@ if res is not None: | `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. | | | `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | | `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | -| `tool_choice` | [Optional[models.ToolAssistedChatCompleteBodyToolChoice]](../../models/toolassistedchatcompletebodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | -| `tools` | List[[models.ToolAssistedChatTool](../../models/toolassistedchattool.md)] | :heavy_minus_sign: | A list of tools the model may call.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.
For more detailed information about each tool, please refer [here](https://friendli.ai/docs/guides/serverless_endpoints/tools/built_in_tools).

**When `tools` are specified, `min_tokens` field is unsupported.**
| | +| `tool_choice` | [Optional[models.ServerlessToolAssistedChatCompleteBodyToolChoice]](../../models/serverlesstoolassistedchatcompletebodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by passing `{"type": "function", "function": {"name": "my_function"}}`.<br/>
| | +| `tools` | List[[models.ToolAssistedChatTool](../../models/toolassistedchattool.md)] | :heavy_minus_sign: | A list of tools the model may call.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.
For more detailed information about each tool, please see [here](https://friendli.ai/docs/guides/serverless_endpoints/tools/built_in_tools).<br/>

**When `tools` are specified, `min_tokens` field is unsupported.**
| math:calculator | | `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | | | `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument. | | | `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | | @@ -111,9 +108,6 @@ res = s.serverless.tool_assisted_chat.stream(model="meta-llama-3.1-8b-instruct", { "type": "math:calculator", }, - { - "type": "web:search", - }, ]) if res is not None: @@ -128,7 +122,7 @@ if res is not None: | Parameter | Type | Required | Description | Example | | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `model` | *str* | :heavy_check_mark: | Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). | meta-llama-3.1-8b-instruct | -| `messages` | List[[models.Message](../../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
] | +| `messages` | List[[models.Message](../../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is 3 + 6?"
}
] | | `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | | | `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | | | `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | | @@ -145,8 +139,8 @@ if res is not None: | `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. | | | `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | | | `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | | -| `tool_choice` | [Optional[models.ToolAssistedChatStreamBodyToolChoice]](../../models/toolassistedchatstreambodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| | -| `tools` | List[[models.ToolAssistedChatTool](../../models/toolassistedchattool.md)] | :heavy_minus_sign: | A list of tools the model may call.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.
For more detailed information about each tool, please refer [here](https://friendli.ai/docs/guides/serverless_endpoints/tools/built_in_tools).

**When `tools` are specified, `min_tokens` field is unsupported.**
| | +| `tool_choice` | [Optional[models.ServerlessToolAssistedChatStreamBodyToolChoice]](../../models/serverlesstoolassistedchatstreambodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by passing `{"type": "function", "function": {"name": "my_function"}}`.<br/>
| | +| `tools` | List[[models.ToolAssistedChatTool](../../models/toolassistedchattool.md)] | :heavy_minus_sign: | A list of tools the model may call.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.
For more detailed information about each tool, please see [here](https://friendli.ai/docs/guides/serverless_endpoints/tools/built_in_tools).<br/>

**When `tools` are specified, `min_tokens` field is unsupported.**
| math:calculator | | `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | | | `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument. | | | `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | | diff --git a/pyproject.toml b/pyproject.toml index ab140fe..3b4b05a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "friendli" -version = "0.2.25" +version = "0.2.30" description = "Python Client SDK Generated by Speakeasy." authors = ["Speakeasy",] readme = "README-PYPI.md" diff --git a/src/friendli/_version.py b/src/friendli/_version.py index 90e87f4..3203c4e 100644 --- a/src/friendli/_version.py +++ b/src/friendli/_version.py @@ -3,7 +3,7 @@ import importlib.metadata __title__: str = "friendli" -__version__: str = "0.2.25" +__version__: str = "0.2.30" try: if __package__ is not None: diff --git a/src/friendli/chat.py b/src/friendli/chat.py index 9185362..2fda236 100644 --- a/src/friendli/chat.py +++ b/src/friendli/chat.py @@ -94,7 +94,7 @@ def complete( request = models.ServerlessChatCompleteRequest( x_friendli_team=x_friendli_team, - chat_complete_body=models.ChatCompleteBody( + serverless_chat_complete_body=models.ServerlessChatCompleteBody( model=model, messages=utils.get_pydantic_model(messages, List[models.Message]), eos_token=eos_token, @@ -145,11 +145,11 @@ def complete( accept_header_value="application/json", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.chat_complete_body, + request.serverless_chat_complete_body, False, False, "json", - models.ChatCompleteBody, + models.ServerlessChatCompleteBody, ), timeout_ms=timeout_ms, ) @@ -277,7 +277,7 @@ async def complete_async( request = models.ServerlessChatCompleteRequest( x_friendli_team=x_friendli_team, - chat_complete_body=models.ChatCompleteBody( + serverless_chat_complete_body=models.ServerlessChatCompleteBody( model=model, messages=utils.get_pydantic_model(messages, List[models.Message]), eos_token=eos_token, @@ -328,11 +328,11 @@ async def complete_async( accept_header_value="application/json", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.chat_complete_body, + request.serverless_chat_complete_body, False, False, "json", - models.ChatCompleteBody, + models.ServerlessChatCompleteBody, ), timeout_ms=timeout_ms, ) @@ -385,7 +385,8 @@ def stream( frequency_penalty: OptionalNullable[float] = UNSET, logit_bias: OptionalNullable[ Union[ - models.ChatStreamBodyLogitBias, models.ChatStreamBodyLogitBiasTypedDict + models.ServerlessChatStreamBodyLogitBias, + models.ServerlessChatStreamBodyLogitBiasTypedDict, ] ] = UNSET, logprobs: 
OptionalNullable[bool] = UNSET, @@ -403,16 +404,16 @@ def stream( stream: OptionalNullable[bool] = True, stream_options: OptionalNullable[ Union[ - models.ChatStreamBodyStreamOptions, - models.ChatStreamBodyStreamOptionsTypedDict, + models.ServerlessChatStreamBodyStreamOptions, + models.ServerlessChatStreamBodyStreamOptionsTypedDict, ] ] = UNSET, temperature: OptionalNullable[float] = 1, timeout_microseconds: OptionalNullable[int] = UNSET, tool_choice: Optional[ Union[ - models.ChatStreamBodyToolChoice, - models.ChatStreamBodyToolChoiceTypedDict, + models.ServerlessChatStreamBodyToolChoice, + models.ServerlessChatStreamBodyToolChoiceTypedDict, ] ] = None, tools: OptionalNullable[ @@ -468,13 +469,14 @@ def stream( request = models.ServerlessChatStreamRequest( x_friendli_team=x_friendli_team, - chat_stream_body=models.ChatStreamBody( + serverless_chat_stream_body=models.ServerlessChatStreamBody( model=model, messages=utils.get_pydantic_model(messages, List[models.Message]), eos_token=eos_token, frequency_penalty=frequency_penalty, logit_bias=utils.get_pydantic_model( - logit_bias, OptionalNullable[models.ChatStreamBodyLogitBias] + logit_bias, + OptionalNullable[models.ServerlessChatStreamBodyLogitBias], ), logprobs=logprobs, max_tokens=max_tokens, @@ -490,12 +492,13 @@ def stream( stop=stop, stream=stream, stream_options=utils.get_pydantic_model( - stream_options, OptionalNullable[models.ChatStreamBodyStreamOptions] + stream_options, + OptionalNullable[models.ServerlessChatStreamBodyStreamOptions], ), temperature=temperature, timeout_microseconds=timeout_microseconds, tool_choice=utils.get_pydantic_model( - tool_choice, Optional[models.ChatStreamBodyToolChoice] + tool_choice, Optional[models.ServerlessChatStreamBodyToolChoice] ), tools=utils.get_pydantic_model( tools, OptionalNullable[List[models.Tool]] @@ -519,7 +522,11 @@ def stream( accept_header_value="text/event-stream", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.chat_stream_body, False, False, "json", models.ChatStreamBody + request.serverless_chat_stream_body, + False, + False, + "json", + models.ServerlessChatStreamBody, ), timeout_ms=timeout_ms, ) @@ -577,7 +584,8 @@ async def stream_async( frequency_penalty: OptionalNullable[float] = UNSET, logit_bias: OptionalNullable[ Union[ - models.ChatStreamBodyLogitBias, models.ChatStreamBodyLogitBiasTypedDict + models.ServerlessChatStreamBodyLogitBias, + models.ServerlessChatStreamBodyLogitBiasTypedDict, ] ] = UNSET, logprobs: OptionalNullable[bool] = UNSET, @@ -595,16 +603,16 @@ async def stream_async( stream: OptionalNullable[bool] = True, stream_options: OptionalNullable[ Union[ - models.ChatStreamBodyStreamOptions, - models.ChatStreamBodyStreamOptionsTypedDict, + models.ServerlessChatStreamBodyStreamOptions, + models.ServerlessChatStreamBodyStreamOptionsTypedDict, ] ] = UNSET, temperature: OptionalNullable[float] = 1, timeout_microseconds: OptionalNullable[int] = UNSET, tool_choice: Optional[ Union[ - models.ChatStreamBodyToolChoice, - models.ChatStreamBodyToolChoiceTypedDict, + models.ServerlessChatStreamBodyToolChoice, + models.ServerlessChatStreamBodyToolChoiceTypedDict, ] ] = None, tools: OptionalNullable[ @@ -660,13 +668,14 @@ async def stream_async( request = models.ServerlessChatStreamRequest( x_friendli_team=x_friendli_team, - chat_stream_body=models.ChatStreamBody( + serverless_chat_stream_body=models.ServerlessChatStreamBody( model=model, messages=utils.get_pydantic_model(messages, List[models.Message]), 
eos_token=eos_token, frequency_penalty=frequency_penalty, logit_bias=utils.get_pydantic_model( - logit_bias, OptionalNullable[models.ChatStreamBodyLogitBias] + logit_bias, + OptionalNullable[models.ServerlessChatStreamBodyLogitBias], ), logprobs=logprobs, max_tokens=max_tokens, @@ -682,12 +691,13 @@ async def stream_async( stop=stop, stream=stream, stream_options=utils.get_pydantic_model( - stream_options, OptionalNullable[models.ChatStreamBodyStreamOptions] + stream_options, + OptionalNullable[models.ServerlessChatStreamBodyStreamOptions], ), temperature=temperature, timeout_microseconds=timeout_microseconds, tool_choice=utils.get_pydantic_model( - tool_choice, Optional[models.ChatStreamBodyToolChoice] + tool_choice, Optional[models.ServerlessChatStreamBodyToolChoice] ), tools=utils.get_pydantic_model( tools, OptionalNullable[List[models.Tool]] @@ -711,7 +721,11 @@ async def stream_async( accept_header_value="text/event-stream", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.chat_stream_body, False, False, "json", models.ChatStreamBody + request.serverless_chat_stream_body, + False, + False, + "json", + models.ServerlessChatStreamBody, ), timeout_ms=timeout_ms, ) diff --git a/src/friendli/completions.py b/src/friendli/completions.py index 8fe9598..88bac20 100644 --- a/src/friendli/completions.py +++ b/src/friendli/completions.py @@ -12,8 +12,9 @@ class Completions(BaseSDK): def complete( self, *, - completions_complete_body: Union[ - models.CompletionsCompleteBody, models.CompletionsCompleteBodyTypedDict + serverless_completions_complete_body: Union[ + models.ServerlessCompletionsCompleteBody, + models.ServerlessCompletionsCompleteBodyTypedDict, ], x_friendli_team: Optional[str] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, @@ -24,7 +25,7 @@ def complete( Generate text based on the given text prompt. - :param completions_complete_body: + :param serverless_completions_complete_body: :param x_friendli_team: ID of team to run requests as (optional parameter). :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method @@ -40,8 +41,9 @@ def complete( request = models.ServerlessCompletionsCompleteRequest( x_friendli_team=x_friendli_team, - completions_complete_body=utils.get_pydantic_model( - completions_complete_body, models.CompletionsCompleteBody + serverless_completions_complete_body=utils.get_pydantic_model( + serverless_completions_complete_body, + models.ServerlessCompletionsCompleteBody, ), ) @@ -58,11 +60,11 @@ def complete( accept_header_value="application/json", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.completions_complete_body, + request.serverless_completions_complete_body, False, False, "json", - models.CompletionsCompleteBody, + models.ServerlessCompletionsCompleteBody, ), timeout_ms=timeout_ms, ) @@ -108,8 +110,9 @@ def complete( async def complete_async( self, *, - completions_complete_body: Union[ - models.CompletionsCompleteBody, models.CompletionsCompleteBodyTypedDict + serverless_completions_complete_body: Union[ + models.ServerlessCompletionsCompleteBody, + models.ServerlessCompletionsCompleteBodyTypedDict, ], x_friendli_team: Optional[str] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, @@ -120,7 +123,7 @@ async def complete_async( Generate text based on the given text prompt. 
- :param completions_complete_body: + :param serverless_completions_complete_body: :param x_friendli_team: ID of team to run requests as (optional parameter). :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method @@ -136,8 +139,9 @@ async def complete_async( request = models.ServerlessCompletionsCompleteRequest( x_friendli_team=x_friendli_team, - completions_complete_body=utils.get_pydantic_model( - completions_complete_body, models.CompletionsCompleteBody + serverless_completions_complete_body=utils.get_pydantic_model( + serverless_completions_complete_body, + models.ServerlessCompletionsCompleteBody, ), ) @@ -154,11 +158,11 @@ async def complete_async( accept_header_value="application/json", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.completions_complete_body, + request.serverless_completions_complete_body, False, False, "json", - models.CompletionsCompleteBody, + models.ServerlessCompletionsCompleteBody, ), timeout_ms=timeout_ms, ) @@ -204,8 +208,9 @@ async def complete_async( def stream( self, *, - completions_stream_body: Union[ - models.CompletionsStreamBody, models.CompletionsStreamBodyTypedDict + serverless_completions_stream_body: Union[ + models.ServerlessCompletionsStreamBody, + models.ServerlessCompletionsStreamBodyTypedDict, ], x_friendli_team: Optional[str] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, @@ -216,7 +221,7 @@ def stream( Generate text based on the given text prompt. - :param completions_stream_body: + :param serverless_completions_stream_body: :param x_friendli_team: ID of team to run requests as (optional parameter). :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method @@ -232,8 +237,9 @@ def stream( request = models.ServerlessCompletionsStreamRequest( x_friendli_team=x_friendli_team, - completions_stream_body=utils.get_pydantic_model( - completions_stream_body, models.CompletionsStreamBody + serverless_completions_stream_body=utils.get_pydantic_model( + serverless_completions_stream_body, + models.ServerlessCompletionsStreamBody, ), ) @@ -250,11 +256,11 @@ def stream( accept_header_value="text/event-stream", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.completions_stream_body, + request.serverless_completions_stream_body, False, False, "json", - models.CompletionsStreamBody, + models.ServerlessCompletionsStreamBody, ), timeout_ms=timeout_ms, ) @@ -305,8 +311,9 @@ def stream( async def stream_async( self, *, - completions_stream_body: Union[ - models.CompletionsStreamBody, models.CompletionsStreamBodyTypedDict + serverless_completions_stream_body: Union[ + models.ServerlessCompletionsStreamBody, + models.ServerlessCompletionsStreamBodyTypedDict, ], x_friendli_team: Optional[str] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, @@ -317,7 +324,7 @@ async def stream_async( Generate text based on the given text prompt. - :param completions_stream_body: + :param serverless_completions_stream_body: :param x_friendli_team: ID of team to run requests as (optional parameter). 
:param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method @@ -333,8 +340,9 @@ async def stream_async( request = models.ServerlessCompletionsStreamRequest( x_friendli_team=x_friendli_team, - completions_stream_body=utils.get_pydantic_model( - completions_stream_body, models.CompletionsStreamBody + serverless_completions_stream_body=utils.get_pydantic_model( + serverless_completions_stream_body, + models.ServerlessCompletionsStreamBody, ), ) @@ -351,11 +359,11 @@ async def stream_async( accept_header_value="text/event-stream", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.completions_stream_body, + request.serverless_completions_stream_body, False, False, "json", - models.CompletionsStreamBody, + models.ServerlessCompletionsStreamBody, ), timeout_ms=timeout_ms, ) diff --git a/src/friendli/friendli_chat.py b/src/friendli/friendli_chat.py index e0b8418..35f9e4d 100644 --- a/src/friendli/friendli_chat.py +++ b/src/friendli/friendli_chat.py @@ -18,7 +18,10 @@ def complete( eos_token: OptionalNullable[List[int]] = UNSET, frequency_penalty: OptionalNullable[float] = UNSET, logit_bias: OptionalNullable[ - Union[models.LogitBias, models.LogitBiasTypedDict] + Union[ + models.DedicatedChatCompleteBodyLogitBias, + models.DedicatedChatCompleteBodyLogitBiasTypedDict, + ] ] = UNSET, logprobs: OptionalNullable[bool] = UNSET, max_tokens: OptionalNullable[int] = UNSET, @@ -34,12 +37,18 @@ def complete( stop: OptionalNullable[List[str]] = UNSET, stream: OptionalNullable[bool] = False, stream_options: OptionalNullable[ - Union[models.StreamOptions, models.StreamOptionsTypedDict] + Union[ + models.DedicatedChatCompleteBodyStreamOptions, + models.DedicatedChatCompleteBodyStreamOptionsTypedDict, + ] ] = UNSET, temperature: OptionalNullable[float] = 1, timeout_microseconds: OptionalNullable[int] = UNSET, tool_choice: Optional[ - Union[models.ToolChoice, models.ToolChoiceTypedDict] + Union[ + models.DedicatedChatCompleteBodyToolChoice, + models.DedicatedChatCompleteBodyToolChoiceTypedDict, + ] ] = None, tools: OptionalNullable[ Union[List[models.Tool], List[models.ToolTypedDict]] @@ -55,7 +64,7 @@ def complete( Given a list of messages forming a conversation, the model generates a response. - :param model: Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). + :param model: ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format. :param messages: A list of messages comprising the conversation so far. :param x_friendli_team: ID of team to run requests as (optional parameter). :param eos_token: A list of endpoint sentence tokens. 
@@ -94,13 +103,14 @@ def complete( request = models.DedicatedChatCompleteRequest( x_friendli_team=x_friendli_team, - chat_complete_body=models.ChatCompleteBody( + dedicated_chat_complete_body=models.DedicatedChatCompleteBody( model=model, messages=utils.get_pydantic_model(messages, List[models.Message]), eos_token=eos_token, frequency_penalty=frequency_penalty, logit_bias=utils.get_pydantic_model( - logit_bias, OptionalNullable[models.LogitBias] + logit_bias, + OptionalNullable[models.DedicatedChatCompleteBodyLogitBias], ), logprobs=logprobs, max_tokens=max_tokens, @@ -116,12 +126,13 @@ def complete( stop=stop, stream=stream, stream_options=utils.get_pydantic_model( - stream_options, OptionalNullable[models.StreamOptions] + stream_options, + OptionalNullable[models.DedicatedChatCompleteBodyStreamOptions], ), temperature=temperature, timeout_microseconds=timeout_microseconds, tool_choice=utils.get_pydantic_model( - tool_choice, Optional[models.ToolChoice] + tool_choice, Optional[models.DedicatedChatCompleteBodyToolChoice] ), tools=utils.get_pydantic_model( tools, OptionalNullable[List[models.Tool]] @@ -145,11 +156,11 @@ def complete( accept_header_value="application/json", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.chat_complete_body, + request.dedicated_chat_complete_body, False, False, "json", - models.ChatCompleteBody, + models.DedicatedChatCompleteBody, ), timeout_ms=timeout_ms, ) @@ -201,7 +212,10 @@ async def complete_async( eos_token: OptionalNullable[List[int]] = UNSET, frequency_penalty: OptionalNullable[float] = UNSET, logit_bias: OptionalNullable[ - Union[models.LogitBias, models.LogitBiasTypedDict] + Union[ + models.DedicatedChatCompleteBodyLogitBias, + models.DedicatedChatCompleteBodyLogitBiasTypedDict, + ] ] = UNSET, logprobs: OptionalNullable[bool] = UNSET, max_tokens: OptionalNullable[int] = UNSET, @@ -217,12 +231,18 @@ async def complete_async( stop: OptionalNullable[List[str]] = UNSET, stream: OptionalNullable[bool] = False, stream_options: OptionalNullable[ - Union[models.StreamOptions, models.StreamOptionsTypedDict] + Union[ + models.DedicatedChatCompleteBodyStreamOptions, + models.DedicatedChatCompleteBodyStreamOptionsTypedDict, + ] ] = UNSET, temperature: OptionalNullable[float] = 1, timeout_microseconds: OptionalNullable[int] = UNSET, tool_choice: Optional[ - Union[models.ToolChoice, models.ToolChoiceTypedDict] + Union[ + models.DedicatedChatCompleteBodyToolChoice, + models.DedicatedChatCompleteBodyToolChoiceTypedDict, + ] ] = None, tools: OptionalNullable[ Union[List[models.Tool], List[models.ToolTypedDict]] @@ -238,7 +258,7 @@ async def complete_async( Given a list of messages forming a conversation, the model generates a response. - :param model: Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). + :param model: ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format. :param messages: A list of messages comprising the conversation so far. :param x_friendli_team: ID of team to run requests as (optional parameter). :param eos_token: A list of endpoint sentence tokens. 
@@ -277,13 +297,14 @@ async def complete_async( request = models.DedicatedChatCompleteRequest( x_friendli_team=x_friendli_team, - chat_complete_body=models.ChatCompleteBody( + dedicated_chat_complete_body=models.DedicatedChatCompleteBody( model=model, messages=utils.get_pydantic_model(messages, List[models.Message]), eos_token=eos_token, frequency_penalty=frequency_penalty, logit_bias=utils.get_pydantic_model( - logit_bias, OptionalNullable[models.LogitBias] + logit_bias, + OptionalNullable[models.DedicatedChatCompleteBodyLogitBias], ), logprobs=logprobs, max_tokens=max_tokens, @@ -299,12 +320,13 @@ async def complete_async( stop=stop, stream=stream, stream_options=utils.get_pydantic_model( - stream_options, OptionalNullable[models.StreamOptions] + stream_options, + OptionalNullable[models.DedicatedChatCompleteBodyStreamOptions], ), temperature=temperature, timeout_microseconds=timeout_microseconds, tool_choice=utils.get_pydantic_model( - tool_choice, Optional[models.ToolChoice] + tool_choice, Optional[models.DedicatedChatCompleteBodyToolChoice] ), tools=utils.get_pydantic_model( tools, OptionalNullable[List[models.Tool]] @@ -328,11 +350,11 @@ async def complete_async( accept_header_value="application/json", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.chat_complete_body, + request.dedicated_chat_complete_body, False, False, "json", - models.ChatCompleteBody, + models.DedicatedChatCompleteBody, ), timeout_ms=timeout_ms, ) @@ -385,7 +407,8 @@ def stream( frequency_penalty: OptionalNullable[float] = UNSET, logit_bias: OptionalNullable[ Union[ - models.ChatStreamBodyLogitBias, models.ChatStreamBodyLogitBiasTypedDict + models.DedicatedChatStreamBodyLogitBias, + models.DedicatedChatStreamBodyLogitBiasTypedDict, ] ] = UNSET, logprobs: OptionalNullable[bool] = UNSET, @@ -403,16 +426,16 @@ def stream( stream: OptionalNullable[bool] = True, stream_options: OptionalNullable[ Union[ - models.ChatStreamBodyStreamOptions, - models.ChatStreamBodyStreamOptionsTypedDict, + models.DedicatedChatStreamBodyStreamOptions, + models.DedicatedChatStreamBodyStreamOptionsTypedDict, ] ] = UNSET, temperature: OptionalNullable[float] = 1, timeout_microseconds: OptionalNullable[int] = UNSET, tool_choice: Optional[ Union[ - models.ChatStreamBodyToolChoice, - models.ChatStreamBodyToolChoiceTypedDict, + models.DedicatedChatStreamBodyToolChoice, + models.DedicatedChatStreamBodyToolChoiceTypedDict, ] ] = None, tools: OptionalNullable[ @@ -429,7 +452,7 @@ def stream( Given a list of messages forming a conversation, the model generates a response. - :param model: Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). + :param model: ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format. :param messages: A list of messages comprising the conversation so far. :param x_friendli_team: ID of team to run requests as (optional parameter). :param eos_token: A list of endpoint sentence tokens. 
@@ -468,13 +491,14 @@ def stream( request = models.DedicatedChatStreamRequest( x_friendli_team=x_friendli_team, - chat_stream_body=models.ChatStreamBody( + dedicated_chat_stream_body=models.DedicatedChatStreamBody( model=model, messages=utils.get_pydantic_model(messages, List[models.Message]), eos_token=eos_token, frequency_penalty=frequency_penalty, logit_bias=utils.get_pydantic_model( - logit_bias, OptionalNullable[models.ChatStreamBodyLogitBias] + logit_bias, + OptionalNullable[models.DedicatedChatStreamBodyLogitBias], ), logprobs=logprobs, max_tokens=max_tokens, @@ -490,12 +514,13 @@ def stream( stop=stop, stream=stream, stream_options=utils.get_pydantic_model( - stream_options, OptionalNullable[models.ChatStreamBodyStreamOptions] + stream_options, + OptionalNullable[models.DedicatedChatStreamBodyStreamOptions], ), temperature=temperature, timeout_microseconds=timeout_microseconds, tool_choice=utils.get_pydantic_model( - tool_choice, Optional[models.ChatStreamBodyToolChoice] + tool_choice, Optional[models.DedicatedChatStreamBodyToolChoice] ), tools=utils.get_pydantic_model( tools, OptionalNullable[List[models.Tool]] @@ -519,7 +544,11 @@ def stream( accept_header_value="text/event-stream", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.chat_stream_body, False, False, "json", models.ChatStreamBody + request.dedicated_chat_stream_body, + False, + False, + "json", + models.DedicatedChatStreamBody, ), timeout_ms=timeout_ms, ) @@ -577,7 +606,8 @@ async def stream_async( frequency_penalty: OptionalNullable[float] = UNSET, logit_bias: OptionalNullable[ Union[ - models.ChatStreamBodyLogitBias, models.ChatStreamBodyLogitBiasTypedDict + models.DedicatedChatStreamBodyLogitBias, + models.DedicatedChatStreamBodyLogitBiasTypedDict, ] ] = UNSET, logprobs: OptionalNullable[bool] = UNSET, @@ -595,16 +625,16 @@ async def stream_async( stream: OptionalNullable[bool] = True, stream_options: OptionalNullable[ Union[ - models.ChatStreamBodyStreamOptions, - models.ChatStreamBodyStreamOptionsTypedDict, + models.DedicatedChatStreamBodyStreamOptions, + models.DedicatedChatStreamBodyStreamOptionsTypedDict, ] ] = UNSET, temperature: OptionalNullable[float] = 1, timeout_microseconds: OptionalNullable[int] = UNSET, tool_choice: Optional[ Union[ - models.ChatStreamBodyToolChoice, - models.ChatStreamBodyToolChoiceTypedDict, + models.DedicatedChatStreamBodyToolChoice, + models.DedicatedChatStreamBodyToolChoiceTypedDict, ] ] = None, tools: OptionalNullable[ @@ -621,7 +651,7 @@ async def stream_async( Given a list of messages forming a conversation, the model generates a response. - :param model: Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). + :param model: ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format. :param messages: A list of messages comprising the conversation so far. :param x_friendli_team: ID of team to run requests as (optional parameter). :param eos_token: A list of endpoint sentence tokens. 
@@ -660,13 +690,14 @@ async def stream_async( request = models.DedicatedChatStreamRequest( x_friendli_team=x_friendli_team, - chat_stream_body=models.ChatStreamBody( + dedicated_chat_stream_body=models.DedicatedChatStreamBody( model=model, messages=utils.get_pydantic_model(messages, List[models.Message]), eos_token=eos_token, frequency_penalty=frequency_penalty, logit_bias=utils.get_pydantic_model( - logit_bias, OptionalNullable[models.ChatStreamBodyLogitBias] + logit_bias, + OptionalNullable[models.DedicatedChatStreamBodyLogitBias], ), logprobs=logprobs, max_tokens=max_tokens, @@ -682,12 +713,13 @@ async def stream_async( stop=stop, stream=stream, stream_options=utils.get_pydantic_model( - stream_options, OptionalNullable[models.ChatStreamBodyStreamOptions] + stream_options, + OptionalNullable[models.DedicatedChatStreamBodyStreamOptions], ), temperature=temperature, timeout_microseconds=timeout_microseconds, tool_choice=utils.get_pydantic_model( - tool_choice, Optional[models.ChatStreamBodyToolChoice] + tool_choice, Optional[models.DedicatedChatStreamBodyToolChoice] ), tools=utils.get_pydantic_model( tools, OptionalNullable[List[models.Tool]] @@ -711,7 +743,11 @@ async def stream_async( accept_header_value="text/event-stream", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.chat_stream_body, False, False, "json", models.ChatStreamBody + request.dedicated_chat_stream_body, + False, + False, + "json", + models.DedicatedChatStreamBody, ), timeout_ms=timeout_ms, ) diff --git a/src/friendli/friendli_completions.py b/src/friendli/friendli_completions.py index cf469d8..5c468e0 100644 --- a/src/friendli/friendli_completions.py +++ b/src/friendli/friendli_completions.py @@ -12,8 +12,9 @@ class FriendliCompletions(BaseSDK): def complete( self, *, - completions_complete_body: Union[ - models.CompletionsCompleteBody, models.CompletionsCompleteBodyTypedDict + dedicated_completions_complete_body: Union[ + models.DedicatedCompletionsCompleteBody, + models.DedicatedCompletionsCompleteBodyTypedDict, ], x_friendli_team: Optional[str] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, @@ -24,7 +25,7 @@ def complete( Generate text based on the given text prompt. - :param completions_complete_body: + :param dedicated_completions_complete_body: :param x_friendli_team: ID of team to run requests as (optional parameter). 
:param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method @@ -40,8 +41,9 @@ def complete( request = models.DedicatedCompletionsCompleteRequest( x_friendli_team=x_friendli_team, - completions_complete_body=utils.get_pydantic_model( - completions_complete_body, models.CompletionsCompleteBody + dedicated_completions_complete_body=utils.get_pydantic_model( + dedicated_completions_complete_body, + models.DedicatedCompletionsCompleteBody, ), ) @@ -58,11 +60,11 @@ def complete( accept_header_value="application/json", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.completions_complete_body, + request.dedicated_completions_complete_body, False, False, "json", - models.CompletionsCompleteBody, + models.DedicatedCompletionsCompleteBody, ), timeout_ms=timeout_ms, ) @@ -108,8 +110,9 @@ def complete( async def complete_async( self, *, - completions_complete_body: Union[ - models.CompletionsCompleteBody, models.CompletionsCompleteBodyTypedDict + dedicated_completions_complete_body: Union[ + models.DedicatedCompletionsCompleteBody, + models.DedicatedCompletionsCompleteBodyTypedDict, ], x_friendli_team: Optional[str] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, @@ -120,7 +123,7 @@ async def complete_async( Generate text based on the given text prompt. - :param completions_complete_body: + :param dedicated_completions_complete_body: :param x_friendli_team: ID of team to run requests as (optional parameter). :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method @@ -136,8 +139,9 @@ async def complete_async( request = models.DedicatedCompletionsCompleteRequest( x_friendli_team=x_friendli_team, - completions_complete_body=utils.get_pydantic_model( - completions_complete_body, models.CompletionsCompleteBody + dedicated_completions_complete_body=utils.get_pydantic_model( + dedicated_completions_complete_body, + models.DedicatedCompletionsCompleteBody, ), ) @@ -154,11 +158,11 @@ async def complete_async( accept_header_value="application/json", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.completions_complete_body, + request.dedicated_completions_complete_body, False, False, "json", - models.CompletionsCompleteBody, + models.DedicatedCompletionsCompleteBody, ), timeout_ms=timeout_ms, ) @@ -204,8 +208,9 @@ async def complete_async( def stream( self, *, - completions_stream_body: Union[ - models.CompletionsStreamBody, models.CompletionsStreamBodyTypedDict + dedicated_completions_stream_body: Union[ + models.DedicatedCompletionsStreamBody, + models.DedicatedCompletionsStreamBodyTypedDict, ], x_friendli_team: Optional[str] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, @@ -216,7 +221,7 @@ def stream( Generate text based on the given text prompt. - :param completions_stream_body: + :param dedicated_completions_stream_body: :param x_friendli_team: ID of team to run requests as (optional parameter). 
:param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method @@ -232,8 +237,8 @@ def stream( request = models.DedicatedCompletionsStreamRequest( x_friendli_team=x_friendli_team, - completions_stream_body=utils.get_pydantic_model( - completions_stream_body, models.CompletionsStreamBody + dedicated_completions_stream_body=utils.get_pydantic_model( + dedicated_completions_stream_body, models.DedicatedCompletionsStreamBody ), ) @@ -250,11 +255,11 @@ def stream( accept_header_value="text/event-stream", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.completions_stream_body, + request.dedicated_completions_stream_body, False, False, "json", - models.CompletionsStreamBody, + models.DedicatedCompletionsStreamBody, ), timeout_ms=timeout_ms, ) @@ -305,8 +310,9 @@ def stream( async def stream_async( self, *, - completions_stream_body: Union[ - models.CompletionsStreamBody, models.CompletionsStreamBodyTypedDict + dedicated_completions_stream_body: Union[ + models.DedicatedCompletionsStreamBody, + models.DedicatedCompletionsStreamBodyTypedDict, ], x_friendli_team: Optional[str] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, @@ -317,7 +323,7 @@ async def stream_async( Generate text based on the given text prompt. - :param completions_stream_body: + :param dedicated_completions_stream_body: :param x_friendli_team: ID of team to run requests as (optional parameter). :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method @@ -333,8 +339,8 @@ async def stream_async( request = models.DedicatedCompletionsStreamRequest( x_friendli_team=x_friendli_team, - completions_stream_body=utils.get_pydantic_model( - completions_stream_body, models.CompletionsStreamBody + dedicated_completions_stream_body=utils.get_pydantic_model( + dedicated_completions_stream_body, models.DedicatedCompletionsStreamBody ), ) @@ -351,11 +357,11 @@ async def stream_async( accept_header_value="text/event-stream", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.completions_stream_body, + request.dedicated_completions_stream_body, False, False, "json", - models.CompletionsStreamBody, + models.DedicatedCompletionsStreamBody, ), timeout_ms=timeout_ms, ) diff --git a/src/friendli/friendli_token.py b/src/friendli/friendli_token.py index edceb1c..b37fffc 100644 --- a/src/friendli/friendli_token.py +++ b/src/friendli/friendli_token.py @@ -23,7 +23,7 @@ def tokenization( By giving a text input, generate a tokenized output of token IDs. - :param model: Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). + :param model: ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format. :param prompt: Input text prompt to tokenize. :param x_friendli_team: ID of team to run requests as (optional parameter). 
:param retries: Override the default retry configuration for this method @@ -40,7 +40,7 @@ def tokenization( request = models.DedicatedTokenizationRequest( x_friendli_team=x_friendli_team, - tokenization_body=models.TokenizationBody( + dedicated_tokenization_body=models.DedicatedTokenizationBody( model=model, prompt=prompt, ), @@ -59,7 +59,11 @@ def tokenization( accept_header_value="application/json", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.tokenization_body, False, False, "json", models.TokenizationBody + request.dedicated_tokenization_body, + False, + False, + "json", + models.DedicatedTokenizationBody, ), timeout_ms=timeout_ms, ) @@ -116,7 +120,7 @@ async def tokenization_async( By giving a text input, generate a tokenized output of token IDs. - :param model: Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). + :param model: ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format. :param prompt: Input text prompt to tokenize. :param x_friendli_team: ID of team to run requests as (optional parameter). :param retries: Override the default retry configuration for this method @@ -133,7 +137,7 @@ async def tokenization_async( request = models.DedicatedTokenizationRequest( x_friendli_team=x_friendli_team, - tokenization_body=models.TokenizationBody( + dedicated_tokenization_body=models.DedicatedTokenizationBody( model=model, prompt=prompt, ), @@ -152,7 +156,11 @@ async def tokenization_async( accept_header_value="application/json", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.tokenization_body, False, False, "json", models.TokenizationBody + request.dedicated_tokenization_body, + False, + False, + "json", + models.DedicatedTokenizationBody, ), timeout_ms=timeout_ms, ) @@ -198,8 +206,8 @@ async def tokenization_async( def detokenization( self, *, + model: str, x_friendli_team: Optional[str] = None, - model: Optional[str] = None, tokens: Optional[List[int]] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, server_url: Optional[str] = None, @@ -209,8 +217,8 @@ def detokenization( By giving a list of tokens, generate a detokenized output text string. + :param model: ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format. :param x_friendli_team: ID of team to run requests as (optional parameter). - :param model: Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). :param tokens: A token sequence to detokenize. 
:param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method @@ -226,7 +234,7 @@ def detokenization( request = models.DedicatedDetokenizationRequest( x_friendli_team=x_friendli_team, - detokenization_body=models.DetokenizationBody( + dedicated_detokenization_body=models.DedicatedDetokenizationBody( model=model, tokens=tokens, ), @@ -245,11 +253,11 @@ def detokenization( accept_header_value="application/json", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.detokenization_body, + request.dedicated_detokenization_body, False, False, "json", - models.DetokenizationBody, + models.DedicatedDetokenizationBody, ), timeout_ms=timeout_ms, ) @@ -295,8 +303,8 @@ def detokenization( async def detokenization_async( self, *, + model: str, x_friendli_team: Optional[str] = None, - model: Optional[str] = None, tokens: Optional[List[int]] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, server_url: Optional[str] = None, @@ -306,8 +314,8 @@ async def detokenization_async( By giving a list of tokens, generate a detokenized output text string. + :param model: ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format. :param x_friendli_team: ID of team to run requests as (optional parameter). - :param model: Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models). :param tokens: A token sequence to detokenize. :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method @@ -323,7 +331,7 @@ async def detokenization_async( request = models.DedicatedDetokenizationRequest( x_friendli_team=x_friendli_team, - detokenization_body=models.DetokenizationBody( + dedicated_detokenization_body=models.DedicatedDetokenizationBody( model=model, tokens=tokens, ), @@ -342,11 +350,11 @@ async def detokenization_async( accept_header_value="application/json", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( - request.detokenization_body, + request.dedicated_detokenization_body, False, False, "json", - models.DetokenizationBody, + models.DedicatedDetokenizationBody, ), timeout_ms=timeout_ms, ) diff --git a/src/friendli/models/__init__.py b/src/friendli/models/__init__.py index e2c7ba1..982a6ba 100644 --- a/src/friendli/models/__init__.py +++ b/src/friendli/models/__init__.py @@ -21,37 +21,7 @@ ChatChoiceType, ChatChoiceTypedDict, ) -from .chatcompletebody import ( - ChatCompleteBody, - ChatCompleteBodyTypedDict, - LogitBias, - LogitBiasTypedDict, - Object, - ObjectTypedDict, - StreamOptions, - StreamOptionsTypedDict, - ToolChoice, - ToolChoiceFunction, - ToolChoiceFunctionTypedDict, - ToolChoiceType, - ToolChoiceTypedDict, -) from .chatresult import ChatResult, ChatResultTypedDict -from .chatstreambody import ( - ChatStreamBody, - ChatStreamBodyLogitBias, - ChatStreamBodyLogitBiasTypedDict, - ChatStreamBodyStreamOptions, - ChatStreamBodyStreamOptionsTypedDict, - ChatStreamBodyToolChoice, - ChatStreamBodyToolChoiceFunction, - ChatStreamBodyToolChoiceFunctionTypedDict, - ChatStreamBodyToolChoiceType, - ChatStreamBodyToolChoiceTypedDict, - ChatStreamBodyTypedDict, - ToolChoiceObject, - ToolChoiceObjectTypedDict, -) from .completionsbodywithprompt import ( CompletionsBodyWithPrompt, CompletionsBodyWithPromptTypedDict, @@ 
-61,44 +31,85 @@ CompletionsBodyWithTokensTypedDict, ) from .completionschoice import CompletionsChoice, CompletionsChoiceTypedDict -from .completionscompletebody import ( - CompletionsCompleteBody, - CompletionsCompleteBodyTypedDict, -) from .completionsresult import CompletionsResult, CompletionsResultTypedDict -from .completionsstreambody import ( - CompletionsStreamBody, - CompletionsStreamBodyCompletionsBodyWithPrompt, - CompletionsStreamBodyCompletionsBodyWithPromptTypedDict, - CompletionsStreamBodyCompletionsBodyWithTokens, - CompletionsStreamBodyCompletionsBodyWithTokensTypedDict, - CompletionsStreamBodyTypedDict, +from .dedicatedchatcompletebody import ( + DedicatedChatCompleteBody, + DedicatedChatCompleteBodyLogitBias, + DedicatedChatCompleteBodyLogitBiasTypedDict, + DedicatedChatCompleteBodyStreamOptions, + DedicatedChatCompleteBodyStreamOptionsTypedDict, + DedicatedChatCompleteBodyToolChoice, + DedicatedChatCompleteBodyToolChoiceFunction, + DedicatedChatCompleteBodyToolChoiceFunctionTypedDict, + DedicatedChatCompleteBodyToolChoiceObject, + DedicatedChatCompleteBodyToolChoiceObjectTypedDict, + DedicatedChatCompleteBodyToolChoiceType, + DedicatedChatCompleteBodyToolChoiceTypedDict, + DedicatedChatCompleteBodyTypedDict, ) from .dedicatedchatcompleteop import ( DedicatedChatCompleteRequest, DedicatedChatCompleteRequestTypedDict, ) +from .dedicatedchatstreambody import ( + DedicatedChatStreamBody, + DedicatedChatStreamBodyLogitBias, + DedicatedChatStreamBodyLogitBiasTypedDict, + DedicatedChatStreamBodyStreamOptions, + DedicatedChatStreamBodyStreamOptionsTypedDict, + DedicatedChatStreamBodyToolChoice, + DedicatedChatStreamBodyToolChoiceFunction, + DedicatedChatStreamBodyToolChoiceFunctionTypedDict, + DedicatedChatStreamBodyToolChoiceObject, + DedicatedChatStreamBodyToolChoiceObjectTypedDict, + DedicatedChatStreamBodyToolChoiceType, + DedicatedChatStreamBodyToolChoiceTypedDict, + DedicatedChatStreamBodyTypedDict, +) from .dedicatedchatstreamop import ( DedicatedChatStreamRequest, DedicatedChatStreamRequestTypedDict, ) +from .dedicatedcompletionscompletebody import ( + DedicatedCompletionsCompleteBody, + DedicatedCompletionsCompleteBodyCompletionsBodyWithPrompt, + DedicatedCompletionsCompleteBodyCompletionsBodyWithPromptTypedDict, + DedicatedCompletionsCompleteBodyCompletionsBodyWithTokens, + DedicatedCompletionsCompleteBodyCompletionsBodyWithTokensTypedDict, + DedicatedCompletionsCompleteBodyTypedDict, +) from .dedicatedcompletionscompleteop import ( DedicatedCompletionsCompleteRequest, DedicatedCompletionsCompleteRequestTypedDict, ) +from .dedicatedcompletionsstreambody import ( + DedicatedCompletionsStreamBody, + DedicatedCompletionsStreamBodyCompletionsBodyWithPrompt, + DedicatedCompletionsStreamBodyCompletionsBodyWithPromptTypedDict, + DedicatedCompletionsStreamBodyCompletionsBodyWithTokens, + DedicatedCompletionsStreamBodyCompletionsBodyWithTokensTypedDict, + DedicatedCompletionsStreamBodyTypedDict, +) from .dedicatedcompletionsstreamop import ( DedicatedCompletionsStreamRequest, DedicatedCompletionsStreamRequestTypedDict, ) +from .dedicateddetokenizationbody import ( + DedicatedDetokenizationBody, + DedicatedDetokenizationBodyTypedDict, +) from .dedicateddetokenizationop import ( DedicatedDetokenizationRequest, DedicatedDetokenizationRequestTypedDict, ) +from .dedicatedtokenizationbody import ( + DedicatedTokenizationBody, + DedicatedTokenizationBodyTypedDict, +) from .dedicatedtokenizationop import ( DedicatedTokenizationRequest, DedicatedTokenizationRequestTypedDict, ) -from 
.detokenizationbody import DetokenizationBody, DetokenizationBodyTypedDict from .detokenizationresult import DetokenizationResult, DetokenizationResultTypedDict from .filebuiltintool import ( FileBuiltInTool, @@ -124,34 +135,106 @@ from .responseformat import ResponseFormat, ResponseFormatTypedDict, Type from .sdkerror import SDKError from .security import Security, SecurityTypedDict +from .serverlesschatcompletebody import ( + LogitBias, + LogitBiasTypedDict, + Object, + ObjectTypedDict, + ServerlessChatCompleteBody, + ServerlessChatCompleteBodyTypedDict, + StreamOptions, + StreamOptionsTypedDict, + ToolChoice, + ToolChoiceFunction, + ToolChoiceFunctionTypedDict, + ToolChoiceType, + ToolChoiceTypedDict, +) from .serverlesschatcompleteop import ( ServerlessChatCompleteRequest, ServerlessChatCompleteRequestTypedDict, ) +from .serverlesschatstreambody import ( + ServerlessChatStreamBody, + ServerlessChatStreamBodyLogitBias, + ServerlessChatStreamBodyLogitBiasTypedDict, + ServerlessChatStreamBodyStreamOptions, + ServerlessChatStreamBodyStreamOptionsTypedDict, + ServerlessChatStreamBodyToolChoice, + ServerlessChatStreamBodyToolChoiceFunction, + ServerlessChatStreamBodyToolChoiceFunctionTypedDict, + ServerlessChatStreamBodyToolChoiceType, + ServerlessChatStreamBodyToolChoiceTypedDict, + ServerlessChatStreamBodyTypedDict, + ToolChoiceObject, + ToolChoiceObjectTypedDict, +) from .serverlesschatstreamop import ( ServerlessChatStreamRequest, ServerlessChatStreamRequestTypedDict, ) +from .serverlesscompletionscompletebody import ( + ServerlessCompletionsCompleteBody, + ServerlessCompletionsCompleteBodyTypedDict, +) from .serverlesscompletionscompleteop import ( ServerlessCompletionsCompleteRequest, ServerlessCompletionsCompleteRequestTypedDict, ) +from .serverlesscompletionsstreambody import ( + ServerlessCompletionsStreamBody, + ServerlessCompletionsStreamBodyCompletionsBodyWithPrompt, + ServerlessCompletionsStreamBodyCompletionsBodyWithPromptTypedDict, + ServerlessCompletionsStreamBodyCompletionsBodyWithTokens, + ServerlessCompletionsStreamBodyCompletionsBodyWithTokensTypedDict, + ServerlessCompletionsStreamBodyTypedDict, +) from .serverlesscompletionsstreamop import ( ServerlessCompletionsStreamRequest, ServerlessCompletionsStreamRequestTypedDict, ) +from .serverlessdetokenizationbody import ( + ServerlessDetokenizationBody, + ServerlessDetokenizationBodyTypedDict, +) from .serverlessdetokenizationop import ( ServerlessDetokenizationRequest, ServerlessDetokenizationRequestTypedDict, ) +from .serverlesstokenizationbody import ( + ServerlessTokenizationBody, + ServerlessTokenizationBodyTypedDict, +) from .serverlesstokenizationop import ( ServerlessTokenizationRequest, ServerlessTokenizationRequestTypedDict, ) +from .serverlesstoolassistedchatcompletebody import ( + ServerlessToolAssistedChatCompleteBody, + ServerlessToolAssistedChatCompleteBodyToolChoice, + ServerlessToolAssistedChatCompleteBodyToolChoiceFunction, + ServerlessToolAssistedChatCompleteBodyToolChoiceFunctionTypedDict, + ServerlessToolAssistedChatCompleteBodyToolChoiceObject, + ServerlessToolAssistedChatCompleteBodyToolChoiceObjectTypedDict, + ServerlessToolAssistedChatCompleteBodyToolChoiceType, + ServerlessToolAssistedChatCompleteBodyToolChoiceTypedDict, + ServerlessToolAssistedChatCompleteBodyTypedDict, +) from .serverlesstoolassistedchatcompleteop import ( ServerlessToolAssistedChatCompleteRequest, ServerlessToolAssistedChatCompleteRequestTypedDict, ) +from .serverlesstoolassistedchatstreambody import ( + 
ServerlessToolAssistedChatStreamBody, + ServerlessToolAssistedChatStreamBodyToolChoice, + ServerlessToolAssistedChatStreamBodyToolChoiceFunction, + ServerlessToolAssistedChatStreamBodyToolChoiceFunctionTypedDict, + ServerlessToolAssistedChatStreamBodyToolChoiceObject, + ServerlessToolAssistedChatStreamBodyToolChoiceObjectTypedDict, + ServerlessToolAssistedChatStreamBodyToolChoiceType, + ServerlessToolAssistedChatStreamBodyToolChoiceTypedDict, + ServerlessToolAssistedChatStreamBodyTypedDict, +) from .serverlesstoolassistedchatstreamop import ( ServerlessToolAssistedChatStreamRequest, ServerlessToolAssistedChatStreamRequestTypedDict, @@ -196,32 +279,9 @@ StreamedToolAssistedChatResultTypedDict, ) from .systemmessage import Role, SystemMessage, SystemMessageTypedDict -from .tokenizationbody import TokenizationBody, TokenizationBodyTypedDict from .tokenizationresult import TokenizationResult, TokenizationResultTypedDict from .tokensequence import TokenSequence, TokenSequenceTypedDict from .tool import Tool, ToolType, ToolTypedDict -from .toolassistedchatcompletebody import ( - ToolAssistedChatCompleteBody, - ToolAssistedChatCompleteBodyToolChoice, - ToolAssistedChatCompleteBodyToolChoiceFunction, - ToolAssistedChatCompleteBodyToolChoiceFunctionTypedDict, - ToolAssistedChatCompleteBodyToolChoiceObject, - ToolAssistedChatCompleteBodyToolChoiceObjectTypedDict, - ToolAssistedChatCompleteBodyToolChoiceType, - ToolAssistedChatCompleteBodyToolChoiceTypedDict, - ToolAssistedChatCompleteBodyTypedDict, -) -from .toolassistedchatstreambody import ( - ToolAssistedChatStreamBody, - ToolAssistedChatStreamBodyToolChoice, - ToolAssistedChatStreamBodyToolChoiceFunction, - ToolAssistedChatStreamBodyToolChoiceFunctionTypedDict, - ToolAssistedChatStreamBodyToolChoiceObject, - ToolAssistedChatStreamBodyToolChoiceObjectTypedDict, - ToolAssistedChatStreamBodyToolChoiceType, - ToolAssistedChatStreamBodyToolChoiceTypedDict, - ToolAssistedChatStreamBodyTypedDict, -) from .toolassistedchattool import ToolAssistedChatTool, ToolAssistedChatToolTypedDict from .toolmessage import ToolMessage, ToolMessageRole, ToolMessageTypedDict from .usage import Usage, UsageTypedDict @@ -243,57 +303,76 @@ "ChatChoiceToolCallsTypedDict", "ChatChoiceType", "ChatChoiceTypedDict", - "ChatCompleteBody", - "ChatCompleteBodyTypedDict", "ChatResult", "ChatResultTypedDict", - "ChatStreamBody", - "ChatStreamBodyLogitBias", - "ChatStreamBodyLogitBiasTypedDict", - "ChatStreamBodyStreamOptions", - "ChatStreamBodyStreamOptionsTypedDict", - "ChatStreamBodyToolChoice", - "ChatStreamBodyToolChoiceFunction", - "ChatStreamBodyToolChoiceFunctionTypedDict", - "ChatStreamBodyToolChoiceType", - "ChatStreamBodyToolChoiceTypedDict", - "ChatStreamBodyTypedDict", "CompletionsBodyWithPrompt", "CompletionsBodyWithPromptTypedDict", "CompletionsBodyWithTokens", "CompletionsBodyWithTokensTypedDict", "CompletionsChoice", "CompletionsChoiceTypedDict", - "CompletionsCompleteBody", - "CompletionsCompleteBodyTypedDict", "CompletionsResult", "CompletionsResultTypedDict", - "CompletionsStreamBody", - "CompletionsStreamBodyCompletionsBodyWithPrompt", - "CompletionsStreamBodyCompletionsBodyWithPromptTypedDict", - "CompletionsStreamBodyCompletionsBodyWithTokens", - "CompletionsStreamBodyCompletionsBodyWithTokensTypedDict", - "CompletionsStreamBodyTypedDict", "Content", "ContentTypedDict", "Data", "DataTypedDict", + "DedicatedChatCompleteBody", + "DedicatedChatCompleteBodyLogitBias", + "DedicatedChatCompleteBodyLogitBiasTypedDict", + "DedicatedChatCompleteBodyStreamOptions", + 
"DedicatedChatCompleteBodyStreamOptionsTypedDict", + "DedicatedChatCompleteBodyToolChoice", + "DedicatedChatCompleteBodyToolChoiceFunction", + "DedicatedChatCompleteBodyToolChoiceFunctionTypedDict", + "DedicatedChatCompleteBodyToolChoiceObject", + "DedicatedChatCompleteBodyToolChoiceObjectTypedDict", + "DedicatedChatCompleteBodyToolChoiceType", + "DedicatedChatCompleteBodyToolChoiceTypedDict", + "DedicatedChatCompleteBodyTypedDict", "DedicatedChatCompleteRequest", "DedicatedChatCompleteRequestTypedDict", + "DedicatedChatStreamBody", + "DedicatedChatStreamBodyLogitBias", + "DedicatedChatStreamBodyLogitBiasTypedDict", + "DedicatedChatStreamBodyStreamOptions", + "DedicatedChatStreamBodyStreamOptionsTypedDict", + "DedicatedChatStreamBodyToolChoice", + "DedicatedChatStreamBodyToolChoiceFunction", + "DedicatedChatStreamBodyToolChoiceFunctionTypedDict", + "DedicatedChatStreamBodyToolChoiceObject", + "DedicatedChatStreamBodyToolChoiceObjectTypedDict", + "DedicatedChatStreamBodyToolChoiceType", + "DedicatedChatStreamBodyToolChoiceTypedDict", + "DedicatedChatStreamBodyTypedDict", "DedicatedChatStreamRequest", "DedicatedChatStreamRequestTypedDict", + "DedicatedCompletionsCompleteBody", + "DedicatedCompletionsCompleteBodyCompletionsBodyWithPrompt", + "DedicatedCompletionsCompleteBodyCompletionsBodyWithPromptTypedDict", + "DedicatedCompletionsCompleteBodyCompletionsBodyWithTokens", + "DedicatedCompletionsCompleteBodyCompletionsBodyWithTokensTypedDict", + "DedicatedCompletionsCompleteBodyTypedDict", "DedicatedCompletionsCompleteRequest", "DedicatedCompletionsCompleteRequestTypedDict", + "DedicatedCompletionsStreamBody", + "DedicatedCompletionsStreamBodyCompletionsBodyWithPrompt", + "DedicatedCompletionsStreamBodyCompletionsBodyWithPromptTypedDict", + "DedicatedCompletionsStreamBodyCompletionsBodyWithTokens", + "DedicatedCompletionsStreamBodyCompletionsBodyWithTokensTypedDict", + "DedicatedCompletionsStreamBodyTypedDict", "DedicatedCompletionsStreamRequest", "DedicatedCompletionsStreamRequestTypedDict", + "DedicatedDetokenizationBody", + "DedicatedDetokenizationBodyTypedDict", "DedicatedDetokenizationRequest", "DedicatedDetokenizationRequestTypedDict", + "DedicatedTokenizationBody", + "DedicatedTokenizationBodyTypedDict", "DedicatedTokenizationRequest", "DedicatedTokenizationRequestTypedDict", "Delta", "DeltaTypedDict", - "DetokenizationBody", - "DetokenizationBodyTypedDict", "DetokenizationResult", "DetokenizationResultTypedDict", "Event", @@ -324,20 +403,63 @@ "SDKError", "Security", "SecurityTypedDict", + "ServerlessChatCompleteBody", + "ServerlessChatCompleteBodyTypedDict", "ServerlessChatCompleteRequest", "ServerlessChatCompleteRequestTypedDict", + "ServerlessChatStreamBody", + "ServerlessChatStreamBodyLogitBias", + "ServerlessChatStreamBodyLogitBiasTypedDict", + "ServerlessChatStreamBodyStreamOptions", + "ServerlessChatStreamBodyStreamOptionsTypedDict", + "ServerlessChatStreamBodyToolChoice", + "ServerlessChatStreamBodyToolChoiceFunction", + "ServerlessChatStreamBodyToolChoiceFunctionTypedDict", + "ServerlessChatStreamBodyToolChoiceType", + "ServerlessChatStreamBodyToolChoiceTypedDict", + "ServerlessChatStreamBodyTypedDict", "ServerlessChatStreamRequest", "ServerlessChatStreamRequestTypedDict", + "ServerlessCompletionsCompleteBody", + "ServerlessCompletionsCompleteBodyTypedDict", "ServerlessCompletionsCompleteRequest", "ServerlessCompletionsCompleteRequestTypedDict", + "ServerlessCompletionsStreamBody", + "ServerlessCompletionsStreamBodyCompletionsBodyWithPrompt", + 
"ServerlessCompletionsStreamBodyCompletionsBodyWithPromptTypedDict", + "ServerlessCompletionsStreamBodyCompletionsBodyWithTokens", + "ServerlessCompletionsStreamBodyCompletionsBodyWithTokensTypedDict", + "ServerlessCompletionsStreamBodyTypedDict", "ServerlessCompletionsStreamRequest", "ServerlessCompletionsStreamRequestTypedDict", + "ServerlessDetokenizationBody", + "ServerlessDetokenizationBodyTypedDict", "ServerlessDetokenizationRequest", "ServerlessDetokenizationRequestTypedDict", + "ServerlessTokenizationBody", + "ServerlessTokenizationBodyTypedDict", "ServerlessTokenizationRequest", "ServerlessTokenizationRequestTypedDict", + "ServerlessToolAssistedChatCompleteBody", + "ServerlessToolAssistedChatCompleteBodyToolChoice", + "ServerlessToolAssistedChatCompleteBodyToolChoiceFunction", + "ServerlessToolAssistedChatCompleteBodyToolChoiceFunctionTypedDict", + "ServerlessToolAssistedChatCompleteBodyToolChoiceObject", + "ServerlessToolAssistedChatCompleteBodyToolChoiceObjectTypedDict", + "ServerlessToolAssistedChatCompleteBodyToolChoiceType", + "ServerlessToolAssistedChatCompleteBodyToolChoiceTypedDict", + "ServerlessToolAssistedChatCompleteBodyTypedDict", "ServerlessToolAssistedChatCompleteRequest", "ServerlessToolAssistedChatCompleteRequestTypedDict", + "ServerlessToolAssistedChatStreamBody", + "ServerlessToolAssistedChatStreamBodyToolChoice", + "ServerlessToolAssistedChatStreamBodyToolChoiceFunction", + "ServerlessToolAssistedChatStreamBodyToolChoiceFunctionTypedDict", + "ServerlessToolAssistedChatStreamBodyToolChoiceObject", + "ServerlessToolAssistedChatStreamBodyToolChoiceObjectTypedDict", + "ServerlessToolAssistedChatStreamBodyToolChoiceType", + "ServerlessToolAssistedChatStreamBodyToolChoiceTypedDict", + "ServerlessToolAssistedChatStreamBodyTypedDict", "ServerlessToolAssistedChatStreamRequest", "ServerlessToolAssistedChatStreamRequestTypedDict", "StreamOptions", @@ -368,29 +490,9 @@ "SystemMessageTypedDict", "TokenSequence", "TokenSequenceTypedDict", - "TokenizationBody", - "TokenizationBodyTypedDict", "TokenizationResult", "TokenizationResultTypedDict", "Tool", - "ToolAssistedChatCompleteBody", - "ToolAssistedChatCompleteBodyToolChoice", - "ToolAssistedChatCompleteBodyToolChoiceFunction", - "ToolAssistedChatCompleteBodyToolChoiceFunctionTypedDict", - "ToolAssistedChatCompleteBodyToolChoiceObject", - "ToolAssistedChatCompleteBodyToolChoiceObjectTypedDict", - "ToolAssistedChatCompleteBodyToolChoiceType", - "ToolAssistedChatCompleteBodyToolChoiceTypedDict", - "ToolAssistedChatCompleteBodyTypedDict", - "ToolAssistedChatStreamBody", - "ToolAssistedChatStreamBodyToolChoice", - "ToolAssistedChatStreamBodyToolChoiceFunction", - "ToolAssistedChatStreamBodyToolChoiceFunctionTypedDict", - "ToolAssistedChatStreamBodyToolChoiceObject", - "ToolAssistedChatStreamBodyToolChoiceObjectTypedDict", - "ToolAssistedChatStreamBodyToolChoiceType", - "ToolAssistedChatStreamBodyToolChoiceTypedDict", - "ToolAssistedChatStreamBodyTypedDict", "ToolAssistedChatTool", "ToolAssistedChatToolTypedDict", "ToolCalls", diff --git a/src/friendli/models/dedicatedchatcompletebody.py b/src/friendli/models/dedicatedchatcompletebody.py new file mode 100644 index 0000000..e4babef --- /dev/null +++ b/src/friendli/models/dedicatedchatcompletebody.py @@ -0,0 +1,387 @@ +"""Code generated by Speakeasy (https://speakeasy.com). 
DO NOT EDIT.""" + +from __future__ import annotations +from .message import Message, MessageTypedDict +from .responseformat import ResponseFormat, ResponseFormatTypedDict +from .tool import Tool, ToolTypedDict +from friendli.types import BaseModel, Nullable, OptionalNullable, UNSET, UNSET_SENTINEL +from pydantic import model_serializer +from typing import List, Literal, Optional, Union +from typing_extensions import NotRequired, TypedDict + + +class DedicatedChatCompleteBodyLogitBiasTypedDict(TypedDict): + r"""Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.""" + + +class DedicatedChatCompleteBodyLogitBias(BaseModel): + r"""Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.""" + + +class DedicatedChatCompleteBodyStreamOptionsTypedDict(TypedDict): + r"""Options related to stream. + It can only be used when `stream: true`. + + """ + + include_usage: NotRequired[Nullable[bool]] + r"""When set to `true`, + the number of tokens used will be included at the end of the stream result in the form of + `\"usage\": {\"completion_tokens\": number, \"prompt_tokens\": number, \"total_tokens\": number}`. + + """ + + +class DedicatedChatCompleteBodyStreamOptions(BaseModel): + r"""Options related to stream. + It can only be used when `stream: true`. + + """ + + include_usage: OptionalNullable[bool] = UNSET + r"""When set to `true`, + the number of tokens used will be included at the end of the stream result in the form of + `\"usage\": {\"completion_tokens\": number, \"prompt_tokens\": number, \"total_tokens\": number}`. + + """ + + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = ["include_usage"] + nullable_fields = ["include_usage"] + null_default_fields = [] + + serialized = handler(self) + + m = {} + + for n, f in self.model_fields.items(): + k = f.alias or n + val = serialized.get(k) + serialized.pop(k, None) + + optional_nullable = k in optional_fields and k in nullable_fields + is_set = ( + self.__pydantic_fields_set__.intersection({n}) + or k in null_default_fields + ) # pylint: disable=no-member + + if val is not None and val != UNSET_SENTINEL: + m[k] = val + elif val != UNSET_SENTINEL and ( + not k in optional_fields or (optional_nullable and is_set) + ): + m[k] = val + + return m + + +DedicatedChatCompleteBodyToolChoiceType = Literal["function"] +r"""The type of the tool. Currently, only `function` is supported.""" + + +class DedicatedChatCompleteBodyToolChoiceFunctionTypedDict(TypedDict): + name: str + r"""The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.""" + + +class DedicatedChatCompleteBodyToolChoiceFunction(BaseModel): + name: str + r"""The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.""" + + +class DedicatedChatCompleteBodyToolChoiceObjectTypedDict(TypedDict): + type: DedicatedChatCompleteBodyToolChoiceType + r"""The type of the tool. Currently, only `function` is supported.""" + function: DedicatedChatCompleteBodyToolChoiceFunctionTypedDict + + +class DedicatedChatCompleteBodyToolChoiceObject(BaseModel): + type: DedicatedChatCompleteBodyToolChoiceType + r"""The type of the tool. 
Currently, only `function` is supported.""" + + function: DedicatedChatCompleteBodyToolChoiceFunction + + +DedicatedChatCompleteBodyToolChoiceTypedDict = Union[ + DedicatedChatCompleteBodyToolChoiceObjectTypedDict, str +] +r"""Determines the tool calling behavior of the model. +When set to `none`, the model will bypass tool execution and generate a response directly. +In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. +Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. +You can also specify a particular tool by `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}`. + +""" + + +DedicatedChatCompleteBodyToolChoice = Union[ + DedicatedChatCompleteBodyToolChoiceObject, str +] +r"""Determines the tool calling behavior of the model. +When set to `none`, the model will bypass tool execution and generate a response directly. +In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. +Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. +You can also specify a particular tool by `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}`. + +""" + + +class DedicatedChatCompleteBodyTypedDict(TypedDict): + model: str + r"""ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + messages: List[MessageTypedDict] + r"""A list of messages comprising the conversation so far.""" + eos_token: NotRequired[Nullable[List[int]]] + r"""A list of endpoint sentence tokens.""" + frequency_penalty: NotRequired[Nullable[float]] + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" + logit_bias: NotRequired[Nullable[DedicatedChatCompleteBodyLogitBiasTypedDict]] + r"""Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.""" + logprobs: NotRequired[Nullable[bool]] + r"""Whether to return log probabilities of the output tokens or not.""" + max_tokens: NotRequired[Nullable[int]] + r"""The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.""" + min_tokens: NotRequired[Nullable[int]] + r"""The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument. + + **This field is unsupported when `tools` are specified.** + + """ + n: NotRequired[Nullable[int]] + r"""The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. 
This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.""" + parallel_tool_calls: NotRequired[Nullable[bool]] + r"""Whether to enable parallel function calling.""" + presence_penalty: NotRequired[Nullable[float]] + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled at least once in the existing text.""" + repetition_penalty: NotRequired[Nullable[float]] + r"""Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.""" + response_format: NotRequired[Nullable[ResponseFormatTypedDict]] + r"""The enforced format of the model's output. + + Note that the content of the output message may be truncated if it exceeds the `max_tokens`. + You can check this by verifying that the `finish_reason` of the output message is `length`. + + ***Important*** + You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`). + Otherwise, the model may result in an unending stream of whitespace or other characters. + + """ + seed: NotRequired[Nullable[List[int]]] + r"""Seed to control random procedure. If nothing is given, random seed is used for sampling, and return the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.""" + stop: NotRequired[Nullable[List[str]]] + r"""When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list.""" + stream: NotRequired[Nullable[bool]] + r"""Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.""" + stream_options: NotRequired[ + Nullable[DedicatedChatCompleteBodyStreamOptionsTypedDict] + ] + r"""Options related to stream. + It can only be used when `stream: true`. + + """ + temperature: NotRequired[Nullable[float]] + r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" + timeout_microseconds: NotRequired[Nullable[int]] + r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.""" + tool_choice: NotRequired[DedicatedChatCompleteBodyToolChoiceTypedDict] + r"""Determines the tool calling behavior of the model. + When set to `none`, the model will bypass tool execution and generate a response directly. + In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. 
+ Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. + You can also specify a particular tool by `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}`. + + """ + tools: NotRequired[Nullable[List[ToolTypedDict]]] + r"""A list of tools the model may call. + Currently, only functions are supported as a tool. + A maximum of 128 functions is supported. + Use this to provide a list of functions the model may generate JSON inputs for. + + **When `tools` are specified, `min_tokens` field is unsupported.** + + """ + top_k: NotRequired[Nullable[int]] + r"""The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.""" + top_logprobs: NotRequired[Nullable[int]] + r"""The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used.""" + top_p: NotRequired[Nullable[float]] + r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" + + +class DedicatedChatCompleteBody(BaseModel): + model: str + r"""ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + + messages: List[Message] + r"""A list of messages comprising the conversation so far.""" + + eos_token: OptionalNullable[List[int]] = UNSET + r"""A list of endpoint sentence tokens.""" + + frequency_penalty: OptionalNullable[float] = UNSET + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" + + logit_bias: OptionalNullable[DedicatedChatCompleteBodyLogitBias] = UNSET + r"""Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.""" + + logprobs: OptionalNullable[bool] = UNSET + r"""Whether to return log probabilities of the output tokens or not.""" + + max_tokens: OptionalNullable[int] = UNSET + r"""The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.""" + + min_tokens: OptionalNullable[int] = 0 + r"""The minimum number of tokens to generate. Default value is 0. 
This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument. + + **This field is unsupported when `tools` are specified.** + + """ + + n: OptionalNullable[int] = 1 + r"""The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.""" + + parallel_tool_calls: OptionalNullable[bool] = UNSET + r"""Whether to enable parallel function calling.""" + + presence_penalty: OptionalNullable[float] = UNSET + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled at least once in the existing text.""" + + repetition_penalty: OptionalNullable[float] = UNSET + r"""Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.""" + + response_format: OptionalNullable[ResponseFormat] = UNSET + r"""The enforced format of the model's output. + + Note that the content of the output message may be truncated if it exceeds the `max_tokens`. + You can check this by verifying that the `finish_reason` of the output message is `length`. + + ***Important*** + You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`). + Otherwise, the model may result in an unending stream of whitespace or other characters. + + """ + + seed: OptionalNullable[List[int]] = UNSET + r"""Seed to control random procedure. If nothing is given, random seed is used for sampling, and return the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.""" + + stop: OptionalNullable[List[str]] = UNSET + r"""When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list.""" + + stream: OptionalNullable[bool] = False + r"""Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.""" + + stream_options: OptionalNullable[DedicatedChatCompleteBodyStreamOptions] = UNSET + r"""Options related to stream. + It can only be used when `stream: true`. + + """ + + temperature: OptionalNullable[float] = 1 + r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" + + timeout_microseconds: OptionalNullable[int] = UNSET + r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. 
Default behavior is no timeout.""" + + tool_choice: Optional[DedicatedChatCompleteBodyToolChoice] = None + r"""Determines the tool calling behavior of the model. + When set to `none`, the model will bypass tool execution and generate a response directly. + In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. + Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. + You can also specify a particular tool by `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}`. + + """ + + tools: OptionalNullable[List[Tool]] = UNSET + r"""A list of tools the model may call. + Currently, only functions are supported as a tool. + A maximum of 128 functions is supported. + Use this to provide a list of functions the model may generate JSON inputs for. + + **When `tools` are specified, `min_tokens` field is unsupported.** + + """ + + top_k: OptionalNullable[int] = 0 + r"""The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.""" + + top_logprobs: OptionalNullable[int] = UNSET + r"""The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used.""" + + top_p: OptionalNullable[float] = 1 + r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. 
This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" + + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = [ + "eos_token", + "frequency_penalty", + "logit_bias", + "logprobs", + "max_tokens", + "min_tokens", + "n", + "parallel_tool_calls", + "presence_penalty", + "repetition_penalty", + "response_format", + "seed", + "stop", + "stream", + "stream_options", + "temperature", + "timeout_microseconds", + "tool_choice", + "tools", + "top_k", + "top_logprobs", + "top_p", + ] + nullable_fields = [ + "eos_token", + "frequency_penalty", + "logit_bias", + "logprobs", + "max_tokens", + "min_tokens", + "n", + "parallel_tool_calls", + "presence_penalty", + "repetition_penalty", + "response_format", + "seed", + "stop", + "stream", + "stream_options", + "temperature", + "timeout_microseconds", + "tools", + "top_k", + "top_logprobs", + "top_p", + ] + null_default_fields = [] + + serialized = handler(self) + + m = {} + + for n, f in self.model_fields.items(): + k = f.alias or n + val = serialized.get(k) + serialized.pop(k, None) + + optional_nullable = k in optional_fields and k in nullable_fields + is_set = ( + self.__pydantic_fields_set__.intersection({n}) + or k in null_default_fields + ) # pylint: disable=no-member + + if val is not None and val != UNSET_SENTINEL: + m[k] = val + elif val != UNSET_SENTINEL and ( + not k in optional_fields or (optional_nullable and is_set) + ): + m[k] = val + + return m diff --git a/src/friendli/models/dedicatedchatcompleteop.py b/src/friendli/models/dedicatedchatcompleteop.py index 70e520b..334ae7e 100644 --- a/src/friendli/models/dedicatedchatcompleteop.py +++ b/src/friendli/models/dedicatedchatcompleteop.py @@ -1,7 +1,10 @@ """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" from __future__ import annotations -from .chatcompletebody import ChatCompleteBody, ChatCompleteBodyTypedDict +from .dedicatedchatcompletebody import ( + DedicatedChatCompleteBody, + DedicatedChatCompleteBodyTypedDict, +) from friendli.types import BaseModel from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata import pydantic @@ -10,14 +13,14 @@ class DedicatedChatCompleteRequestTypedDict(TypedDict): - chat_complete_body: ChatCompleteBodyTypedDict + dedicated_chat_complete_body: DedicatedChatCompleteBodyTypedDict x_friendli_team: NotRequired[str] r"""ID of team to run requests as (optional parameter).""" class DedicatedChatCompleteRequest(BaseModel): - chat_complete_body: Annotated[ - ChatCompleteBody, + dedicated_chat_complete_body: Annotated[ + DedicatedChatCompleteBody, FieldMetadata(request=RequestMetadata(media_type="application/json")), ] diff --git a/src/friendli/models/dedicatedchatstreambody.py b/src/friendli/models/dedicatedchatstreambody.py new file mode 100644 index 0000000..7af6c23 --- /dev/null +++ b/src/friendli/models/dedicatedchatstreambody.py @@ -0,0 +1,383 @@ +"""Code generated by Speakeasy (https://speakeasy.com). 
DO NOT EDIT.""" + +from __future__ import annotations +from .message import Message, MessageTypedDict +from .responseformat import ResponseFormat, ResponseFormatTypedDict +from .tool import Tool, ToolTypedDict +from friendli.types import BaseModel, Nullable, OptionalNullable, UNSET, UNSET_SENTINEL +from pydantic import model_serializer +from typing import List, Literal, Optional, Union +from typing_extensions import NotRequired, TypedDict + + +class DedicatedChatStreamBodyLogitBiasTypedDict(TypedDict): + r"""Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.""" + + +class DedicatedChatStreamBodyLogitBias(BaseModel): + r"""Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.""" + + +class DedicatedChatStreamBodyStreamOptionsTypedDict(TypedDict): + r"""Options related to stream. + It can only be used when `stream: true`. + + """ + + include_usage: NotRequired[Nullable[bool]] + r"""When set to `true`, + the number of tokens used will be included at the end of the stream result in the form of + `\"usage\": {\"completion_tokens\": number, \"prompt_tokens\": number, \"total_tokens\": number}`. + + """ + + +class DedicatedChatStreamBodyStreamOptions(BaseModel): + r"""Options related to stream. + It can only be used when `stream: true`. + + """ + + include_usage: OptionalNullable[bool] = UNSET + r"""When set to `true`, + the number of tokens used will be included at the end of the stream result in the form of + `\"usage\": {\"completion_tokens\": number, \"prompt_tokens\": number, \"total_tokens\": number}`. + + """ + + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = ["include_usage"] + nullable_fields = ["include_usage"] + null_default_fields = [] + + serialized = handler(self) + + m = {} + + for n, f in self.model_fields.items(): + k = f.alias or n + val = serialized.get(k) + serialized.pop(k, None) + + optional_nullable = k in optional_fields and k in nullable_fields + is_set = ( + self.__pydantic_fields_set__.intersection({n}) + or k in null_default_fields + ) # pylint: disable=no-member + + if val is not None and val != UNSET_SENTINEL: + m[k] = val + elif val != UNSET_SENTINEL and ( + not k in optional_fields or (optional_nullable and is_set) + ): + m[k] = val + + return m + + +DedicatedChatStreamBodyToolChoiceType = Literal["function"] +r"""The type of the tool. Currently, only `function` is supported.""" + + +class DedicatedChatStreamBodyToolChoiceFunctionTypedDict(TypedDict): + name: str + r"""The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.""" + + +class DedicatedChatStreamBodyToolChoiceFunction(BaseModel): + name: str + r"""The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.""" + + +class DedicatedChatStreamBodyToolChoiceObjectTypedDict(TypedDict): + type: DedicatedChatStreamBodyToolChoiceType + r"""The type of the tool. Currently, only `function` is supported.""" + function: DedicatedChatStreamBodyToolChoiceFunctionTypedDict + + +class DedicatedChatStreamBodyToolChoiceObject(BaseModel): + type: DedicatedChatStreamBodyToolChoiceType + r"""The type of the tool. 
Currently, only `function` is supported.""" + + function: DedicatedChatStreamBodyToolChoiceFunction + + +DedicatedChatStreamBodyToolChoiceTypedDict = Union[ + DedicatedChatStreamBodyToolChoiceObjectTypedDict, str +] +r"""Determines the tool calling behavior of the model. +When set to `none`, the model will bypass tool execution and generate a response directly. +In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. +Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. +You can also specify a particular tool by `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}`. + +""" + + +DedicatedChatStreamBodyToolChoice = Union[DedicatedChatStreamBodyToolChoiceObject, str] +r"""Determines the tool calling behavior of the model. +When set to `none`, the model will bypass tool execution and generate a response directly. +In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. +Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. +You can also specify a particular tool by `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}`. + +""" + + +class DedicatedChatStreamBodyTypedDict(TypedDict): + model: str + r"""ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + messages: List[MessageTypedDict] + r"""A list of messages comprising the conversation so far.""" + eos_token: NotRequired[Nullable[List[int]]] + r"""A list of endpoint sentence tokens.""" + frequency_penalty: NotRequired[Nullable[float]] + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" + logit_bias: NotRequired[Nullable[DedicatedChatStreamBodyLogitBiasTypedDict]] + r"""Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.""" + logprobs: NotRequired[Nullable[bool]] + r"""Whether to return log probabilities of the output tokens or not.""" + max_tokens: NotRequired[Nullable[int]] + r"""The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.""" + min_tokens: NotRequired[Nullable[int]] + r"""The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument. + + **This field is unsupported when `tools` are specified.** + + """ + n: NotRequired[Nullable[int]] + r"""The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. 
This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.""" + parallel_tool_calls: NotRequired[Nullable[bool]] + r"""Whether to enable parallel function calling.""" + presence_penalty: NotRequired[Nullable[float]] + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled at least once in the existing text.""" + repetition_penalty: NotRequired[Nullable[float]] + r"""Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.""" + response_format: NotRequired[Nullable[ResponseFormatTypedDict]] + r"""The enforced format of the model's output. + + Note that the content of the output message may be truncated if it exceeds the `max_tokens`. + You can check this by verifying that the `finish_reason` of the output message is `length`. + + ***Important*** + You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`). + Otherwise, the model may result in an unending stream of whitespace or other characters. + + """ + seed: NotRequired[Nullable[List[int]]] + r"""Seed to control random procedure. If nothing is given, random seed is used for sampling, and return the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.""" + stop: NotRequired[Nullable[List[str]]] + r"""When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list.""" + stream: NotRequired[Nullable[bool]] + r"""Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.""" + stream_options: NotRequired[Nullable[DedicatedChatStreamBodyStreamOptionsTypedDict]] + r"""Options related to stream. + It can only be used when `stream: true`. + + """ + temperature: NotRequired[Nullable[float]] + r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" + timeout_microseconds: NotRequired[Nullable[int]] + r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.""" + tool_choice: NotRequired[DedicatedChatStreamBodyToolChoiceTypedDict] + r"""Determines the tool calling behavior of the model. + When set to `none`, the model will bypass tool execution and generate a response directly. + In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. 
+ Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. + You can also specify a particular tool by `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}`. + + """ + tools: NotRequired[Nullable[List[ToolTypedDict]]] + r"""A list of tools the model may call. + Currently, only functions are supported as a tool. + A maximum of 128 functions is supported. + Use this to provide a list of functions the model may generate JSON inputs for. + + **When `tools` are specified, `min_tokens` field is unsupported.** + + """ + top_k: NotRequired[Nullable[int]] + r"""The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.""" + top_logprobs: NotRequired[Nullable[int]] + r"""The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used.""" + top_p: NotRequired[Nullable[float]] + r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" + + +class DedicatedChatStreamBody(BaseModel): + model: str + r"""ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + + messages: List[Message] + r"""A list of messages comprising the conversation so far.""" + + eos_token: OptionalNullable[List[int]] = UNSET + r"""A list of endpoint sentence tokens.""" + + frequency_penalty: OptionalNullable[float] = UNSET + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" + + logit_bias: OptionalNullable[DedicatedChatStreamBodyLogitBias] = UNSET + r"""Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.""" + + logprobs: OptionalNullable[bool] = UNSET + r"""Whether to return log probabilities of the output tokens or not.""" + + max_tokens: OptionalNullable[int] = UNSET + r"""The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.""" + + min_tokens: OptionalNullable[int] = 0 + r"""The minimum number of tokens to generate. Default value is 0. 
This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument. + + **This field is unsupported when `tools` are specified.** + + """ + + n: OptionalNullable[int] = 1 + r"""The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.""" + + parallel_tool_calls: OptionalNullable[bool] = UNSET + r"""Whether to enable parallel function calling.""" + + presence_penalty: OptionalNullable[float] = UNSET + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled at least once in the existing text.""" + + repetition_penalty: OptionalNullable[float] = UNSET + r"""Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.""" + + response_format: OptionalNullable[ResponseFormat] = UNSET + r"""The enforced format of the model's output. + + Note that the content of the output message may be truncated if it exceeds the `max_tokens`. + You can check this by verifying that the `finish_reason` of the output message is `length`. + + ***Important*** + You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`). + Otherwise, the model may result in an unending stream of whitespace or other characters. + + """ + + seed: OptionalNullable[List[int]] = UNSET + r"""Seed to control random procedure. If nothing is given, random seed is used for sampling, and return the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.""" + + stop: OptionalNullable[List[str]] = UNSET + r"""When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list.""" + + stream: OptionalNullable[bool] = True + r"""Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.""" + + stream_options: OptionalNullable[DedicatedChatStreamBodyStreamOptions] = UNSET + r"""Options related to stream. + It can only be used when `stream: true`. + + """ + + temperature: OptionalNullable[float] = 1 + r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" + + timeout_microseconds: OptionalNullable[int] = UNSET + r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. 
Default behavior is no timeout.""" + + tool_choice: Optional[DedicatedChatStreamBodyToolChoice] = None + r"""Determines the tool calling behavior of the model. + When set to `none`, the model will bypass tool execution and generate a response directly. + In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. + Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. + You can also specify a particular tool by `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}`. + + """ + + tools: OptionalNullable[List[Tool]] = UNSET + r"""A list of tools the model may call. + Currently, only functions are supported as a tool. + A maximum of 128 functions is supported. + Use this to provide a list of functions the model may generate JSON inputs for. + + **When `tools` are specified, `min_tokens` field is unsupported.** + + """ + + top_k: OptionalNullable[int] = 0 + r"""The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.""" + + top_logprobs: OptionalNullable[int] = UNSET + r"""The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used.""" + + top_p: OptionalNullable[float] = 1 + r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. 
This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" + + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = [ + "eos_token", + "frequency_penalty", + "logit_bias", + "logprobs", + "max_tokens", + "min_tokens", + "n", + "parallel_tool_calls", + "presence_penalty", + "repetition_penalty", + "response_format", + "seed", + "stop", + "stream", + "stream_options", + "temperature", + "timeout_microseconds", + "tool_choice", + "tools", + "top_k", + "top_logprobs", + "top_p", + ] + nullable_fields = [ + "eos_token", + "frequency_penalty", + "logit_bias", + "logprobs", + "max_tokens", + "min_tokens", + "n", + "parallel_tool_calls", + "presence_penalty", + "repetition_penalty", + "response_format", + "seed", + "stop", + "stream", + "stream_options", + "temperature", + "timeout_microseconds", + "tools", + "top_k", + "top_logprobs", + "top_p", + ] + null_default_fields = [] + + serialized = handler(self) + + m = {} + + for n, f in self.model_fields.items(): + k = f.alias or n + val = serialized.get(k) + serialized.pop(k, None) + + optional_nullable = k in optional_fields and k in nullable_fields + is_set = ( + self.__pydantic_fields_set__.intersection({n}) + or k in null_default_fields + ) # pylint: disable=no-member + + if val is not None and val != UNSET_SENTINEL: + m[k] = val + elif val != UNSET_SENTINEL and ( + not k in optional_fields or (optional_nullable and is_set) + ): + m[k] = val + + return m diff --git a/src/friendli/models/dedicatedchatstreamop.py b/src/friendli/models/dedicatedchatstreamop.py index b25ccff..6198b98 100644 --- a/src/friendli/models/dedicatedchatstreamop.py +++ b/src/friendli/models/dedicatedchatstreamop.py @@ -1,7 +1,10 @@ """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" from __future__ import annotations -from .chatstreambody import ChatStreamBody, ChatStreamBodyTypedDict +from .dedicatedchatstreambody import ( + DedicatedChatStreamBody, + DedicatedChatStreamBodyTypedDict, +) from friendli.types import BaseModel from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata import pydantic @@ -10,14 +13,14 @@ class DedicatedChatStreamRequestTypedDict(TypedDict): - chat_stream_body: ChatStreamBodyTypedDict + dedicated_chat_stream_body: DedicatedChatStreamBodyTypedDict x_friendli_team: NotRequired[str] r"""ID of team to run requests as (optional parameter).""" class DedicatedChatStreamRequest(BaseModel): - chat_stream_body: Annotated[ - ChatStreamBody, + dedicated_chat_stream_body: Annotated[ + DedicatedChatStreamBody, FieldMetadata(request=RequestMetadata(media_type="application/json")), ] diff --git a/src/friendli/models/dedicatedcompletionscompletebody.py b/src/friendli/models/dedicatedcompletionscompletebody.py new file mode 100644 index 0000000..2d5d00c --- /dev/null +++ b/src/friendli/models/dedicatedcompletionscompletebody.py @@ -0,0 +1,679 @@ +"""Code generated by Speakeasy (https://speakeasy.com). 
DO NOT EDIT."""
+
+ from __future__ import annotations
+ from .responseformat import ResponseFormat, ResponseFormatTypedDict
+ from .tokensequence import TokenSequence, TokenSequenceTypedDict
+ from friendli.types import BaseModel, Nullable, OptionalNullable, UNSET, UNSET_SENTINEL
+ from pydantic import model_serializer
+ from typing import List, Union
+ from typing_extensions import NotRequired, TypedDict
+
+
+ class DedicatedCompletionsCompleteBodyCompletionsBodyWithTokensTypedDict(TypedDict):
+ tokens: List[int]
+ r"""The tokenized prompt (i.e., input tokens). Either `prompt` or `tokens` field is required."""
+ model: str
+ r"""ID of target endpoint. If you want to send a request to a specific adapter, use the \"ENDPOINT_ID:ADAPTER_ROUTE\" format."""
+ bad_word_tokens: NotRequired[Nullable[List[TokenSequenceTypedDict]]]
+ r"""Same as the above `bad_words` field, but receives token sequences instead of text phrases. This is similar to Hugging Face's [`bad_word_ids`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.bad_words_ids) argument."""
+ bad_words: NotRequired[Nullable[List[str]]]
+ r"""Text phrases that should not be generated.
+ For a bad word phrase that contains N tokens, if the first N-1 tokens appear at the end of the generated result, the logit for the last token of the phrase is set to -inf.
+ Before checking whether a bad word is included in the result, the word is converted into tokens.
+ We recommend using `bad_word_tokens` because it is clearer.
+ For example, after tokenization, phrases \"clear\" and \" clear\" can result in different token sequences due to the prepended space character.
+ Defaults to empty list.
+
+ """
+ beam_compat_no_post_normalization: NotRequired[Nullable[bool]]
+ beam_compat_pre_normalization: NotRequired[Nullable[bool]]
+ beam_search_type: NotRequired[Nullable[str]]
+ r"""One of `DETERMINISTIC`, `NAIVE_SAMPLING`, and `STOCHASTIC`. Which beam search type to use. `DETERMINISTIC` means the standard, deterministic beam search, which is similar to Hugging Face's [`beam_search`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_search). Arguments for controlling random sampling such as `top_k` and `top_p` are not allowed for this option. `NAIVE_SAMPLING` is similar to Hugging Face's [`beam_sample`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_sample). `STOCHASTIC` means stochastic beam search (more details in [Kool et al. (2019)](https://proceedings.mlr.press/v97/kool19a.html)). This option is ignored if `num_beams` is not provided. Defaults to `DETERMINISTIC`."""
+ early_stopping: NotRequired[Nullable[bool]]
+ r"""Whether to stop the beam search when at least `num_beams` beams are finished with the EOS token. Only allowed for beam search. Defaults to false. This is similar to Hugging Face's [`early_stopping`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.early_stopping) argument."""
+ embedding_to_replace: NotRequired[Nullable[List[float]]]
+ r"""A list of flattened embedding vectors used for replacing the tokens at the specified indices provided via `token_index_to_replace`."""
+ encoder_no_repeat_ngram: NotRequired[Nullable[int]]
+ r"""If this exceeds 1, every ngram of that size occurring in the input token sequence cannot appear in the generated result.
1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Only allowed for encoder-decoder models. Defaults to 1. This is similar to Hugging Face's [`encoder_no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_no_repeat_ngram_size) argument.""" + encoder_repetition_penalty: NotRequired[Nullable[float]] + r"""Penalizes tokens that have already appeared in the input tokens. Should be greater than or equal to 1.0. 1.0 means no penalty. Only allowed for encoder-decoder models. See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`encoder_repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_repetition_penalty) argument.""" + eos_token: NotRequired[Nullable[List[int]]] + r"""A list of endpoint sentence tokens.""" + forced_output_tokens: NotRequired[Nullable[List[int]]] + r"""A token sequence that is enforced as a generation output. This option can be used when evaluating the model for the datasets with multi-choice problems (e.g., [HellaSwag](https://huggingface.co/datasets/hellaswag), [MMLU](https://huggingface.co/datasets/cais/mmlu)). Use this option with `include_output_logprobs` to get logprobs for the evaluation.""" + frequency_penalty: NotRequired[Nullable[float]] + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" + include_output_logits: NotRequired[Nullable[bool]] + r"""Whether to include the output logits to the generation output.""" + include_output_logprobs: NotRequired[Nullable[bool]] + r"""Whether to include the output logprobs to the generation output.""" + length_penalty: NotRequired[Nullable[float]] + r"""Coefficient for exponential length penalty that is used with beam search. Only allowed for beam search. Defaults to 1.0. This is similar to Hugging Face's [`length_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.length_penalty) argument.""" + max_tokens: NotRequired[Nullable[int]] + r"""The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.""" + max_total_tokens: NotRequired[Nullable[int]] + r"""The maximum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `max_tokens` and `max_total_tokens` is allowed. Default value is the model's maximum length. This is similar to Hugging Face's [`max_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_length) argument.""" + min_tokens: NotRequired[Nullable[int]] + r"""The minimum number of tokens to generate. Default value is 0. 
This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument."""
+ min_total_tokens: NotRequired[Nullable[int]]
+ r"""The minimum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `min_tokens` and `min_total_tokens` is allowed. This is similar to Hugging Face's [`min_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.min_length) argument."""
+ n: NotRequired[Nullable[int]]
+ r"""The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument."""
+ no_repeat_ngram: NotRequired[Nullable[int]]
+ r"""If this exceeds 1, every ngram of that size can only occur once among the generated result (plus the input tokens for decoder-only models). 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Defaults to 1. This is similar to Hugging Face's [`no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.no_repeat_ngram_size) argument."""
+ num_beams: NotRequired[Nullable[int]]
+ r"""Number of beams for beam search. Numbers between 1 and 31 (both inclusive) are allowed. Default behavior is no beam search. This is similar to Hugging Face's [`num_beams`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_beams) argument."""
+ presence_penalty: NotRequired[Nullable[float]]
+ r"""Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text."""
+ repetition_penalty: NotRequired[Nullable[float]]
+ r"""Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument."""
+ response_format: NotRequired[Nullable[ResponseFormatTypedDict]]
+ r"""The enforced format of the model's output.
+
+ Note that the content of the output message may be truncated if it exceeds the `max_tokens`.
+ You can check this by verifying that the `finish_reason` of the output message is `length`.
+
+ ***Important***
+ You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`).
+ Otherwise, the model may result in an unending stream of whitespace or other characters.
+
+ """
+ seed: NotRequired[Nullable[List[int]]]
+ r"""Seed to control random procedure. If nothing is given, the API generates the seed randomly, uses it for sampling, and returns the seed along with the generated result.
When using the `n` argument, you can pass a list of seed values to control all of the independent generations.""" + stop: NotRequired[Nullable[List[str]]] + r"""When one of the stop phrases appears in the generation result, the API will stop generation. + The stop phrases are excluded from the result. + This option is incompatible with beam search (specified by `num_beams`); use `stop_tokens` for that case instead. + Defaults to empty list. + + """ + stop_tokens: NotRequired[Nullable[List[TokenSequenceTypedDict]]] + r"""Stop generating further tokens when generated token corresponds to any of the tokens in the sequence. + If beam search is enabled, all of the active beams should contain the stop token to terminate generation. + + """ + stream: NotRequired[Nullable[bool]] + r"""Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. Not supported when using beam search.""" + temperature: NotRequired[Nullable[float]] + r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" + timeout_microseconds: NotRequired[Nullable[int]] + r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.""" + token_index_to_replace: NotRequired[Nullable[List[int]]] + r"""A list of token indices where to replace the embeddings of input tokens provided via either `tokens` or `prompt`.""" + top_k: NotRequired[Nullable[int]] + r"""The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.""" + top_p: NotRequired[Nullable[float]] + r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" + + +class DedicatedCompletionsCompleteBodyCompletionsBodyWithTokens(BaseModel): + tokens: List[int] + r"""The tokenized prompt (i.e., input tokens). Either `prompt` or `tokens` field is required.""" + + model: str + r"""ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + + bad_word_tokens: OptionalNullable[List[TokenSequence]] = UNSET + r"""Same as the above `bad_words` field, but receives token sequences instead of text phrases. This is similar to Hugging Face's [`bad_word_ids`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.bad_words_ids) argument.""" + + bad_words: OptionalNullable[List[str]] = UNSET + r"""Text phrases that should not be generated. 
+ For a bad word phrase that contains N tokens, if the first N-1 tokens appear at the end of the generated result, the logit for the last token of the phrase is set to -inf.
+ Before checking whether a bad word is included in the result, the word is converted into tokens.
+ We recommend using `bad_word_tokens` because it is clearer.
+ For example, after tokenization, phrases \"clear\" and \" clear\" can result in different token sequences due to the prepended space character.
+ Defaults to empty list.
+
+ """
+
+ beam_compat_no_post_normalization: OptionalNullable[bool] = UNSET
+
+ beam_compat_pre_normalization: OptionalNullable[bool] = UNSET
+
+ beam_search_type: OptionalNullable[str] = "DETERMINISTIC"
+ r"""One of `DETERMINISTIC`, `NAIVE_SAMPLING`, and `STOCHASTIC`. Which beam search type to use. `DETERMINISTIC` means the standard, deterministic beam search, which is similar to Hugging Face's [`beam_search`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_search). Arguments for controlling random sampling such as `top_k` and `top_p` are not allowed for this option. `NAIVE_SAMPLING` is similar to Hugging Face's [`beam_sample`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_sample). `STOCHASTIC` means stochastic beam search (more details in [Kool et al. (2019)](https://proceedings.mlr.press/v97/kool19a.html)). This option is ignored if `num_beams` is not provided. Defaults to `DETERMINISTIC`."""
+
+ early_stopping: OptionalNullable[bool] = False
+ r"""Whether to stop the beam search when at least `num_beams` beams are finished with the EOS token. Only allowed for beam search. Defaults to false. This is similar to Hugging Face's [`early_stopping`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.early_stopping) argument."""
+
+ embedding_to_replace: OptionalNullable[List[float]] = UNSET
+ r"""A list of flattened embedding vectors used for replacing the tokens at the specified indices provided via `token_index_to_replace`."""
+
+ encoder_no_repeat_ngram: OptionalNullable[int] = 1
+ r"""If this exceeds 1, every ngram of that size occurring in the input token sequence cannot appear in the generated result. 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Only allowed for encoder-decoder models. Defaults to 1. This is similar to Hugging Face's [`encoder_no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_no_repeat_ngram_size) argument."""
+
+ encoder_repetition_penalty: OptionalNullable[float] = UNSET
+ r"""Penalizes tokens that have already appeared in the input tokens. Should be greater than or equal to 1.0. 1.0 means no penalty. Only allowed for encoder-decoder models. See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`encoder_repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_repetition_penalty) argument."""
+
+ eos_token: OptionalNullable[List[int]] = UNSET
+ r"""A list of endpoint sentence tokens."""
+
+ forced_output_tokens: OptionalNullable[List[int]] = UNSET
+ r"""A token sequence that is enforced as a generation output.
This option can be used when evaluating the model for the datasets with multi-choice problems (e.g., [HellaSwag](https://huggingface.co/datasets/hellaswag), [MMLU](https://huggingface.co/datasets/cais/mmlu)). Use this option with `include_output_logprobs` to get logprobs for the evaluation.""" + + frequency_penalty: OptionalNullable[float] = UNSET + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" + + include_output_logits: OptionalNullable[bool] = UNSET + r"""Whether to include the output logits to the generation output.""" + + include_output_logprobs: OptionalNullable[bool] = UNSET + r"""Whether to include the output logprobs to the generation output.""" + + length_penalty: OptionalNullable[float] = UNSET + r"""Coefficient for exponential length penalty that is used with beam search. Only allowed for beam search. Defaults to 1.0. This is similar to Hugging Face's [`length_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.length_penalty) argument.""" + + max_tokens: OptionalNullable[int] = UNSET + r"""The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.""" + + max_total_tokens: OptionalNullable[int] = UNSET + r"""The maximum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `max_tokens` and `max_total_tokens` is allowed. Default value is the model's maximum length. This is similar to Hugging Face's [`max_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_length) argument.""" + + min_tokens: OptionalNullable[int] = 0 + r"""The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.""" + + min_total_tokens: OptionalNullable[int] = UNSET + r"""The minimum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `min_tokens` and `min_total_tokens` is allowed. This is similar to Hugging Face's [`min_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.min_length) argument.""" + + n: OptionalNullable[int] = 1 + r"""The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. 
This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument."""
+
+ no_repeat_ngram: OptionalNullable[int] = 1
+ r"""If this exceeds 1, every ngram of that size can only occur once among the generated result (plus the input tokens for decoder-only models). 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Defaults to 1. This is similar to Hugging Face's [`no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.no_repeat_ngram_size) argument."""
+
+ num_beams: OptionalNullable[int] = UNSET
+ r"""Number of beams for beam search. Numbers between 1 and 31 (both inclusive) are allowed. Default behavior is no beam search. This is similar to Hugging Face's [`num_beams`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_beams) argument."""
+
+ presence_penalty: OptionalNullable[float] = UNSET
+ r"""Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text."""
+
+ repetition_penalty: OptionalNullable[float] = UNSET
+ r"""Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument."""
+
+ response_format: OptionalNullable[ResponseFormat] = UNSET
+ r"""The enforced format of the model's output.
+
+ Note that the content of the output message may be truncated if it exceeds the `max_tokens`.
+ You can check this by verifying that the `finish_reason` of the output message is `length`.
+
+ ***Important***
+ You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`).
+ Otherwise, the model may result in an unending stream of whitespace or other characters.
+
+ """
+
+ seed: OptionalNullable[List[int]] = UNSET
+ r"""Seed to control random procedure. If nothing is given, the API generates the seed randomly, uses it for sampling, and returns the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations."""
+
+ stop: OptionalNullable[List[str]] = UNSET
+ r"""When one of the stop phrases appears in the generation result, the API will stop generation.
+ The stop phrases are excluded from the result.
+ This option is incompatible with beam search (specified by `num_beams`); use `stop_tokens` for that case instead.
+ Defaults to empty list.
+
+ """
+
+ stop_tokens: OptionalNullable[List[TokenSequence]] = UNSET
+ r"""Stop generating further tokens when generated token corresponds to any of the tokens in the sequence.
+ If beam search is enabled, all of the active beams should contain the stop token to terminate generation.
+
+ """
+
+ stream: OptionalNullable[bool] = False
+ r"""Whether to stream generation result.
When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. Not supported when using beam search.""" + + temperature: OptionalNullable[float] = 1 + r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" + + timeout_microseconds: OptionalNullable[int] = UNSET + r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.""" + + token_index_to_replace: OptionalNullable[List[int]] = UNSET + r"""A list of token indices where to replace the embeddings of input tokens provided via either `tokens` or `prompt`.""" + + top_k: OptionalNullable[int] = 0 + r"""The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.""" + + top_p: OptionalNullable[float] = 1 + r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" + + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = [ + "bad_word_tokens", + "bad_words", + "beam_compat_no_post_normalization", + "beam_compat_pre_normalization", + "beam_search_type", + "early_stopping", + "embedding_to_replace", + "encoder_no_repeat_ngram", + "encoder_repetition_penalty", + "eos_token", + "forced_output_tokens", + "frequency_penalty", + "include_output_logits", + "include_output_logprobs", + "length_penalty", + "max_tokens", + "max_total_tokens", + "min_tokens", + "min_total_tokens", + "n", + "no_repeat_ngram", + "num_beams", + "presence_penalty", + "repetition_penalty", + "response_format", + "seed", + "stop", + "stop_tokens", + "stream", + "temperature", + "timeout_microseconds", + "token_index_to_replace", + "top_k", + "top_p", + ] + nullable_fields = [ + "bad_word_tokens", + "bad_words", + "beam_compat_no_post_normalization", + "beam_compat_pre_normalization", + "beam_search_type", + "early_stopping", + "embedding_to_replace", + "encoder_no_repeat_ngram", + "encoder_repetition_penalty", + "eos_token", + "forced_output_tokens", + "frequency_penalty", + "include_output_logits", + "include_output_logprobs", + "length_penalty", + "max_tokens", + "max_total_tokens", + "min_tokens", + "min_total_tokens", + "n", + "no_repeat_ngram", + "num_beams", + "presence_penalty", + "repetition_penalty", + "response_format", + "seed", + "stop", + "stop_tokens", + "stream", + "temperature", + "timeout_microseconds", + "token_index_to_replace", + "top_k", + "top_p", + ] + null_default_fields = [] + + serialized = handler(self) + + m = {} + + for n, f in self.model_fields.items(): + k = f.alias or n + val = serialized.get(k) + serialized.pop(k, None) + + optional_nullable = k in 
optional_fields and k in nullable_fields
+ is_set = (
+ self.__pydantic_fields_set__.intersection({n})
+ or k in null_default_fields
+ ) # pylint: disable=no-member
+
+ if val is not None and val != UNSET_SENTINEL:
+ m[k] = val
+ elif val != UNSET_SENTINEL and (
+ not k in optional_fields or (optional_nullable and is_set)
+ ):
+ m[k] = val
+
+ return m
+
+
+ class DedicatedCompletionsCompleteBodyCompletionsBodyWithPromptTypedDict(TypedDict):
+ prompt: str
+ r"""The prompt (i.e., input text) to generate completions for. Either `prompt` or `tokens` field is required."""
+ model: str
+ r"""ID of target endpoint. If you want to send a request to a specific adapter, use the \"ENDPOINT_ID:ADAPTER_ROUTE\" format."""
+ bad_word_tokens: NotRequired[Nullable[List[TokenSequenceTypedDict]]]
+ r"""Same as the above `bad_words` field, but receives token sequences instead of text phrases. This is similar to Hugging Face's [`bad_word_ids`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.bad_words_ids) argument."""
+ bad_words: NotRequired[Nullable[List[str]]]
+ r"""Text phrases that should not be generated.
+ For a bad word phrase that contains N tokens, if the first N-1 tokens appear at the end of the generated result, the logit for the last token of the phrase is set to -inf.
+ Before checking whether a bad word is included in the result, the word is converted into tokens.
+ We recommend using `bad_word_tokens` because it is clearer.
+ For example, after tokenization, phrases \"clear\" and \" clear\" can result in different token sequences due to the prepended space character.
+ Defaults to empty list.
+
+ """
+ beam_compat_no_post_normalization: NotRequired[Nullable[bool]]
+ beam_compat_pre_normalization: NotRequired[Nullable[bool]]
+ beam_search_type: NotRequired[Nullable[str]]
+ r"""One of `DETERMINISTIC`, `NAIVE_SAMPLING`, and `STOCHASTIC`. Which beam search type to use. `DETERMINISTIC` means the standard, deterministic beam search, which is similar to Hugging Face's [`beam_search`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_search). Arguments for controlling random sampling such as `top_k` and `top_p` are not allowed for this option. `NAIVE_SAMPLING` is similar to Hugging Face's [`beam_sample`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_sample). `STOCHASTIC` means stochastic beam search (more details in [Kool et al. (2019)](https://proceedings.mlr.press/v97/kool19a.html)). This option is ignored if `num_beams` is not provided. Defaults to `DETERMINISTIC`."""
+ early_stopping: NotRequired[Nullable[bool]]
+ r"""Whether to stop the beam search when at least `num_beams` beams are finished with the EOS token. Only allowed for beam search. Defaults to false. This is similar to Hugging Face's [`early_stopping`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.early_stopping) argument."""
+ embedding_to_replace: NotRequired[Nullable[List[float]]]
+ r"""A list of flattened embedding vectors used for replacing the tokens at the specified indices provided via `token_index_to_replace`."""
+ encoder_no_repeat_ngram: NotRequired[Nullable[int]]
+ r"""If this exceeds 1, every ngram of that size occurring in the input token sequence cannot appear in the generated result.
1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Only allowed for encoder-decoder models. Defaults to 1. This is similar to Hugging Face's [`encoder_no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_no_repeat_ngram_size) argument.""" + encoder_repetition_penalty: NotRequired[Nullable[float]] + r"""Penalizes tokens that have already appeared in the input tokens. Should be greater than or equal to 1.0. 1.0 means no penalty. Only allowed for encoder-decoder models. See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`encoder_repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_repetition_penalty) argument.""" + eos_token: NotRequired[Nullable[List[int]]] + r"""A list of endpoint sentence tokens.""" + forced_output_tokens: NotRequired[Nullable[List[int]]] + r"""A token sequence that is enforced as a generation output. This option can be used when evaluating the model for the datasets with multi-choice problems (e.g., [HellaSwag](https://huggingface.co/datasets/hellaswag), [MMLU](https://huggingface.co/datasets/cais/mmlu)). Use this option with `include_output_logprobs` to get logprobs for the evaluation.""" + frequency_penalty: NotRequired[Nullable[float]] + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" + include_output_logits: NotRequired[Nullable[bool]] + r"""Whether to include the output logits to the generation output.""" + include_output_logprobs: NotRequired[Nullable[bool]] + r"""Whether to include the output logprobs to the generation output.""" + length_penalty: NotRequired[Nullable[float]] + r"""Coefficient for exponential length penalty that is used with beam search. Only allowed for beam search. Defaults to 1.0. This is similar to Hugging Face's [`length_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.length_penalty) argument.""" + max_tokens: NotRequired[Nullable[int]] + r"""The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.""" + max_total_tokens: NotRequired[Nullable[int]] + r"""The maximum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `max_tokens` and `max_total_tokens` is allowed. Default value is the model's maximum length. This is similar to Hugging Face's [`max_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_length) argument.""" + min_tokens: NotRequired[Nullable[int]] + r"""The minimum number of tokens to generate. Default value is 0. 
This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument."""
+ min_total_tokens: NotRequired[Nullable[int]]
+ r"""The minimum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `min_tokens` and `min_total_tokens` is allowed. This is similar to Hugging Face's [`min_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.min_length) argument."""
+ n: NotRequired[Nullable[int]]
+ r"""The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument."""
+ no_repeat_ngram: NotRequired[Nullable[int]]
+ r"""If this exceeds 1, every ngram of that size can only occur once among the generated result (plus the input tokens for decoder-only models). 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Defaults to 1. This is similar to Hugging Face's [`no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.no_repeat_ngram_size) argument."""
+ num_beams: NotRequired[Nullable[int]]
+ r"""Number of beams for beam search. Numbers between 1 and 31 (both inclusive) are allowed. Default behavior is no beam search. This is similar to Hugging Face's [`num_beams`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_beams) argument."""
+ presence_penalty: NotRequired[Nullable[float]]
+ r"""Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text."""
+ repetition_penalty: NotRequired[Nullable[float]]
+ r"""Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument."""
+ response_format: NotRequired[Nullable[ResponseFormatTypedDict]]
+ r"""The enforced format of the model's output.
+
+ Note that the content of the output message may be truncated if it exceeds the `max_tokens`.
+ You can check this by verifying that the `finish_reason` of the output message is `length`.
+
+ ***Important***
+ You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`).
+ Otherwise, the model may result in an unending stream of whitespace or other characters.
+
+ """
+ seed: NotRequired[Nullable[List[int]]]
+ r"""Seed to control random procedure. If nothing is given, the API generates the seed randomly, uses it for sampling, and returns the seed along with the generated result.
When using the `n` argument, you can pass a list of seed values to control all of the independent generations.""" + stop: NotRequired[Nullable[List[str]]] + r"""When one of the stop phrases appears in the generation result, the API will stop generation. + The stop phrases are excluded from the result. + This option is incompatible with beam search (specified by `num_beams`); use `stop_tokens` for that case instead. + Defaults to empty list. + + """ + stop_tokens: NotRequired[Nullable[List[TokenSequenceTypedDict]]] + r"""Stop generating further tokens when generated token corresponds to any of the tokens in the sequence. + If beam search is enabled, all of the active beams should contain the stop token to terminate generation. + + """ + stream: NotRequired[Nullable[bool]] + r"""Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. Not supported when using beam search.""" + temperature: NotRequired[Nullable[float]] + r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" + timeout_microseconds: NotRequired[Nullable[int]] + r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.""" + token_index_to_replace: NotRequired[Nullable[List[int]]] + r"""A list of token indices where to replace the embeddings of input tokens provided via either `tokens` or `prompt`.""" + top_k: NotRequired[Nullable[int]] + r"""The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.""" + top_p: NotRequired[Nullable[float]] + r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" + + +class DedicatedCompletionsCompleteBodyCompletionsBodyWithPrompt(BaseModel): + prompt: str + r"""The prompt (i.e., input text) to generate completions for. Either `prompt` or `tokens` field is required.""" + + model: str + r"""ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + + bad_word_tokens: OptionalNullable[List[TokenSequence]] = UNSET + r"""Same as the above `bad_words` field, but receives token sequences instead of text phrases. This is similar to Hugging Face's [`bad_word_ids`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.bad_words_ids) argument.""" + + bad_words: OptionalNullable[List[str]] = UNSET + r"""Text phrases that should not be generated. 
+ For a bad word phrase that contains N tokens, if the first N-1 tokens appear at the end of the generated result, the logit for the last token of the phrase is set to -inf. + Before checking whether a bad word is included in the result, the word is converted into tokens. + We recommend using `bad_word_tokens` because it is clearer. + For example, after tokenization, phrases \"clear\" and \" clear\" can result in different token sequences due to the prepended space character. + Defaults to empty list. + + """ + + beam_compat_no_post_normalization: OptionalNullable[bool] = UNSET + + beam_compat_pre_normalization: OptionalNullable[bool] = UNSET + + beam_search_type: OptionalNullable[str] = "DETERMINISTIC" + r"""One of `DETERMINISTIC`, `NAIVE_SAMPLING`, and `STOCHASTIC`. Which beam search type to use. `DETERMINISTIC` means the standard, deterministic beam search, which is similar to Hugging Face's [`beam_search`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_search). Arguments for controlling random sampling such as `top_k` and `top_p` are not allowed for this option. `NAIVE_SAMPLING` is similar to Hugging Face's [`beam_sample`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_sample). `STOCHASTIC` means stochastic beam search (more details in [Kool et al. (2019)](https://proceedings.mlr.press/v97/kool19a.html)). This option is ignored if `num_beams` is not provided. Defaults to `DETERMINISTIC`.""" + + early_stopping: OptionalNullable[bool] = False + r"""Whether to stop the beam search when at least `num_beams` beams are finished with the EOS token. Only allowed for beam search. Defaults to false. This is similar to Hugging Face's [`early_stopping`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.early_stopping) argument.""" + + embedding_to_replace: OptionalNullable[List[float]] = UNSET + r"""A list of flattened embedding vectors used for replacing the tokens at the specified indices provided via `token_index_to_replace`.""" + + encoder_no_repeat_ngram: OptionalNullable[int] = 1 + r"""If this exceeds 1, every ngram of that size occurring in the input token sequence cannot appear in the generated result. 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Only allowed for encoder-decoder models. Defaults to 1. This is similar to Hugging Face's [`encoder_no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_no_repeat_ngram_size) argument.""" + + encoder_repetition_penalty: OptionalNullable[float] = UNSET + r"""Penalizes tokens that have already appeared in the input tokens. Should be greater than or equal to 1.0. 1.0 means no penalty. Only allowed for encoder-decoder models. See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`encoder_repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_repetition_penalty) argument.""" + + eos_token: OptionalNullable[List[int]] = UNSET + r"""A list of endpoint sentence tokens.""" + + forced_output_tokens: OptionalNullable[List[int]] = UNSET + r"""A token sequence that is enforced as a generation output.
This option can be used when evaluating the model on datasets with multiple-choice problems (e.g., [HellaSwag](https://huggingface.co/datasets/hellaswag), [MMLU](https://huggingface.co/datasets/cais/mmlu)). Use this option with `include_output_logprobs` to get logprobs for the evaluation.""" + + frequency_penalty: OptionalNullable[float] = UNSET + r"""Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" + + include_output_logits: OptionalNullable[bool] = UNSET + r"""Whether to include the output logits in the generation output.""" + + include_output_logprobs: OptionalNullable[bool] = UNSET + r"""Whether to include the output logprobs in the generation output.""" + + length_penalty: OptionalNullable[float] = UNSET + r"""Coefficient for exponential length penalty that is used with beam search. Only allowed for beam search. Defaults to 1.0. This is similar to Hugging Face's [`length_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.length_penalty) argument.""" + + max_tokens: OptionalNullable[int] = UNSET + r"""The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.""" + + max_total_tokens: OptionalNullable[int] = UNSET + r"""The maximum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `max_tokens` and `max_total_tokens` is allowed. Default value is the model's maximum length. This is similar to Hugging Face's [`max_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_length) argument.""" + + min_tokens: OptionalNullable[int] = 0 + r"""The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.""" + + min_total_tokens: OptionalNullable[int] = UNSET + r"""The minimum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `min_tokens` and `min_total_tokens` is allowed. This is similar to Hugging Face's [`min_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.min_length) argument.""" + + n: OptionalNullable[int] = 1 + r"""The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1.
This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.""" + + no_repeat_ngram: OptionalNullable[int] = 1 + r"""If this exceeds 1, every ngram of that size can only occur once among the generated result (plus the input tokens for decoder-only models). 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Defaults to 1. This is similar to Hugging Face's [`no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.no_repeat_ngram_size) argument.""" + + num_beams: OptionalNullable[int] = UNSET + r"""Number of beams for beam search. Numbers between 1 and 31 (both inclusive) are allowed. Default behavior is no beam search. This is similar to Hugging Face's [`num_beams`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_beams) argument.""" + + presence_penalty: OptionalNullable[float] = UNSET + r"""Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text.""" + + repetition_penalty: OptionalNullable[float] = UNSET + r"""Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.""" + + response_format: OptionalNullable[ResponseFormat] = UNSET + r"""The enforced format of the model's output. + + Note that the content of the output message may be truncated if it exceeds the `max_tokens`. + You can check this by verifying that the `finish_reason` of the output message is `length`. + + ***Important*** + You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`). + Otherwise, the model may produce an unending stream of whitespace or other characters. + + """ + + seed: OptionalNullable[List[int]] = UNSET + r"""Seed to control the random procedure. If nothing is given, the API generates the seed randomly, uses it for sampling, and returns the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.""" + + stop: OptionalNullable[List[str]] = UNSET + r"""When one of the stop phrases appears in the generation result, the API will stop generation. + The stop phrases are excluded from the result. + This option is incompatible with beam search (specified by `num_beams`); use `stop_tokens` for that case instead. + Defaults to empty list. + + """ + + stop_tokens: OptionalNullable[List[TokenSequence]] = UNSET + r"""Stop generating further tokens when a generated token corresponds to any of the tokens in the sequence. + If beam search is enabled, all of the active beams should contain the stop token to terminate generation. + + """ + + stream: OptionalNullable[bool] = False + r"""Whether to stream generation result.
When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. Not supported when using beam search.""" + + temperature: OptionalNullable[float] = 1 + r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" + + timeout_microseconds: OptionalNullable[int] = UNSET + r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.""" + + token_index_to_replace: OptionalNullable[List[int]] = UNSET + r"""A list of token indices where to replace the embeddings of input tokens provided via either `tokens` or `prompt`.""" + + top_k: OptionalNullable[int] = 0 + r"""The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.""" + + top_p: OptionalNullable[float] = 1 + r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" + + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = [ + "bad_word_tokens", + "bad_words", + "beam_compat_no_post_normalization", + "beam_compat_pre_normalization", + "beam_search_type", + "early_stopping", + "embedding_to_replace", + "encoder_no_repeat_ngram", + "encoder_repetition_penalty", + "eos_token", + "forced_output_tokens", + "frequency_penalty", + "include_output_logits", + "include_output_logprobs", + "length_penalty", + "max_tokens", + "max_total_tokens", + "min_tokens", + "min_total_tokens", + "n", + "no_repeat_ngram", + "num_beams", + "presence_penalty", + "repetition_penalty", + "response_format", + "seed", + "stop", + "stop_tokens", + "stream", + "temperature", + "timeout_microseconds", + "token_index_to_replace", + "top_k", + "top_p", + ] + nullable_fields = [ + "bad_word_tokens", + "bad_words", + "beam_compat_no_post_normalization", + "beam_compat_pre_normalization", + "beam_search_type", + "early_stopping", + "embedding_to_replace", + "encoder_no_repeat_ngram", + "encoder_repetition_penalty", + "eos_token", + "forced_output_tokens", + "frequency_penalty", + "include_output_logits", + "include_output_logprobs", + "length_penalty", + "max_tokens", + "max_total_tokens", + "min_tokens", + "min_total_tokens", + "n", + "no_repeat_ngram", + "num_beams", + "presence_penalty", + "repetition_penalty", + "response_format", + "seed", + "stop", + "stop_tokens", + "stream", + "temperature", + "timeout_microseconds", + "token_index_to_replace", + "top_k", + "top_p", + ] + null_default_fields = [] + + serialized = handler(self) + + m = {} + + for n, f in self.model_fields.items(): + k = f.alias or n + val = serialized.get(k) + serialized.pop(k, None) + + optional_nullable = k in 
optional_fields and k in nullable_fields + is_set = ( + self.__pydantic_fields_set__.intersection({n}) + or k in null_default_fields + ) # pylint: disable=no-member + + if val is not None and val != UNSET_SENTINEL: + m[k] = val + elif val != UNSET_SENTINEL and ( + not k in optional_fields or (optional_nullable and is_set) + ): + m[k] = val + + return m + + +DedicatedCompletionsCompleteBodyTypedDict = Union[ + DedicatedCompletionsCompleteBodyCompletionsBodyWithPromptTypedDict, + DedicatedCompletionsCompleteBodyCompletionsBodyWithTokensTypedDict, +] + + +DedicatedCompletionsCompleteBody = Union[ + DedicatedCompletionsCompleteBodyCompletionsBodyWithPrompt, + DedicatedCompletionsCompleteBodyCompletionsBodyWithTokens, +] diff --git a/src/friendli/models/dedicatedcompletionscompleteop.py b/src/friendli/models/dedicatedcompletionscompleteop.py index dfd690c..2a8bbe1 100644 --- a/src/friendli/models/dedicatedcompletionscompleteop.py +++ b/src/friendli/models/dedicatedcompletionscompleteop.py @@ -1,9 +1,9 @@ """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" from __future__ import annotations -from .completionscompletebody import ( - CompletionsCompleteBody, - CompletionsCompleteBodyTypedDict, +from .dedicatedcompletionscompletebody import ( + DedicatedCompletionsCompleteBody, + DedicatedCompletionsCompleteBodyTypedDict, ) from friendli.types import BaseModel from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata @@ -13,14 +13,14 @@ class DedicatedCompletionsCompleteRequestTypedDict(TypedDict): - completions_complete_body: CompletionsCompleteBodyTypedDict + dedicated_completions_complete_body: DedicatedCompletionsCompleteBodyTypedDict x_friendli_team: NotRequired[str] r"""ID of team to run requests as (optional parameter).""" class DedicatedCompletionsCompleteRequest(BaseModel): - completions_complete_body: Annotated[ - CompletionsCompleteBody, + dedicated_completions_complete_body: Annotated[ + DedicatedCompletionsCompleteBody, FieldMetadata(request=RequestMetadata(media_type="application/json")), ] diff --git a/src/friendli/models/dedicatedcompletionsstreambody.py b/src/friendli/models/dedicatedcompletionsstreambody.py new file mode 100644 index 0000000..5dd6324 --- /dev/null +++ b/src/friendli/models/dedicatedcompletionsstreambody.py @@ -0,0 +1,679 @@ +"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" + +from __future__ import annotations +from .responseformat import ResponseFormat, ResponseFormatTypedDict +from .tokensequence import TokenSequence, TokenSequenceTypedDict +from friendli.types import BaseModel, Nullable, OptionalNullable, UNSET, UNSET_SENTINEL +from pydantic import model_serializer +from typing import List, Union +from typing_extensions import NotRequired, TypedDict + + +class DedicatedCompletionsStreamBodyCompletionsBodyWithTokensTypedDict(TypedDict): + tokens: List[int] + r"""The tokenized prompt (i.e., input tokens). Either `prompt` or `tokens` field is required.""" + model: str + r"""ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + bad_word_tokens: NotRequired[Nullable[List[TokenSequenceTypedDict]]] + r"""Same as the above `bad_words` field, but receives token sequences instead of text phrases. 
This is similar to Hugging Face's [`bad_word_ids`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.bad_words_ids) argument.""" + bad_words: NotRequired[Nullable[List[str]]] + r"""Text phrases that should not be generated. + For a bad word phrase that contains N tokens, if the first N-1 tokens appears at the last of the generated result, the logit for the last token of the phrase is set to -inf. + Before checking whether a bard word is included in the result, the word is converted into tokens. + We recommend using `bad_word_tokens` because it is clearer. + For example, after tokenization, phrases \"clear\" and \" clear\" can result in different token sequences due to the prepended space character. + Defaults to empty list. + + """ + beam_compat_no_post_normalization: NotRequired[Nullable[bool]] + beam_compat_pre_normalization: NotRequired[Nullable[bool]] + beam_search_type: NotRequired[Nullable[str]] + r"""One of `DETERMINISTIC`, `NAIVE_SAMPLING`, and `STOCHASTIC`. Which beam search type to use. `DETERMINISTIC` means the standard, deterministic beam search, which is similar to Hugging Face's [`beam_search`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_search). Argmuents for controlling random sampling such as `top_k` and `top_p` are not allowed for this option. `NAIVE_SAMPLING` is similar to Hugging Face's [`beam_sample`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_sample). `STOCHASTIC` means stochastic beam search (more details in [Kool et al. (2019)](https://proceedings.mlr.press/v97/kool19a.html)). This option is ignored if `num_beams` is not provided. Defaults to `DETERMINISTIC`.""" + early_stopping: NotRequired[Nullable[bool]] + r"""Whether to stop the beam search when at least `num_beams` beams are finished with the EOS token. Only allowed for beam search. Defaults to false. This is similar to Hugging Face's [`early_stopping`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.early_stopping) argument.""" + embedding_to_replace: NotRequired[Nullable[List[float]]] + r"""A list of flattened embedding vectors used for replacing the tokens at the specified indices provided via `token_index_to_replace`.""" + encoder_no_repeat_ngram: NotRequired[Nullable[int]] + r"""If this exceeds 1, every ngram of that size occurring in the input token sequence cannot appear in the generated result. 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Only allowed for encoder-decoder models. Defaults to 1. This is similar to Hugging Face's [`encoder_no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_no_repeat_ngram_size) argument.""" + encoder_repetition_penalty: NotRequired[Nullable[float]] + r"""Penalizes tokens that have already appeared in the input tokens. Should be greater than or equal to 1.0. 1.0 means no penalty. Only allowed for encoder-decoder models. See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. 
This is similar to Hugging Face's [`encoder_repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_repetition_penalty) argument.""" + eos_token: NotRequired[Nullable[List[int]]] + r"""A list of endpoint sentence tokens.""" + forced_output_tokens: NotRequired[Nullable[List[int]]] + r"""A token sequence that is enforced as a generation output. This option can be used when evaluating the model for the datasets with multi-choice problems (e.g., [HellaSwag](https://huggingface.co/datasets/hellaswag), [MMLU](https://huggingface.co/datasets/cais/mmlu)). Use this option with `include_output_logprobs` to get logprobs for the evaluation.""" + frequency_penalty: NotRequired[Nullable[float]] + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" + include_output_logits: NotRequired[Nullable[bool]] + r"""Whether to include the output logits to the generation output.""" + include_output_logprobs: NotRequired[Nullable[bool]] + r"""Whether to include the output logprobs to the generation output.""" + length_penalty: NotRequired[Nullable[float]] + r"""Coefficient for exponential length penalty that is used with beam search. Only allowed for beam search. Defaults to 1.0. This is similar to Hugging Face's [`length_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.length_penalty) argument.""" + max_tokens: NotRequired[Nullable[int]] + r"""The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.""" + max_total_tokens: NotRequired[Nullable[int]] + r"""The maximum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `max_tokens` and `max_total_tokens` is allowed. Default value is the model's maximum length. This is similar to Hugging Face's [`max_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_length) argument.""" + min_tokens: NotRequired[Nullable[int]] + r"""The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.""" + min_total_tokens: NotRequired[Nullable[int]] + r"""The minimum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `min_tokens` and `min_total_tokens` is allowed. This is similar to Hugging Face's [`min_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.min_length) argument.""" + n: NotRequired[Nullable[int]] + r"""The number of independently generated results for the prompt. Not supported when using beam search. 
Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.""" + no_repeat_ngram: NotRequired[Nullable[int]] + r"""If this exceeds 1, every ngram of that size can only occur once among the generated result (plus the input tokens for decoder-only models). 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Defaults to 1. This is similar to Hugging Face's [`no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.no_repeat_ngram_size) argument.""" + num_beams: NotRequired[Nullable[int]] + r"""Number of beams for beam search. Numbers between 1 and 31 (both inclusive) are allowed. Default behavior is no beam search. This is similar to Hugging Face's [`num_beams`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_beams) argument.""" + presence_penalty: NotRequired[Nullable[float]] + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled at least once in the existing text.""" + repetition_penalty: NotRequired[Nullable[float]] + r"""Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.""" + response_format: NotRequired[Nullable[ResponseFormatTypedDict]] + r"""The enforced format of the model's output. + + Note that the content of the output message may be truncated if it exceeds the `max_tokens`. + You can check this by verifying that the `finish_reason` of the output message is `length`. + + ***Important*** + You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`). + Otherwise, the model may result in an unending stream of whitespace or other characters. + + """ + seed: NotRequired[Nullable[List[int]]] + r"""Seed to control random procedure. If nothing is given, the API generate the seed randomly, use it for sampling, and return the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.""" + stop: NotRequired[Nullable[List[str]]] + r"""When one of the stop phrases appears in the generation result, the API will stop generation. + The stop phrases are excluded from the result. + This option is incompatible with beam search (specified by `num_beams`); use `stop_tokens` for that case instead. + Defaults to empty list. + + """ + stop_tokens: NotRequired[Nullable[List[TokenSequenceTypedDict]]] + r"""Stop generating further tokens when generated token corresponds to any of the tokens in the sequence. + If beam search is enabled, all of the active beams should contain the stop token to terminate generation. + + """ + stream: NotRequired[Nullable[bool]] + r"""Whether to stream generation result. 
When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.""" + temperature: NotRequired[Nullable[float]] + r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" + timeout_microseconds: NotRequired[Nullable[int]] + r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.""" + token_index_to_replace: NotRequired[Nullable[List[int]]] + r"""A list of token indices where to replace the embeddings of input tokens provided via either `tokens` or `prompt`.""" + top_k: NotRequired[Nullable[int]] + r"""The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.""" + top_p: NotRequired[Nullable[float]] + r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" + + +class DedicatedCompletionsStreamBodyCompletionsBodyWithTokens(BaseModel): + tokens: List[int] + r"""The tokenized prompt (i.e., input tokens). Either `prompt` or `tokens` field is required.""" + + model: str + r"""ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + + bad_word_tokens: OptionalNullable[List[TokenSequence]] = UNSET + r"""Same as the above `bad_words` field, but receives token sequences instead of text phrases. This is similar to Hugging Face's [`bad_word_ids`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.bad_words_ids) argument.""" + + bad_words: OptionalNullable[List[str]] = UNSET + r"""Text phrases that should not be generated. + For a bad word phrase that contains N tokens, if the first N-1 tokens appears at the last of the generated result, the logit for the last token of the phrase is set to -inf. + Before checking whether a bard word is included in the result, the word is converted into tokens. + We recommend using `bad_word_tokens` because it is clearer. + For example, after tokenization, phrases \"clear\" and \" clear\" can result in different token sequences due to the prepended space character. + Defaults to empty list. + + """ + + beam_compat_no_post_normalization: OptionalNullable[bool] = UNSET + + beam_compat_pre_normalization: OptionalNullable[bool] = UNSET + + beam_search_type: OptionalNullable[str] = "DETERMINISTIC" + r"""One of `DETERMINISTIC`, `NAIVE_SAMPLING`, and `STOCHASTIC`. Which beam search type to use. 
`DETERMINISTIC` means the standard, deterministic beam search, which is similar to Hugging Face's [`beam_search`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_search). Argmuents for controlling random sampling such as `top_k` and `top_p` are not allowed for this option. `NAIVE_SAMPLING` is similar to Hugging Face's [`beam_sample`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_sample). `STOCHASTIC` means stochastic beam search (more details in [Kool et al. (2019)](https://proceedings.mlr.press/v97/kool19a.html)). This option is ignored if `num_beams` is not provided. Defaults to `DETERMINISTIC`.""" + + early_stopping: OptionalNullable[bool] = False + r"""Whether to stop the beam search when at least `num_beams` beams are finished with the EOS token. Only allowed for beam search. Defaults to false. This is similar to Hugging Face's [`early_stopping`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.early_stopping) argument.""" + + embedding_to_replace: OptionalNullable[List[float]] = UNSET + r"""A list of flattened embedding vectors used for replacing the tokens at the specified indices provided via `token_index_to_replace`.""" + + encoder_no_repeat_ngram: OptionalNullable[int] = 1 + r"""If this exceeds 1, every ngram of that size occurring in the input token sequence cannot appear in the generated result. 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Only allowed for encoder-decoder models. Defaults to 1. This is similar to Hugging Face's [`encoder_no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_no_repeat_ngram_size) argument.""" + + encoder_repetition_penalty: OptionalNullable[float] = UNSET + r"""Penalizes tokens that have already appeared in the input tokens. Should be greater than or equal to 1.0. 1.0 means no penalty. Only allowed for encoder-decoder models. See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`encoder_repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_repetition_penalty) argument.""" + + eos_token: OptionalNullable[List[int]] = UNSET + r"""A list of endpoint sentence tokens.""" + + forced_output_tokens: OptionalNullable[List[int]] = UNSET + r"""A token sequence that is enforced as a generation output. This option can be used when evaluating the model for the datasets with multi-choice problems (e.g., [HellaSwag](https://huggingface.co/datasets/hellaswag), [MMLU](https://huggingface.co/datasets/cais/mmlu)). Use this option with `include_output_logprobs` to get logprobs for the evaluation.""" + + frequency_penalty: OptionalNullable[float] = UNSET + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. 
This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" + + include_output_logits: OptionalNullable[bool] = UNSET + r"""Whether to include the output logits to the generation output.""" + + include_output_logprobs: OptionalNullable[bool] = UNSET + r"""Whether to include the output logprobs to the generation output.""" + + length_penalty: OptionalNullable[float] = UNSET + r"""Coefficient for exponential length penalty that is used with beam search. Only allowed for beam search. Defaults to 1.0. This is similar to Hugging Face's [`length_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.length_penalty) argument.""" + + max_tokens: OptionalNullable[int] = UNSET + r"""The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.""" + + max_total_tokens: OptionalNullable[int] = UNSET + r"""The maximum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `max_tokens` and `max_total_tokens` is allowed. Default value is the model's maximum length. This is similar to Hugging Face's [`max_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_length) argument.""" + + min_tokens: OptionalNullable[int] = 0 + r"""The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.""" + + min_total_tokens: OptionalNullable[int] = UNSET + r"""The minimum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `min_tokens` and `min_total_tokens` is allowed. This is similar to Hugging Face's [`min_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.min_length) argument.""" + + n: OptionalNullable[int] = 1 + r"""The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.""" + + no_repeat_ngram: OptionalNullable[int] = 1 + r"""If this exceeds 1, every ngram of that size can only occur once among the generated result (plus the input tokens for decoder-only models). 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Defaults to 1. This is similar to Hugging Face's [`no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.no_repeat_ngram_size) argument.""" + + num_beams: OptionalNullable[int] = UNSET + r"""Number of beams for beam search. Numbers between 1 and 31 (both inclusive) are allowed. 
Default behavior is no beam search. This is similar to Hugging Face's [`num_beams`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_beams) argument.""" + + presence_penalty: OptionalNullable[float] = UNSET + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled at least once in the existing text.""" + + repetition_penalty: OptionalNullable[float] = UNSET + r"""Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.""" + + response_format: OptionalNullable[ResponseFormat] = UNSET + r"""The enforced format of the model's output. + + Note that the content of the output message may be truncated if it exceeds the `max_tokens`. + You can check this by verifying that the `finish_reason` of the output message is `length`. + + ***Important*** + You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`). + Otherwise, the model may result in an unending stream of whitespace or other characters. + + """ + + seed: OptionalNullable[List[int]] = UNSET + r"""Seed to control random procedure. If nothing is given, the API generate the seed randomly, use it for sampling, and return the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.""" + + stop: OptionalNullable[List[str]] = UNSET + r"""When one of the stop phrases appears in the generation result, the API will stop generation. + The stop phrases are excluded from the result. + This option is incompatible with beam search (specified by `num_beams`); use `stop_tokens` for that case instead. + Defaults to empty list. + + """ + + stop_tokens: OptionalNullable[List[TokenSequence]] = UNSET + r"""Stop generating further tokens when generated token corresponds to any of the tokens in the sequence. + If beam search is enabled, all of the active beams should contain the stop token to terminate generation. + + """ + + stream: OptionalNullable[bool] = True + r"""Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.""" + + temperature: OptionalNullable[float] = 1 + r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" + + timeout_microseconds: OptionalNullable[int] = UNSET + r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. 
Default behavior is no timeout.""" + + token_index_to_replace: OptionalNullable[List[int]] = UNSET + r"""A list of token indices where to replace the embeddings of input tokens provided via either `tokens` or `prompt`.""" + + top_k: OptionalNullable[int] = 0 + r"""The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.""" + + top_p: OptionalNullable[float] = 1 + r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" + + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = [ + "bad_word_tokens", + "bad_words", + "beam_compat_no_post_normalization", + "beam_compat_pre_normalization", + "beam_search_type", + "early_stopping", + "embedding_to_replace", + "encoder_no_repeat_ngram", + "encoder_repetition_penalty", + "eos_token", + "forced_output_tokens", + "frequency_penalty", + "include_output_logits", + "include_output_logprobs", + "length_penalty", + "max_tokens", + "max_total_tokens", + "min_tokens", + "min_total_tokens", + "n", + "no_repeat_ngram", + "num_beams", + "presence_penalty", + "repetition_penalty", + "response_format", + "seed", + "stop", + "stop_tokens", + "stream", + "temperature", + "timeout_microseconds", + "token_index_to_replace", + "top_k", + "top_p", + ] + nullable_fields = [ + "bad_word_tokens", + "bad_words", + "beam_compat_no_post_normalization", + "beam_compat_pre_normalization", + "beam_search_type", + "early_stopping", + "embedding_to_replace", + "encoder_no_repeat_ngram", + "encoder_repetition_penalty", + "eos_token", + "forced_output_tokens", + "frequency_penalty", + "include_output_logits", + "include_output_logprobs", + "length_penalty", + "max_tokens", + "max_total_tokens", + "min_tokens", + "min_total_tokens", + "n", + "no_repeat_ngram", + "num_beams", + "presence_penalty", + "repetition_penalty", + "response_format", + "seed", + "stop", + "stop_tokens", + "stream", + "temperature", + "timeout_microseconds", + "token_index_to_replace", + "top_k", + "top_p", + ] + null_default_fields = [] + + serialized = handler(self) + + m = {} + + for n, f in self.model_fields.items(): + k = f.alias or n + val = serialized.get(k) + serialized.pop(k, None) + + optional_nullable = k in optional_fields and k in nullable_fields + is_set = ( + self.__pydantic_fields_set__.intersection({n}) + or k in null_default_fields + ) # pylint: disable=no-member + + if val is not None and val != UNSET_SENTINEL: + m[k] = val + elif val != UNSET_SENTINEL and ( + not k in optional_fields or (optional_nullable and is_set) + ): + m[k] = val + + return m + + +class DedicatedCompletionsStreamBodyCompletionsBodyWithPromptTypedDict(TypedDict): + prompt: str + r"""The prompt (i.e., input text) to generate completions for. Either `prompt` or `tokens` field is required.""" + model: str + r"""ID of target endpoint. 
If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + bad_word_tokens: NotRequired[Nullable[List[TokenSequenceTypedDict]]] + r"""Same as the above `bad_words` field, but receives token sequences instead of text phrases. This is similar to Hugging Face's [`bad_word_ids`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.bad_words_ids) argument.""" + bad_words: NotRequired[Nullable[List[str]]] + r"""Text phrases that should not be generated. + For a bad word phrase that contains N tokens, if the first N-1 tokens appears at the last of the generated result, the logit for the last token of the phrase is set to -inf. + Before checking whether a bard word is included in the result, the word is converted into tokens. + We recommend using `bad_word_tokens` because it is clearer. + For example, after tokenization, phrases \"clear\" and \" clear\" can result in different token sequences due to the prepended space character. + Defaults to empty list. + + """ + beam_compat_no_post_normalization: NotRequired[Nullable[bool]] + beam_compat_pre_normalization: NotRequired[Nullable[bool]] + beam_search_type: NotRequired[Nullable[str]] + r"""One of `DETERMINISTIC`, `NAIVE_SAMPLING`, and `STOCHASTIC`. Which beam search type to use. `DETERMINISTIC` means the standard, deterministic beam search, which is similar to Hugging Face's [`beam_search`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_search). Argmuents for controlling random sampling such as `top_k` and `top_p` are not allowed for this option. `NAIVE_SAMPLING` is similar to Hugging Face's [`beam_sample`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_sample). `STOCHASTIC` means stochastic beam search (more details in [Kool et al. (2019)](https://proceedings.mlr.press/v97/kool19a.html)). This option is ignored if `num_beams` is not provided. Defaults to `DETERMINISTIC`.""" + early_stopping: NotRequired[Nullable[bool]] + r"""Whether to stop the beam search when at least `num_beams` beams are finished with the EOS token. Only allowed for beam search. Defaults to false. This is similar to Hugging Face's [`early_stopping`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.early_stopping) argument.""" + embedding_to_replace: NotRequired[Nullable[List[float]]] + r"""A list of flattened embedding vectors used for replacing the tokens at the specified indices provided via `token_index_to_replace`.""" + encoder_no_repeat_ngram: NotRequired[Nullable[int]] + r"""If this exceeds 1, every ngram of that size occurring in the input token sequence cannot appear in the generated result. 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Only allowed for encoder-decoder models. Defaults to 1. This is similar to Hugging Face's [`encoder_no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_no_repeat_ngram_size) argument.""" + encoder_repetition_penalty: NotRequired[Nullable[float]] + r"""Penalizes tokens that have already appeared in the input tokens. Should be greater than or equal to 1.0. 1.0 means no penalty. Only allowed for encoder-decoder models. See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. 
This is similar to Hugging Face's [`encoder_repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_repetition_penalty) argument.""" + eos_token: NotRequired[Nullable[List[int]]] + r"""A list of endpoint sentence tokens.""" + forced_output_tokens: NotRequired[Nullable[List[int]]] + r"""A token sequence that is enforced as a generation output. This option can be used when evaluating the model for the datasets with multi-choice problems (e.g., [HellaSwag](https://huggingface.co/datasets/hellaswag), [MMLU](https://huggingface.co/datasets/cais/mmlu)). Use this option with `include_output_logprobs` to get logprobs for the evaluation.""" + frequency_penalty: NotRequired[Nullable[float]] + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" + include_output_logits: NotRequired[Nullable[bool]] + r"""Whether to include the output logits to the generation output.""" + include_output_logprobs: NotRequired[Nullable[bool]] + r"""Whether to include the output logprobs to the generation output.""" + length_penalty: NotRequired[Nullable[float]] + r"""Coefficient for exponential length penalty that is used with beam search. Only allowed for beam search. Defaults to 1.0. This is similar to Hugging Face's [`length_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.length_penalty) argument.""" + max_tokens: NotRequired[Nullable[int]] + r"""The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.""" + max_total_tokens: NotRequired[Nullable[int]] + r"""The maximum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `max_tokens` and `max_total_tokens` is allowed. Default value is the model's maximum length. This is similar to Hugging Face's [`max_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_length) argument.""" + min_tokens: NotRequired[Nullable[int]] + r"""The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.""" + min_total_tokens: NotRequired[Nullable[int]] + r"""The minimum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `min_tokens` and `min_total_tokens` is allowed. This is similar to Hugging Face's [`min_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.min_length) argument.""" + n: NotRequired[Nullable[int]] + r"""The number of independently generated results for the prompt. Not supported when using beam search. 
Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.""" + no_repeat_ngram: NotRequired[Nullable[int]] + r"""If this exceeds 1, every ngram of that size can only occur once among the generated result (plus the input tokens for decoder-only models). 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Defaults to 1. This is similar to Hugging Face's [`no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.no_repeat_ngram_size) argument.""" + num_beams: NotRequired[Nullable[int]] + r"""Number of beams for beam search. Numbers between 1 and 31 (both inclusive) are allowed. Default behavior is no beam search. This is similar to Hugging Face's [`num_beams`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_beams) argument.""" + presence_penalty: NotRequired[Nullable[float]] + r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled at least once in the existing text.""" + repetition_penalty: NotRequired[Nullable[float]] + r"""Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.""" + response_format: NotRequired[Nullable[ResponseFormatTypedDict]] + r"""The enforced format of the model's output. + + Note that the content of the output message may be truncated if it exceeds the `max_tokens`. + You can check this by verifying that the `finish_reason` of the output message is `length`. + + ***Important*** + You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`). + Otherwise, the model may result in an unending stream of whitespace or other characters. + + """ + seed: NotRequired[Nullable[List[int]]] + r"""Seed to control random procedure. If nothing is given, the API generate the seed randomly, use it for sampling, and return the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.""" + stop: NotRequired[Nullable[List[str]]] + r"""When one of the stop phrases appears in the generation result, the API will stop generation. + The stop phrases are excluded from the result. + This option is incompatible with beam search (specified by `num_beams`); use `stop_tokens` for that case instead. + Defaults to empty list. + + """ + stop_tokens: NotRequired[Nullable[List[TokenSequenceTypedDict]]] + r"""Stop generating further tokens when generated token corresponds to any of the tokens in the sequence. + If beam search is enabled, all of the active beams should contain the stop token to terminate generation. + + """ + stream: NotRequired[Nullable[bool]] + r"""Whether to stream generation result. 
When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.""" + temperature: NotRequired[Nullable[float]] + r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" + timeout_microseconds: NotRequired[Nullable[int]] + r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.""" + token_index_to_replace: NotRequired[Nullable[List[int]]] + r"""A list of token indices where to replace the embeddings of input tokens provided via either `tokens` or `prompt`.""" + top_k: NotRequired[Nullable[int]] + r"""The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.""" + top_p: NotRequired[Nullable[float]] + r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" + + +class DedicatedCompletionsStreamBodyCompletionsBodyWithPrompt(BaseModel): + prompt: str + r"""The prompt (i.e., input text) to generate completions for. Either `prompt` or `tokens` field is required.""" + + model: str + r"""ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + + bad_word_tokens: OptionalNullable[List[TokenSequence]] = UNSET + r"""Same as the above `bad_words` field, but receives token sequences instead of text phrases. This is similar to Hugging Face's [`bad_word_ids`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.bad_words_ids) argument.""" + + bad_words: OptionalNullable[List[str]] = UNSET + r"""Text phrases that should not be generated. + For a bad word phrase that contains N tokens, if the first N-1 tokens appears at the last of the generated result, the logit for the last token of the phrase is set to -inf. + Before checking whether a bard word is included in the result, the word is converted into tokens. + We recommend using `bad_word_tokens` because it is clearer. + For example, after tokenization, phrases \"clear\" and \" clear\" can result in different token sequences due to the prepended space character. + Defaults to empty list. + + """ + + beam_compat_no_post_normalization: OptionalNullable[bool] = UNSET + + beam_compat_pre_normalization: OptionalNullable[bool] = UNSET + + beam_search_type: OptionalNullable[str] = "DETERMINISTIC" + r"""One of `DETERMINISTIC`, `NAIVE_SAMPLING`, and `STOCHASTIC`. Which beam search type to use. 
`DETERMINISTIC` means the standard, deterministic beam search, which is similar to Hugging Face's [`beam_search`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_search). Arguments for controlling random sampling such as `top_k` and `top_p` are not allowed for this option. `NAIVE_SAMPLING` is similar to Hugging Face's [`beam_sample`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_sample). `STOCHASTIC` means stochastic beam search (more details in [Kool et al. (2019)](https://proceedings.mlr.press/v97/kool19a.html)). This option is ignored if `num_beams` is not provided. Defaults to `DETERMINISTIC`.""" + + early_stopping: OptionalNullable[bool] = False + r"""Whether to stop the beam search when at least `num_beams` beams are finished with the EOS token. Only allowed for beam search. Defaults to false. This is similar to Hugging Face's [`early_stopping`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.early_stopping) argument.""" + + embedding_to_replace: OptionalNullable[List[float]] = UNSET + r"""A list of flattened embedding vectors used for replacing the tokens at the specified indices provided via `token_index_to_replace`.""" + + encoder_no_repeat_ngram: OptionalNullable[int] = 1 + r"""If this exceeds 1, every ngram of that size occurring in the input token sequence cannot appear in the generated result. 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Only allowed for encoder-decoder models. Defaults to 1. This is similar to Hugging Face's [`encoder_no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_no_repeat_ngram_size) argument.""" + + encoder_repetition_penalty: OptionalNullable[float] = UNSET + r"""Penalizes tokens that have already appeared in the input tokens. Should be greater than or equal to 1.0. 1.0 means no penalty. Only allowed for encoder-decoder models. See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`encoder_repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_repetition_penalty) argument.""" + + eos_token: OptionalNullable[List[int]] = UNSET + r"""A list of endpoint sentence tokens.""" + + forced_output_tokens: OptionalNullable[List[int]] = UNSET + r"""A token sequence that is enforced as a generation output. This option can be used when evaluating the model for datasets with multiple-choice problems (e.g., [HellaSwag](https://huggingface.co/datasets/hellaswag), [MMLU](https://huggingface.co/datasets/cais/mmlu)). Use this option with `include_output_logprobs` to get logprobs for the evaluation.""" + + frequency_penalty: OptionalNullable[float] = UNSET + r"""Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled, taking into account their frequency in the preceding text.
This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" + + include_output_logits: OptionalNullable[bool] = UNSET + r"""Whether to include the output logits to the generation output.""" + + include_output_logprobs: OptionalNullable[bool] = UNSET + r"""Whether to include the output logprobs to the generation output.""" + + length_penalty: OptionalNullable[float] = UNSET + r"""Coefficient for exponential length penalty that is used with beam search. Only allowed for beam search. Defaults to 1.0. This is similar to Hugging Face's [`length_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.length_penalty) argument.""" + + max_tokens: OptionalNullable[int] = UNSET + r"""The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.""" + + max_total_tokens: OptionalNullable[int] = UNSET + r"""The maximum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `max_tokens` and `max_total_tokens` is allowed. Default value is the model's maximum length. This is similar to Hugging Face's [`max_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_length) argument.""" + + min_tokens: OptionalNullable[int] = 0 + r"""The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.""" + + min_total_tokens: OptionalNullable[int] = UNSET + r"""The minimum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `min_tokens` and `min_total_tokens` is allowed. This is similar to Hugging Face's [`min_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.min_length) argument.""" + + n: OptionalNullable[int] = 1 + r"""The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.""" + + no_repeat_ngram: OptionalNullable[int] = 1 + r"""If this exceeds 1, every ngram of that size can only occur once among the generated result (plus the input tokens for decoder-only models). 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Defaults to 1. This is similar to Hugging Face's [`no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.no_repeat_ngram_size) argument.""" + + num_beams: OptionalNullable[int] = UNSET + r"""Number of beams for beam search. Numbers between 1 and 31 (both inclusive) are allowed. 
Default behavior is no beam search. This is similar to Hugging Face's [`num_beams`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_beams) argument.""" + + presence_penalty: OptionalNullable[float] = UNSET + r"""Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text.""" + + repetition_penalty: OptionalNullable[float] = UNSET + r"""Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.""" + + response_format: OptionalNullable[ResponseFormat] = UNSET + r"""The enforced format of the model's output. + + Note that the content of the output message may be truncated if it exceeds the `max_tokens`. + You can check this by verifying that the `finish_reason` of the output message is `length`. + + ***Important*** + You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`). + Otherwise, the model may produce an unending stream of whitespace or other characters. + + """ + + seed: OptionalNullable[List[int]] = UNSET + r"""Seed to control the random procedure. If nothing is given, the API generates the seed randomly, uses it for sampling, and returns the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.""" + + stop: OptionalNullable[List[str]] = UNSET + r"""When one of the stop phrases appears in the generation result, the API will stop generation. + The stop phrases are excluded from the result. + This option is incompatible with beam search (specified by `num_beams`); use `stop_tokens` for that case instead. + Defaults to empty list. + + """ + + stop_tokens: OptionalNullable[List[TokenSequence]] = UNSET + r"""Stop generating further tokens when a generated token corresponds to any of the tokens in the sequence. + If beam search is enabled, all of the active beams should contain the stop token to terminate generation. + + """ + + stream: OptionalNullable[bool] = True + r"""Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.""" + + temperature: OptionalNullable[float] = 1 + r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" + + timeout_microseconds: OptionalNullable[int] = UNSET + r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code.
Default behavior is no timeout.""" + + token_index_to_replace: OptionalNullable[List[int]] = UNSET + r"""A list of token indices where to replace the embeddings of input tokens provided via either `tokens` or `prompt`.""" + + top_k: OptionalNullable[int] = 0 + r"""The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.""" + + top_p: OptionalNullable[float] = 1 + r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" + + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = [ + "bad_word_tokens", + "bad_words", + "beam_compat_no_post_normalization", + "beam_compat_pre_normalization", + "beam_search_type", + "early_stopping", + "embedding_to_replace", + "encoder_no_repeat_ngram", + "encoder_repetition_penalty", + "eos_token", + "forced_output_tokens", + "frequency_penalty", + "include_output_logits", + "include_output_logprobs", + "length_penalty", + "max_tokens", + "max_total_tokens", + "min_tokens", + "min_total_tokens", + "n", + "no_repeat_ngram", + "num_beams", + "presence_penalty", + "repetition_penalty", + "response_format", + "seed", + "stop", + "stop_tokens", + "stream", + "temperature", + "timeout_microseconds", + "token_index_to_replace", + "top_k", + "top_p", + ] + nullable_fields = [ + "bad_word_tokens", + "bad_words", + "beam_compat_no_post_normalization", + "beam_compat_pre_normalization", + "beam_search_type", + "early_stopping", + "embedding_to_replace", + "encoder_no_repeat_ngram", + "encoder_repetition_penalty", + "eos_token", + "forced_output_tokens", + "frequency_penalty", + "include_output_logits", + "include_output_logprobs", + "length_penalty", + "max_tokens", + "max_total_tokens", + "min_tokens", + "min_total_tokens", + "n", + "no_repeat_ngram", + "num_beams", + "presence_penalty", + "repetition_penalty", + "response_format", + "seed", + "stop", + "stop_tokens", + "stream", + "temperature", + "timeout_microseconds", + "token_index_to_replace", + "top_k", + "top_p", + ] + null_default_fields = [] + + serialized = handler(self) + + m = {} + + for n, f in self.model_fields.items(): + k = f.alias or n + val = serialized.get(k) + serialized.pop(k, None) + + optional_nullable = k in optional_fields and k in nullable_fields + is_set = ( + self.__pydantic_fields_set__.intersection({n}) + or k in null_default_fields + ) # pylint: disable=no-member + + if val is not None and val != UNSET_SENTINEL: + m[k] = val + elif val != UNSET_SENTINEL and ( + not k in optional_fields or (optional_nullable and is_set) + ): + m[k] = val + + return m + + +DedicatedCompletionsStreamBodyTypedDict = Union[ + DedicatedCompletionsStreamBodyCompletionsBodyWithPromptTypedDict, + DedicatedCompletionsStreamBodyCompletionsBodyWithTokensTypedDict, +] + + +DedicatedCompletionsStreamBody = Union[ + DedicatedCompletionsStreamBodyCompletionsBodyWithPrompt, + DedicatedCompletionsStreamBodyCompletionsBodyWithTokens, +] diff --git 
a/src/friendli/models/dedicatedcompletionsstreamop.py b/src/friendli/models/dedicatedcompletionsstreamop.py index 22dcaa4..18da3b1 100644 --- a/src/friendli/models/dedicatedcompletionsstreamop.py +++ b/src/friendli/models/dedicatedcompletionsstreamop.py @@ -1,7 +1,10 @@ """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" from __future__ import annotations -from .completionsstreambody import CompletionsStreamBody, CompletionsStreamBodyTypedDict +from .dedicatedcompletionsstreambody import ( + DedicatedCompletionsStreamBody, + DedicatedCompletionsStreamBodyTypedDict, +) from friendli.types import BaseModel from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata import pydantic @@ -10,14 +13,14 @@ class DedicatedCompletionsStreamRequestTypedDict(TypedDict): - completions_stream_body: CompletionsStreamBodyTypedDict + dedicated_completions_stream_body: DedicatedCompletionsStreamBodyTypedDict x_friendli_team: NotRequired[str] r"""ID of team to run requests as (optional parameter).""" class DedicatedCompletionsStreamRequest(BaseModel): - completions_stream_body: Annotated[ - CompletionsStreamBody, + dedicated_completions_stream_body: Annotated[ + DedicatedCompletionsStreamBody, FieldMetadata(request=RequestMetadata(media_type="application/json")), ] diff --git a/src/friendli/models/dedicateddetokenizationbody.py b/src/friendli/models/dedicateddetokenizationbody.py new file mode 100644 index 0000000..c38d62c --- /dev/null +++ b/src/friendli/models/dedicateddetokenizationbody.py @@ -0,0 +1,21 @@ +"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" + +from __future__ import annotations +from friendli.types import BaseModel +from typing import List, Optional +from typing_extensions import NotRequired, TypedDict + + +class DedicatedDetokenizationBodyTypedDict(TypedDict): + model: str + r"""ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + tokens: NotRequired[List[int]] + r"""A token sequence to detokenize.""" + + +class DedicatedDetokenizationBody(BaseModel): + model: str + r"""ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + + tokens: Optional[List[int]] = None + r"""A token sequence to detokenize.""" diff --git a/src/friendli/models/dedicateddetokenizationop.py b/src/friendli/models/dedicateddetokenizationop.py index 799af50..ebbf667 100644 --- a/src/friendli/models/dedicateddetokenizationop.py +++ b/src/friendli/models/dedicateddetokenizationop.py @@ -1,7 +1,10 @@ """Code generated by Speakeasy (https://speakeasy.com). 
DO NOT EDIT.""" from __future__ import annotations -from .detokenizationbody import DetokenizationBody, DetokenizationBodyTypedDict +from .dedicateddetokenizationbody import ( + DedicatedDetokenizationBody, + DedicatedDetokenizationBodyTypedDict, +) from friendli.types import BaseModel from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata import pydantic @@ -10,14 +13,14 @@ class DedicatedDetokenizationRequestTypedDict(TypedDict): - detokenization_body: DetokenizationBodyTypedDict + dedicated_detokenization_body: DedicatedDetokenizationBodyTypedDict x_friendli_team: NotRequired[str] r"""ID of team to run requests as (optional parameter).""" class DedicatedDetokenizationRequest(BaseModel): - detokenization_body: Annotated[ - DetokenizationBody, + dedicated_detokenization_body: Annotated[ + DedicatedDetokenizationBody, FieldMetadata(request=RequestMetadata(media_type="application/json")), ] diff --git a/src/friendli/models/dedicatedtokenizationbody.py b/src/friendli/models/dedicatedtokenizationbody.py new file mode 100644 index 0000000..fae49f8 --- /dev/null +++ b/src/friendli/models/dedicatedtokenizationbody.py @@ -0,0 +1,20 @@ +"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" + +from __future__ import annotations +from friendli.types import BaseModel +from typing_extensions import TypedDict + + +class DedicatedTokenizationBodyTypedDict(TypedDict): + model: str + r"""ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + prompt: str + r"""Input text prompt to tokenize.""" + + +class DedicatedTokenizationBody(BaseModel): + model: str + r"""ID of target endpoint. If you want to send request to specific adapter, using \"ENDPOINT_ID:ADAPTER_ROUTE\" format.""" + + prompt: str + r"""Input text prompt to tokenize.""" diff --git a/src/friendli/models/dedicatedtokenizationop.py b/src/friendli/models/dedicatedtokenizationop.py index cc88f0d..340979e 100644 --- a/src/friendli/models/dedicatedtokenizationop.py +++ b/src/friendli/models/dedicatedtokenizationop.py @@ -1,7 +1,10 @@ """Code generated by Speakeasy (https://speakeasy.com). 
DO NOT EDIT.""" from __future__ import annotations -from .tokenizationbody import TokenizationBody, TokenizationBodyTypedDict +from .dedicatedtokenizationbody import ( + DedicatedTokenizationBody, + DedicatedTokenizationBodyTypedDict, +) from friendli.types import BaseModel from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata import pydantic @@ -10,14 +13,14 @@ class DedicatedTokenizationRequestTypedDict(TypedDict): - tokenization_body: TokenizationBodyTypedDict + dedicated_tokenization_body: DedicatedTokenizationBodyTypedDict x_friendli_team: NotRequired[str] r"""ID of team to run requests as (optional parameter).""" class DedicatedTokenizationRequest(BaseModel): - tokenization_body: Annotated[ - TokenizationBody, + dedicated_tokenization_body: Annotated[ + DedicatedTokenizationBody, FieldMetadata(request=RequestMetadata(media_type="application/json")), ] diff --git a/src/friendli/models/chatcompletebody.py b/src/friendli/models/serverlesschatcompletebody.py similarity index 99% rename from src/friendli/models/chatcompletebody.py rename to src/friendli/models/serverlesschatcompletebody.py index 590326e..4f4e80b 100644 --- a/src/friendli/models/chatcompletebody.py +++ b/src/friendli/models/serverlesschatcompletebody.py @@ -123,7 +123,7 @@ class Object(BaseModel): """ -class ChatCompleteBodyTypedDict(TypedDict): +class ServerlessChatCompleteBodyTypedDict(TypedDict): model: str r"""Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models).""" messages: List[MessageTypedDict] @@ -203,7 +203,7 @@ class ChatCompleteBodyTypedDict(TypedDict): r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" -class ChatCompleteBody(BaseModel): +class ServerlessChatCompleteBody(BaseModel): model: str r"""Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models).""" diff --git a/src/friendli/models/serverlesschatcompleteop.py b/src/friendli/models/serverlesschatcompleteop.py index 2f6bf46..ee90567 100644 --- a/src/friendli/models/serverlesschatcompleteop.py +++ b/src/friendli/models/serverlesschatcompleteop.py @@ -1,7 +1,10 @@ """Code generated by Speakeasy (https://speakeasy.com). 
DO NOT EDIT.""" from __future__ import annotations -from .chatcompletebody import ChatCompleteBody, ChatCompleteBodyTypedDict +from .serverlesschatcompletebody import ( + ServerlessChatCompleteBody, + ServerlessChatCompleteBodyTypedDict, +) from friendli.types import BaseModel from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata import pydantic @@ -10,14 +13,14 @@ class ServerlessChatCompleteRequestTypedDict(TypedDict): - chat_complete_body: ChatCompleteBodyTypedDict + serverless_chat_complete_body: ServerlessChatCompleteBodyTypedDict x_friendli_team: NotRequired[str] r"""ID of team to run requests as (optional parameter).""" class ServerlessChatCompleteRequest(BaseModel): - chat_complete_body: Annotated[ - ChatCompleteBody, + serverless_chat_complete_body: Annotated[ + ServerlessChatCompleteBody, FieldMetadata(request=RequestMetadata(media_type="application/json")), ] diff --git a/src/friendli/models/chatstreambody.py b/src/friendli/models/serverlesschatstreambody.py similarity index 93% rename from src/friendli/models/chatstreambody.py rename to src/friendli/models/serverlesschatstreambody.py index f6e7230..677618a 100644 --- a/src/friendli/models/chatstreambody.py +++ b/src/friendli/models/serverlesschatstreambody.py @@ -10,15 +10,15 @@ from typing_extensions import NotRequired, TypedDict -class ChatStreamBodyLogitBiasTypedDict(TypedDict): +class ServerlessChatStreamBodyLogitBiasTypedDict(TypedDict): r"""Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.""" -class ChatStreamBodyLogitBias(BaseModel): +class ServerlessChatStreamBodyLogitBias(BaseModel): r"""Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.""" -class ChatStreamBodyStreamOptionsTypedDict(TypedDict): +class ServerlessChatStreamBodyStreamOptionsTypedDict(TypedDict): r"""Options related to stream. It can only be used when `stream: true`. @@ -32,7 +32,7 @@ class ChatStreamBodyStreamOptionsTypedDict(TypedDict): """ -class ChatStreamBodyStreamOptions(BaseModel): +class ServerlessChatStreamBodyStreamOptions(BaseModel): r"""Options related to stream. It can only be used when `stream: true`. @@ -76,34 +76,34 @@ def serialize_model(self, handler): return m -ChatStreamBodyToolChoiceType = Literal["function"] +ServerlessChatStreamBodyToolChoiceType = Literal["function"] r"""The type of the tool. Currently, only `function` is supported.""" -class ChatStreamBodyToolChoiceFunctionTypedDict(TypedDict): +class ServerlessChatStreamBodyToolChoiceFunctionTypedDict(TypedDict): name: str r"""The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.""" -class ChatStreamBodyToolChoiceFunction(BaseModel): +class ServerlessChatStreamBodyToolChoiceFunction(BaseModel): name: str r"""The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.""" class ToolChoiceObjectTypedDict(TypedDict): - type: ChatStreamBodyToolChoiceType + type: ServerlessChatStreamBodyToolChoiceType r"""The type of the tool. 
Currently, only `function` is supported.""" - function: ChatStreamBodyToolChoiceFunctionTypedDict + function: ServerlessChatStreamBodyToolChoiceFunctionTypedDict class ToolChoiceObject(BaseModel): - type: ChatStreamBodyToolChoiceType + type: ServerlessChatStreamBodyToolChoiceType r"""The type of the tool. Currently, only `function` is supported.""" - function: ChatStreamBodyToolChoiceFunction + function: ServerlessChatStreamBodyToolChoiceFunction -ChatStreamBodyToolChoiceTypedDict = Union[ToolChoiceObjectTypedDict, str] +ServerlessChatStreamBodyToolChoiceTypedDict = Union[ToolChoiceObjectTypedDict, str] r"""Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. @@ -113,7 +113,7 @@ class ToolChoiceObject(BaseModel): """ -ChatStreamBodyToolChoice = Union[ToolChoiceObject, str] +ServerlessChatStreamBodyToolChoice = Union[ToolChoiceObject, str] r"""Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. @@ -123,7 +123,7 @@ class ToolChoiceObject(BaseModel): """ -class ChatStreamBodyTypedDict(TypedDict): +class ServerlessChatStreamBodyTypedDict(TypedDict): model: str r"""Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models).""" messages: List[MessageTypedDict] @@ -132,7 +132,7 @@ class ChatStreamBodyTypedDict(TypedDict): r"""A list of endpoint sentence tokens.""" frequency_penalty: NotRequired[Nullable[float]] r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" - logit_bias: NotRequired[Nullable[ChatStreamBodyLogitBiasTypedDict]] + logit_bias: NotRequired[Nullable[ServerlessChatStreamBodyLogitBiasTypedDict]] r"""Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.""" logprobs: NotRequired[Nullable[bool]] r"""Whether to return log probabilities of the output tokens or not.""" @@ -169,7 +169,9 @@ class ChatStreamBodyTypedDict(TypedDict): r"""When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list.""" stream: NotRequired[Nullable[bool]] r"""Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.""" - stream_options: NotRequired[Nullable[ChatStreamBodyStreamOptionsTypedDict]] + stream_options: NotRequired[ + Nullable[ServerlessChatStreamBodyStreamOptionsTypedDict] + ] r"""Options related to stream. It can only be used when `stream: true`. @@ -178,7 +180,7 @@ class ChatStreamBodyTypedDict(TypedDict): r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. 
This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" timeout_microseconds: NotRequired[Nullable[int]] r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.""" - tool_choice: NotRequired[ChatStreamBodyToolChoiceTypedDict] + tool_choice: NotRequired[ServerlessChatStreamBodyToolChoiceTypedDict] r"""Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. @@ -203,7 +205,7 @@ class ChatStreamBodyTypedDict(TypedDict): r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" -class ChatStreamBody(BaseModel): +class ServerlessChatStreamBody(BaseModel): model: str r"""Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models).""" @@ -216,7 +218,7 @@ class ChatStreamBody(BaseModel): frequency_penalty: OptionalNullable[float] = UNSET r"""Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.""" - logit_bias: OptionalNullable[ChatStreamBodyLogitBias] = UNSET + logit_bias: OptionalNullable[ServerlessChatStreamBodyLogitBias] = UNSET r"""Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.""" logprobs: OptionalNullable[bool] = UNSET @@ -265,7 +267,7 @@ class ChatStreamBody(BaseModel): stream: OptionalNullable[bool] = True r"""Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.""" - stream_options: OptionalNullable[ChatStreamBodyStreamOptions] = UNSET + stream_options: OptionalNullable[ServerlessChatStreamBodyStreamOptions] = UNSET r"""Options related to stream. It can only be used when `stream: true`. @@ -277,7 +279,7 @@ class ChatStreamBody(BaseModel): timeout_microseconds: OptionalNullable[int] = UNSET r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.""" - tool_choice: Optional[ChatStreamBodyToolChoice] = None + tool_choice: Optional[ServerlessChatStreamBodyToolChoice] = None r"""Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. 
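Note on the serverless chat-stream rename above: callers that construct the request models by hand have to switch both the body class and the request keyword. The sketch below is illustrative only and is not part of this patch; the model code and message payload are placeholders, and the exact `Message` schema is not shown in this hunk, so only the renamed class and field names are taken from the diff.

# Hypothetical caller-side migration sketch for the rename; only
# ChatStreamBody -> ServerlessChatStreamBody and
# chat_stream_body -> serverless_chat_stream_body come from this patch.
from friendli import models

# Before (0.2.25):
#   request = models.ServerlessChatStreamRequest(
#       chat_stream_body=models.ChatStreamBody(
#           model="<model-code>",                          # placeholder
#           messages=[{"role": "user", "content": "Hi"}],  # Message schema assumed
#           stream=True,
#       ),
#   )

# After (0.2.30):
request = models.ServerlessChatStreamRequest(
    serverless_chat_stream_body=models.ServerlessChatStreamBody(
        model="<model-code>",                          # placeholder
        messages=[{"role": "user", "content": "Hi"}],  # Message schema assumed
        stream=True,
    ),
)

The same pattern should apply to the other renamed request bodies in this patch (tokenization, detokenization, completions, and tool-assisted chat), which now carry `Serverless` or `Dedicated` prefixes so the two endpoint families no longer share model names.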
diff --git a/src/friendli/models/serverlesschatstreamop.py b/src/friendli/models/serverlesschatstreamop.py index 39c6319..1d95d2f 100644 --- a/src/friendli/models/serverlesschatstreamop.py +++ b/src/friendli/models/serverlesschatstreamop.py @@ -1,7 +1,10 @@ """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" from __future__ import annotations -from .chatstreambody import ChatStreamBody, ChatStreamBodyTypedDict +from .serverlesschatstreambody import ( + ServerlessChatStreamBody, + ServerlessChatStreamBodyTypedDict, +) from friendli.types import BaseModel from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata import pydantic @@ -10,14 +13,14 @@ class ServerlessChatStreamRequestTypedDict(TypedDict): - chat_stream_body: ChatStreamBodyTypedDict + serverless_chat_stream_body: ServerlessChatStreamBodyTypedDict x_friendli_team: NotRequired[str] r"""ID of team to run requests as (optional parameter).""" class ServerlessChatStreamRequest(BaseModel): - chat_stream_body: Annotated[ - ChatStreamBody, + serverless_chat_stream_body: Annotated[ + ServerlessChatStreamBody, FieldMetadata(request=RequestMetadata(media_type="application/json")), ] diff --git a/src/friendli/models/completionscompletebody.py b/src/friendli/models/serverlesscompletionscompletebody.py similarity index 74% rename from src/friendli/models/completionscompletebody.py rename to src/friendli/models/serverlesscompletionscompletebody.py index 83492e6..af22ba8 100644 --- a/src/friendli/models/completionscompletebody.py +++ b/src/friendli/models/serverlesscompletionscompletebody.py @@ -12,9 +12,11 @@ from typing import Union -CompletionsCompleteBodyTypedDict = Union[ +ServerlessCompletionsCompleteBodyTypedDict = Union[ CompletionsBodyWithPromptTypedDict, CompletionsBodyWithTokensTypedDict ] -CompletionsCompleteBody = Union[CompletionsBodyWithPrompt, CompletionsBodyWithTokens] +ServerlessCompletionsCompleteBody = Union[ + CompletionsBodyWithPrompt, CompletionsBodyWithTokens +] diff --git a/src/friendli/models/serverlesscompletionscompleteop.py b/src/friendli/models/serverlesscompletionscompleteop.py index e3a64c0..880d2fe 100644 --- a/src/friendli/models/serverlesscompletionscompleteop.py +++ b/src/friendli/models/serverlesscompletionscompleteop.py @@ -1,9 +1,9 @@ """Code generated by Speakeasy (https://speakeasy.com). 
DO NOT EDIT.""" from __future__ import annotations -from .completionscompletebody import ( - CompletionsCompleteBody, - CompletionsCompleteBodyTypedDict, +from .serverlesscompletionscompletebody import ( + ServerlessCompletionsCompleteBody, + ServerlessCompletionsCompleteBodyTypedDict, ) from friendli.types import BaseModel from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata @@ -13,14 +13,14 @@ class ServerlessCompletionsCompleteRequestTypedDict(TypedDict): - completions_complete_body: CompletionsCompleteBodyTypedDict + serverless_completions_complete_body: ServerlessCompletionsCompleteBodyTypedDict x_friendli_team: NotRequired[str] r"""ID of team to run requests as (optional parameter).""" class ServerlessCompletionsCompleteRequest(BaseModel): - completions_complete_body: Annotated[ - CompletionsCompleteBody, + serverless_completions_complete_body: Annotated[ + ServerlessCompletionsCompleteBody, FieldMetadata(request=RequestMetadata(media_type="application/json")), ] diff --git a/src/friendli/models/completionsstreambody.py b/src/friendli/models/serverlesscompletionsstreambody.py similarity index 98% rename from src/friendli/models/completionsstreambody.py rename to src/friendli/models/serverlesscompletionsstreambody.py index 3a03dc0..60d84e0 100644 --- a/src/friendli/models/completionsstreambody.py +++ b/src/friendli/models/serverlesscompletionsstreambody.py @@ -9,7 +9,7 @@ from typing_extensions import NotRequired, TypedDict -class CompletionsStreamBodyCompletionsBodyWithTokensTypedDict(TypedDict): +class ServerlessCompletionsStreamBodyCompletionsBodyWithTokensTypedDict(TypedDict): tokens: List[int] r"""The tokenized prompt (i.e., input tokens). Either `prompt` or `tokens` field is required.""" model: str @@ -106,7 +106,7 @@ class CompletionsStreamBodyCompletionsBodyWithTokensTypedDict(TypedDict): r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" -class CompletionsStreamBodyCompletionsBodyWithTokens(BaseModel): +class ServerlessCompletionsStreamBodyCompletionsBodyWithTokens(BaseModel): tokens: List[int] r"""The tokenized prompt (i.e., input tokens). Either `prompt` or `tokens` field is required.""" @@ -338,7 +338,7 @@ def serialize_model(self, handler): return m -class CompletionsStreamBodyCompletionsBodyWithPromptTypedDict(TypedDict): +class ServerlessCompletionsStreamBodyCompletionsBodyWithPromptTypedDict(TypedDict): prompt: str r"""The prompt (i.e., input text) to generate completions for. Either `prompt` or `tokens` field is required.""" model: str @@ -435,7 +435,7 @@ class CompletionsStreamBodyCompletionsBodyWithPromptTypedDict(TypedDict): r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" -class CompletionsStreamBodyCompletionsBodyWithPrompt(BaseModel): +class ServerlessCompletionsStreamBodyCompletionsBodyWithPrompt(BaseModel): prompt: str r"""The prompt (i.e., input text) to generate completions for. 
Either `prompt` or `tokens` field is required.""" @@ -667,13 +667,13 @@ def serialize_model(self, handler): return m -CompletionsStreamBodyTypedDict = Union[ - CompletionsStreamBodyCompletionsBodyWithPromptTypedDict, - CompletionsStreamBodyCompletionsBodyWithTokensTypedDict, +ServerlessCompletionsStreamBodyTypedDict = Union[ + ServerlessCompletionsStreamBodyCompletionsBodyWithPromptTypedDict, + ServerlessCompletionsStreamBodyCompletionsBodyWithTokensTypedDict, ] -CompletionsStreamBody = Union[ - CompletionsStreamBodyCompletionsBodyWithPrompt, - CompletionsStreamBodyCompletionsBodyWithTokens, +ServerlessCompletionsStreamBody = Union[ + ServerlessCompletionsStreamBodyCompletionsBodyWithPrompt, + ServerlessCompletionsStreamBodyCompletionsBodyWithTokens, ] diff --git a/src/friendli/models/serverlesscompletionsstreamop.py b/src/friendli/models/serverlesscompletionsstreamop.py index a01b052..d06c531 100644 --- a/src/friendli/models/serverlesscompletionsstreamop.py +++ b/src/friendli/models/serverlesscompletionsstreamop.py @@ -1,7 +1,10 @@ """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" from __future__ import annotations -from .completionsstreambody import CompletionsStreamBody, CompletionsStreamBodyTypedDict +from .serverlesscompletionsstreambody import ( + ServerlessCompletionsStreamBody, + ServerlessCompletionsStreamBodyTypedDict, +) from friendli.types import BaseModel from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata import pydantic @@ -10,14 +13,14 @@ class ServerlessCompletionsStreamRequestTypedDict(TypedDict): - completions_stream_body: CompletionsStreamBodyTypedDict + serverless_completions_stream_body: ServerlessCompletionsStreamBodyTypedDict x_friendli_team: NotRequired[str] r"""ID of team to run requests as (optional parameter).""" class ServerlessCompletionsStreamRequest(BaseModel): - completions_stream_body: Annotated[ - CompletionsStreamBody, + serverless_completions_stream_body: Annotated[ + ServerlessCompletionsStreamBody, FieldMetadata(request=RequestMetadata(media_type="application/json")), ] diff --git a/src/friendli/models/detokenizationbody.py b/src/friendli/models/serverlessdetokenizationbody.py similarity index 88% rename from src/friendli/models/detokenizationbody.py rename to src/friendli/models/serverlessdetokenizationbody.py index 7fca573..7de4148 100644 --- a/src/friendli/models/detokenizationbody.py +++ b/src/friendli/models/serverlessdetokenizationbody.py @@ -6,14 +6,14 @@ from typing_extensions import NotRequired, TypedDict -class DetokenizationBodyTypedDict(TypedDict): +class ServerlessDetokenizationBodyTypedDict(TypedDict): model: NotRequired[str] r"""Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models).""" tokens: NotRequired[List[int]] r"""A token sequence to detokenize.""" -class DetokenizationBody(BaseModel): +class ServerlessDetokenizationBody(BaseModel): model: Optional[str] = None r"""Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models).""" diff --git a/src/friendli/models/serverlessdetokenizationop.py b/src/friendli/models/serverlessdetokenizationop.py index 57da1c9..9df01f6 100644 --- a/src/friendli/models/serverlessdetokenizationop.py +++ b/src/friendli/models/serverlessdetokenizationop.py @@ -1,7 +1,10 @@ """Code generated by Speakeasy (https://speakeasy.com). 
DO NOT EDIT.""" from __future__ import annotations -from .detokenizationbody import DetokenizationBody, DetokenizationBodyTypedDict +from .serverlessdetokenizationbody import ( + ServerlessDetokenizationBody, + ServerlessDetokenizationBodyTypedDict, +) from friendli.types import BaseModel from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata import pydantic @@ -10,14 +13,14 @@ class ServerlessDetokenizationRequestTypedDict(TypedDict): - detokenization_body: DetokenizationBodyTypedDict + serverless_detokenization_body: ServerlessDetokenizationBodyTypedDict x_friendli_team: NotRequired[str] r"""ID of team to run requests as (optional parameter).""" class ServerlessDetokenizationRequest(BaseModel): - detokenization_body: Annotated[ - DetokenizationBody, + serverless_detokenization_body: Annotated[ + ServerlessDetokenizationBody, FieldMetadata(request=RequestMetadata(media_type="application/json")), ] diff --git a/src/friendli/models/tokenizationbody.py b/src/friendli/models/serverlesstokenizationbody.py similarity index 86% rename from src/friendli/models/tokenizationbody.py rename to src/friendli/models/serverlesstokenizationbody.py index fcb073c..99db568 100644 --- a/src/friendli/models/tokenizationbody.py +++ b/src/friendli/models/serverlesstokenizationbody.py @@ -5,14 +5,14 @@ from typing_extensions import TypedDict -class TokenizationBodyTypedDict(TypedDict): +class ServerlessTokenizationBodyTypedDict(TypedDict): model: str r"""Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models).""" prompt: str r"""Input text prompt to tokenize.""" -class TokenizationBody(BaseModel): +class ServerlessTokenizationBody(BaseModel): model: str r"""Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models).""" diff --git a/src/friendli/models/serverlesstokenizationop.py b/src/friendli/models/serverlesstokenizationop.py index 6480733..7a15660 100644 --- a/src/friendli/models/serverlesstokenizationop.py +++ b/src/friendli/models/serverlesstokenizationop.py @@ -1,7 +1,10 @@ """Code generated by Speakeasy (https://speakeasy.com). 
DO NOT EDIT.""" from __future__ import annotations -from .tokenizationbody import TokenizationBody, TokenizationBodyTypedDict +from .serverlesstokenizationbody import ( + ServerlessTokenizationBody, + ServerlessTokenizationBodyTypedDict, +) from friendli.types import BaseModel from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata import pydantic @@ -10,14 +13,14 @@ class ServerlessTokenizationRequestTypedDict(TypedDict): - tokenization_body: TokenizationBodyTypedDict + serverless_tokenization_body: ServerlessTokenizationBodyTypedDict x_friendli_team: NotRequired[str] r"""ID of team to run requests as (optional parameter).""" class ServerlessTokenizationRequest(BaseModel): - tokenization_body: Annotated[ - TokenizationBody, + serverless_tokenization_body: Annotated[ + ServerlessTokenizationBody, FieldMetadata(request=RequestMetadata(media_type="application/json")), ] diff --git a/src/friendli/models/toolassistedchatcompletebody.py b/src/friendli/models/serverlesstoolassistedchatcompletebody.py similarity index 93% rename from src/friendli/models/toolassistedchatcompletebody.py rename to src/friendli/models/serverlesstoolassistedchatcompletebody.py index 9627b14..fe401c1 100644 --- a/src/friendli/models/toolassistedchatcompletebody.py +++ b/src/friendli/models/serverlesstoolassistedchatcompletebody.py @@ -10,35 +10,35 @@ from typing_extensions import NotRequired, TypedDict -ToolAssistedChatCompleteBodyToolChoiceType = Literal["function"] +ServerlessToolAssistedChatCompleteBodyToolChoiceType = Literal["function"] r"""The type of the tool. Currently, only `function` is supported.""" -class ToolAssistedChatCompleteBodyToolChoiceFunctionTypedDict(TypedDict): +class ServerlessToolAssistedChatCompleteBodyToolChoiceFunctionTypedDict(TypedDict): name: str r"""The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.""" -class ToolAssistedChatCompleteBodyToolChoiceFunction(BaseModel): +class ServerlessToolAssistedChatCompleteBodyToolChoiceFunction(BaseModel): name: str r"""The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.""" -class ToolAssistedChatCompleteBodyToolChoiceObjectTypedDict(TypedDict): - type: ToolAssistedChatCompleteBodyToolChoiceType +class ServerlessToolAssistedChatCompleteBodyToolChoiceObjectTypedDict(TypedDict): + type: ServerlessToolAssistedChatCompleteBodyToolChoiceType r"""The type of the tool. Currently, only `function` is supported.""" - function: ToolAssistedChatCompleteBodyToolChoiceFunctionTypedDict + function: ServerlessToolAssistedChatCompleteBodyToolChoiceFunctionTypedDict -class ToolAssistedChatCompleteBodyToolChoiceObject(BaseModel): - type: ToolAssistedChatCompleteBodyToolChoiceType +class ServerlessToolAssistedChatCompleteBodyToolChoiceObject(BaseModel): + type: ServerlessToolAssistedChatCompleteBodyToolChoiceType r"""The type of the tool. Currently, only `function` is supported.""" - function: ToolAssistedChatCompleteBodyToolChoiceFunction + function: ServerlessToolAssistedChatCompleteBodyToolChoiceFunction -ToolAssistedChatCompleteBodyToolChoiceTypedDict = Union[ - ToolAssistedChatCompleteBodyToolChoiceObjectTypedDict, str +ServerlessToolAssistedChatCompleteBodyToolChoiceTypedDict = Union[ + ServerlessToolAssistedChatCompleteBodyToolChoiceObjectTypedDict, str ] r"""Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. 
@@ -49,8 +49,8 @@ class ToolAssistedChatCompleteBodyToolChoiceObject(BaseModel): """ -ToolAssistedChatCompleteBodyToolChoice = Union[ - ToolAssistedChatCompleteBodyToolChoiceObject, str +ServerlessToolAssistedChatCompleteBodyToolChoice = Union[ + ServerlessToolAssistedChatCompleteBodyToolChoiceObject, str ] r"""Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. @@ -61,7 +61,7 @@ class ToolAssistedChatCompleteBodyToolChoiceObject(BaseModel): """ -class ToolAssistedChatCompleteBodyTypedDict(TypedDict): +class ServerlessToolAssistedChatCompleteBodyTypedDict(TypedDict): model: str r"""Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models).""" messages: List[MessageTypedDict] @@ -118,7 +118,7 @@ class ToolAssistedChatCompleteBodyTypedDict(TypedDict): r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.""" timeout_microseconds: NotRequired[Nullable[int]] r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.""" - tool_choice: NotRequired[ToolAssistedChatCompleteBodyToolChoiceTypedDict] + tool_choice: NotRequired[ServerlessToolAssistedChatCompleteBodyToolChoiceTypedDict] r"""Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. @@ -141,7 +141,7 @@ class ToolAssistedChatCompleteBodyTypedDict(TypedDict): r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.""" -class ToolAssistedChatCompleteBody(BaseModel): +class ServerlessToolAssistedChatCompleteBody(BaseModel): model: str r"""Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models).""" @@ -215,7 +215,7 @@ class ToolAssistedChatCompleteBody(BaseModel): timeout_microseconds: OptionalNullable[int] = UNSET r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.""" - tool_choice: Optional[ToolAssistedChatCompleteBodyToolChoice] = None + tool_choice: Optional[ServerlessToolAssistedChatCompleteBodyToolChoice] = None r"""Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. diff --git a/src/friendli/models/serverlesstoolassistedchatcompleteop.py b/src/friendli/models/serverlesstoolassistedchatcompleteop.py index 3998b22..a3a05d9 100644 --- a/src/friendli/models/serverlesstoolassistedchatcompleteop.py +++ b/src/friendli/models/serverlesstoolassistedchatcompleteop.py @@ -1,9 +1,9 @@ """Code generated by Speakeasy (https://speakeasy.com). 
DO NOT EDIT.""" from __future__ import annotations -from .toolassistedchatcompletebody import ( - ToolAssistedChatCompleteBody, - ToolAssistedChatCompleteBodyTypedDict, +from .serverlesstoolassistedchatcompletebody import ( + ServerlessToolAssistedChatCompleteBody, + ServerlessToolAssistedChatCompleteBodyTypedDict, ) from friendli.types import BaseModel from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata @@ -13,14 +13,16 @@ class ServerlessToolAssistedChatCompleteRequestTypedDict(TypedDict): - tool_assisted_chat_complete_body: ToolAssistedChatCompleteBodyTypedDict + serverless_tool_assisted_chat_complete_body: ( + ServerlessToolAssistedChatCompleteBodyTypedDict + ) x_friendli_team: NotRequired[str] r"""ID of team to run requests as (optional parameter).""" class ServerlessToolAssistedChatCompleteRequest(BaseModel): - tool_assisted_chat_complete_body: Annotated[ - ToolAssistedChatCompleteBody, + serverless_tool_assisted_chat_complete_body: Annotated[ + ServerlessToolAssistedChatCompleteBody, FieldMetadata(request=RequestMetadata(media_type="application/json")), ] diff --git a/src/friendli/models/toolassistedchatstreambody.py b/src/friendli/models/serverlesstoolassistedchatstreambody.py similarity index 93% rename from src/friendli/models/toolassistedchatstreambody.py rename to src/friendli/models/serverlesstoolassistedchatstreambody.py index e646575..9ca972a 100644 --- a/src/friendli/models/toolassistedchatstreambody.py +++ b/src/friendli/models/serverlesstoolassistedchatstreambody.py @@ -10,35 +10,35 @@ from typing_extensions import NotRequired, TypedDict -ToolAssistedChatStreamBodyToolChoiceType = Literal["function"] +ServerlessToolAssistedChatStreamBodyToolChoiceType = Literal["function"] r"""The type of the tool. Currently, only `function` is supported.""" -class ToolAssistedChatStreamBodyToolChoiceFunctionTypedDict(TypedDict): +class ServerlessToolAssistedChatStreamBodyToolChoiceFunctionTypedDict(TypedDict): name: str r"""The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.""" -class ToolAssistedChatStreamBodyToolChoiceFunction(BaseModel): +class ServerlessToolAssistedChatStreamBodyToolChoiceFunction(BaseModel): name: str r"""The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.""" -class ToolAssistedChatStreamBodyToolChoiceObjectTypedDict(TypedDict): - type: ToolAssistedChatStreamBodyToolChoiceType +class ServerlessToolAssistedChatStreamBodyToolChoiceObjectTypedDict(TypedDict): + type: ServerlessToolAssistedChatStreamBodyToolChoiceType r"""The type of the tool. Currently, only `function` is supported.""" - function: ToolAssistedChatStreamBodyToolChoiceFunctionTypedDict + function: ServerlessToolAssistedChatStreamBodyToolChoiceFunctionTypedDict -class ToolAssistedChatStreamBodyToolChoiceObject(BaseModel): - type: ToolAssistedChatStreamBodyToolChoiceType +class ServerlessToolAssistedChatStreamBodyToolChoiceObject(BaseModel): + type: ServerlessToolAssistedChatStreamBodyToolChoiceType r"""The type of the tool. 
     Currently, only `function` is supported."""
 
-    function: ToolAssistedChatStreamBodyToolChoiceFunction
+    function: ServerlessToolAssistedChatStreamBodyToolChoiceFunction
 
 
-ToolAssistedChatStreamBodyToolChoiceTypedDict = Union[
-    ToolAssistedChatStreamBodyToolChoiceObjectTypedDict, str
+ServerlessToolAssistedChatStreamBodyToolChoiceTypedDict = Union[
+    ServerlessToolAssistedChatStreamBodyToolChoiceObjectTypedDict, str
 ]
 r"""Determines the tool calling behavior of the model.
 When set to `none`, the model will bypass tool execution and generate a response directly.
@@ -49,8 +49,8 @@ class ToolAssistedChatStreamBodyToolChoiceObject(BaseModel):
 """
 
 
-ToolAssistedChatStreamBodyToolChoice = Union[
-    ToolAssistedChatStreamBodyToolChoiceObject, str
+ServerlessToolAssistedChatStreamBodyToolChoice = Union[
+    ServerlessToolAssistedChatStreamBodyToolChoiceObject, str
 ]
 r"""Determines the tool calling behavior of the model.
 When set to `none`, the model will bypass tool execution and generate a response directly.
@@ -61,7 +61,7 @@ class ToolAssistedChatStreamBodyToolChoiceObject(BaseModel):
 """
 
 
-class ToolAssistedChatStreamBodyTypedDict(TypedDict):
+class ServerlessToolAssistedChatStreamBodyTypedDict(TypedDict):
     model: str
     r"""Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models)."""
     messages: List[MessageTypedDict]
@@ -118,7 +118,7 @@ class ToolAssistedChatStreamBodyTypedDict(TypedDict):
     r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument."""
     timeout_microseconds: NotRequired[Nullable[int]]
     r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout."""
-    tool_choice: NotRequired[ToolAssistedChatStreamBodyToolChoiceTypedDict]
+    tool_choice: NotRequired[ServerlessToolAssistedChatStreamBodyToolChoiceTypedDict]
     r"""Determines the tool calling behavior of the model.
     When set to `none`, the model will bypass tool execution and generate a response directly.
     In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
@@ -141,7 +141,7 @@ class ToolAssistedChatStreamBodyTypedDict(TypedDict):
     r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument."""
 
 
-class ToolAssistedChatStreamBody(BaseModel):
+class ServerlessToolAssistedChatStreamBody(BaseModel):
     model: str
     r"""Code of the model to use. See [available model list](https://friendli.ai/docs/guides/serverless_endpoints/pricing#text-generation-models)."""
 
@@ -215,7 +215,7 @@ class ToolAssistedChatStreamBody(BaseModel):
     timeout_microseconds: OptionalNullable[int] = UNSET
     r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout."""
 
-    tool_choice: Optional[ToolAssistedChatStreamBodyToolChoice] = None
+    tool_choice: Optional[ServerlessToolAssistedChatStreamBodyToolChoice] = None
     r"""Determines the tool calling behavior of the model.
     When set to `none`, the model will bypass tool execution and generate a response directly.
     In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
diff --git a/src/friendli/models/serverlesstoolassistedchatstreamop.py b/src/friendli/models/serverlesstoolassistedchatstreamop.py
index 5dd242d..2893bd3 100644
--- a/src/friendli/models/serverlesstoolassistedchatstreamop.py
+++ b/src/friendli/models/serverlesstoolassistedchatstreamop.py
@@ -1,9 +1,9 @@
 """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
 
 from __future__ import annotations
-from .toolassistedchatstreambody import (
-    ToolAssistedChatStreamBody,
-    ToolAssistedChatStreamBodyTypedDict,
+from .serverlesstoolassistedchatstreambody import (
+    ServerlessToolAssistedChatStreamBody,
+    ServerlessToolAssistedChatStreamBodyTypedDict,
 )
 from friendli.types import BaseModel
 from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata
@@ -13,14 +13,16 @@
 
 
 class ServerlessToolAssistedChatStreamRequestTypedDict(TypedDict):
-    tool_assisted_chat_stream_body: ToolAssistedChatStreamBodyTypedDict
+    serverless_tool_assisted_chat_stream_body: (
+        ServerlessToolAssistedChatStreamBodyTypedDict
+    )
     x_friendli_team: NotRequired[str]
     r"""ID of team to run requests as (optional parameter)."""
 
 
 class ServerlessToolAssistedChatStreamRequest(BaseModel):
-    tool_assisted_chat_stream_body: Annotated[
-        ToolAssistedChatStreamBody,
+    serverless_tool_assisted_chat_stream_body: Annotated[
+        ServerlessToolAssistedChatStreamBody,
         FieldMetadata(request=RequestMetadata(media_type="application/json")),
     ]
 
diff --git a/src/friendli/sdkconfiguration.py b/src/friendli/sdkconfiguration.py
index a7f983e..49c563a 100644
--- a/src/friendli/sdkconfiguration.py
+++ b/src/friendli/sdkconfiguration.py
@@ -26,9 +26,9 @@ class SDKConfiguration:
     server_idx: Optional[int] = 0
     language: str = "python"
     openapi_doc_version: str = "v1"
-    sdk_version: str = "0.2.25"
+    sdk_version: str = "0.2.30"
     gen_version: str = "2.457.2"
-    user_agent: str = "speakeasy-sdk/python 0.2.25 2.457.2 v1 friendli"
+    user_agent: str = "speakeasy-sdk/python 0.2.30 2.457.2 v1 friendli"
     retry_config: OptionalNullable[RetryConfig] = Field(default_factory=lambda: UNSET)
     timeout_ms: Optional[int] = None
 
diff --git a/src/friendli/token.py b/src/friendli/token.py
index e8328cb..ec8c4df 100644
--- a/src/friendli/token.py
+++ b/src/friendli/token.py
@@ -40,7 +40,7 @@ def tokenization(
 
         request = models.ServerlessTokenizationRequest(
            x_friendli_team=x_friendli_team,
-            tokenization_body=models.TokenizationBody(
+            serverless_tokenization_body=models.ServerlessTokenizationBody(
                 model=model,
                 prompt=prompt,
             ),
@@ -59,7 +59,11 @@ def tokenization(
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
             get_serialized_body=lambda: utils.serialize_request_body(
-                request.tokenization_body, False, False, "json", models.TokenizationBody
+                request.serverless_tokenization_body,
+                False,
+                False,
+                "json",
+                models.ServerlessTokenizationBody,
             ),
             timeout_ms=timeout_ms,
         )
@@ -133,7 +137,7 @@
 
         request = models.ServerlessTokenizationRequest(
             x_friendli_team=x_friendli_team,
-            tokenization_body=models.TokenizationBody(
+            serverless_tokenization_body=models.ServerlessTokenizationBody(
                 model=model,
                 prompt=prompt,
             ),
@@ -152,7 +156,11 @@ async def tokenization_async(
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
             get_serialized_body=lambda: utils.serialize_request_body(
-                request.tokenization_body, False, False, "json", models.TokenizationBody
+                request.serverless_tokenization_body,
+                False,
+                False,
+                "json",
+                models.ServerlessTokenizationBody,
             ),
             timeout_ms=timeout_ms,
         )
@@ -226,7 +234,7 @@ def detokenization(
 
         request = models.ServerlessDetokenizationRequest(
             x_friendli_team=x_friendli_team,
-            detokenization_body=models.DetokenizationBody(
+            serverless_detokenization_body=models.ServerlessDetokenizationBody(
                 model=model,
                 tokens=tokens,
             ),
@@ -245,11 +253,11 @@ def detokenization(
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
             get_serialized_body=lambda: utils.serialize_request_body(
-                request.detokenization_body,
+                request.serverless_detokenization_body,
                 False,
                 False,
                 "json",
-                models.DetokenizationBody,
+                models.ServerlessDetokenizationBody,
             ),
             timeout_ms=timeout_ms,
         )
@@ -323,7 +331,7 @@ async def detokenization_async(
 
         request = models.ServerlessDetokenizationRequest(
             x_friendli_team=x_friendli_team,
-            detokenization_body=models.DetokenizationBody(
+            serverless_detokenization_body=models.ServerlessDetokenizationBody(
                 model=model,
                 tokens=tokens,
             ),
@@ -342,11 +350,11 @@ async def detokenization_async(
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
             get_serialized_body=lambda: utils.serialize_request_body(
-                request.detokenization_body,
+                request.serverless_detokenization_body,
                 False,
                 False,
                 "json",
-                models.DetokenizationBody,
+                models.ServerlessDetokenizationBody,
             ),
             timeout_ms=timeout_ms,
         )
diff --git a/src/friendli/toolassistedchat.py b/src/friendli/toolassistedchat.py
index 1795f8d..04bc945 100644
--- a/src/friendli/toolassistedchat.py
+++ b/src/friendli/toolassistedchat.py
@@ -34,8 +34,8 @@ def complete(
         timeout_microseconds: OptionalNullable[int] = UNSET,
         tool_choice: Optional[
             Union[
-                models.ToolAssistedChatCompleteBodyToolChoice,
-                models.ToolAssistedChatCompleteBodyToolChoiceTypedDict,
+                models.ServerlessToolAssistedChatCompleteBodyToolChoice,
+                models.ServerlessToolAssistedChatCompleteBodyToolChoiceTypedDict,
             ]
         ] = None,
         tools: OptionalNullable[
@@ -90,7 +90,7 @@ def complete(
 
         request = models.ServerlessToolAssistedChatCompleteRequest(
             x_friendli_team=x_friendli_team,
-            tool_assisted_chat_complete_body=models.ToolAssistedChatCompleteBody(
+            serverless_tool_assisted_chat_complete_body=models.ServerlessToolAssistedChatCompleteBody(
                 model=model,
                 messages=utils.get_pydantic_model(messages, List[models.Message]),
                 eos_token=eos_token,
@@ -111,7 +111,8 @@ def complete(
                 temperature=temperature,
                 timeout_microseconds=timeout_microseconds,
                 tool_choice=utils.get_pydantic_model(
-                    tool_choice, Optional[models.ToolAssistedChatCompleteBodyToolChoice]
+                    tool_choice,
+                    Optional[models.ServerlessToolAssistedChatCompleteBodyToolChoice],
                 ),
                 tools=utils.get_pydantic_model(
                     tools, OptionalNullable[List[models.ToolAssistedChatTool]]
@@ -134,11 +135,11 @@ def complete(
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
             get_serialized_body=lambda: utils.serialize_request_body(
-                request.tool_assisted_chat_complete_body,
+                request.serverless_tool_assisted_chat_complete_body,
                 False,
                 False,
                 "json",
-                models.ToolAssistedChatCompleteBody,
+                models.ServerlessToolAssistedChatCompleteBody,
             ),
             timeout_ms=timeout_ms,
         )
@@ -206,8 +207,8 @@ async def complete_async(
         timeout_microseconds: OptionalNullable[int] = UNSET,
         tool_choice: Optional[
             Union[
-                models.ToolAssistedChatCompleteBodyToolChoice,
-                models.ToolAssistedChatCompleteBodyToolChoiceTypedDict,
+                models.ServerlessToolAssistedChatCompleteBodyToolChoice,
+                models.ServerlessToolAssistedChatCompleteBodyToolChoiceTypedDict,
             ]
         ] = None,
         tools: OptionalNullable[
@@ -262,7 +263,7 @@ async def complete_async(
 
         request = models.ServerlessToolAssistedChatCompleteRequest(
             x_friendli_team=x_friendli_team,
-            tool_assisted_chat_complete_body=models.ToolAssistedChatCompleteBody(
+            serverless_tool_assisted_chat_complete_body=models.ServerlessToolAssistedChatCompleteBody(
                 model=model,
                 messages=utils.get_pydantic_model(messages, List[models.Message]),
                 eos_token=eos_token,
@@ -283,7 +284,8 @@
                 temperature=temperature,
                 timeout_microseconds=timeout_microseconds,
                 tool_choice=utils.get_pydantic_model(
-                    tool_choice, Optional[models.ToolAssistedChatCompleteBodyToolChoice]
+                    tool_choice,
+                    Optional[models.ServerlessToolAssistedChatCompleteBodyToolChoice],
                 ),
                 tools=utils.get_pydantic_model(
                     tools, OptionalNullable[List[models.ToolAssistedChatTool]]
@@ -306,11 +308,11 @@
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
             get_serialized_body=lambda: utils.serialize_request_body(
-                request.tool_assisted_chat_complete_body,
+                request.serverless_tool_assisted_chat_complete_body,
                 False,
                 False,
                 "json",
-                models.ToolAssistedChatCompleteBody,
+                models.ServerlessToolAssistedChatCompleteBody,
             ),
             timeout_ms=timeout_ms,
         )
@@ -378,8 +380,8 @@ def stream(
         timeout_microseconds: OptionalNullable[int] = UNSET,
         tool_choice: Optional[
             Union[
-                models.ToolAssistedChatStreamBodyToolChoice,
-                models.ToolAssistedChatStreamBodyToolChoiceTypedDict,
+                models.ServerlessToolAssistedChatStreamBodyToolChoice,
+                models.ServerlessToolAssistedChatStreamBodyToolChoiceTypedDict,
             ]
         ] = None,
         tools: OptionalNullable[
@@ -434,7 +436,7 @@ def stream(
 
         request = models.ServerlessToolAssistedChatStreamRequest(
             x_friendli_team=x_friendli_team,
-            tool_assisted_chat_stream_body=models.ToolAssistedChatStreamBody(
+            serverless_tool_assisted_chat_stream_body=models.ServerlessToolAssistedChatStreamBody(
                 model=model,
                 messages=utils.get_pydantic_model(messages, List[models.Message]),
                 eos_token=eos_token,
@@ -455,7 +457,8 @@ def stream(
                 temperature=temperature,
                 timeout_microseconds=timeout_microseconds,
                 tool_choice=utils.get_pydantic_model(
-                    tool_choice, Optional[models.ToolAssistedChatStreamBodyToolChoice]
+                    tool_choice,
+                    Optional[models.ServerlessToolAssistedChatStreamBodyToolChoice],
                 ),
                 tools=utils.get_pydantic_model(
                     tools, OptionalNullable[List[models.ToolAssistedChatTool]]
@@ -478,11 +481,11 @@ def stream(
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
             get_serialized_body=lambda: utils.serialize_request_body(
-                request.tool_assisted_chat_stream_body,
+                request.serverless_tool_assisted_chat_stream_body,
                 False,
                 False,
                 "json",
-                models.ToolAssistedChatStreamBody,
+                models.ServerlessToolAssistedChatStreamBody,
             ),
             timeout_ms=timeout_ms,
         )
@@ -557,8 +560,8 @@ async def stream_async(
         timeout_microseconds: OptionalNullable[int] = UNSET,
         tool_choice: Optional[
             Union[
-                models.ToolAssistedChatStreamBodyToolChoice,
-                models.ToolAssistedChatStreamBodyToolChoiceTypedDict,
+                models.ServerlessToolAssistedChatStreamBodyToolChoice,
+                models.ServerlessToolAssistedChatStreamBodyToolChoiceTypedDict,
             ]
         ] = None,
         tools: OptionalNullable[
@@ -613,7 +616,7 @@
 
         request = models.ServerlessToolAssistedChatStreamRequest(
             x_friendli_team=x_friendli_team,
-            tool_assisted_chat_stream_body=models.ToolAssistedChatStreamBody(
+            serverless_tool_assisted_chat_stream_body=models.ServerlessToolAssistedChatStreamBody(
                 model=model,
                 messages=utils.get_pydantic_model(messages, List[models.Message]),
                 eos_token=eos_token,
@@ -634,7 +637,8 @@
                 temperature=temperature,
                 timeout_microseconds=timeout_microseconds,
                 tool_choice=utils.get_pydantic_model(
-                    tool_choice, Optional[models.ToolAssistedChatStreamBodyToolChoice]
+                    tool_choice,
+                    Optional[models.ServerlessToolAssistedChatStreamBodyToolChoice],
                 ),
                 tools=utils.get_pydantic_model(
                     tools, OptionalNullable[List[models.ToolAssistedChatTool]]
@@ -657,11 +661,11 @@
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
             get_serialized_body=lambda: utils.serialize_request_body(
-                request.tool_assisted_chat_stream_body,
+                request.serverless_tool_assisted_chat_stream_body,
                 False,
                 False,
                 "json",
-                models.ToolAssistedChatStreamBody,
+                models.ServerlessToolAssistedChatStreamBody,
             ),
             timeout_ms=timeout_ms,
         )
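Note: after this update every serverless request-body model carries a `Serverless` prefix (`ServerlessTokenizationBody`, `ServerlessDetokenizationBody`, `ServerlessToolAssistedChatCompleteBody`, `ServerlessToolAssistedChatStreamBody`), and the generated request wrappers expose matching `serverless_*_body` fields. The snippet below is a minimal sketch of building a streaming tool-assisted chat request against the renamed models; the model code and the message payload shape are illustrative assumptions, not part of this patch.

    from friendli import models

    # Class and field names come from this patch; the model code and the
    # {"role": ..., "content": ...} message shape are assumptions for illustration.
    body = models.ServerlessToolAssistedChatStreamBody(
        model="meta-llama-3.1-8b-instruct",  # hypothetical model code
        messages=[{"role": "user", "content": "What is 3 + 6?"}],
        tool_choice="auto",  # plain string; a ServerlessToolAssistedChatStreamBodyToolChoiceObject is also accepted
    )

    request = models.ServerlessToolAssistedChatStreamRequest(
        serverless_tool_assisted_chat_stream_body=body,
    )

Caller code that previously constructed `ToolAssistedChat*Body`, `TokenizationBody`, or `DetokenizationBody` directly, or passed the old `tool_assisted_chat_*_body` / `tokenization_body` / `detokenization_body` keyword arguments, needs the same rename.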