From abd5ced1115db19485dae6e34f9fb1b1908c26e6 Mon Sep 17 00:00:00 2001
From: friendli-bot <104493380+friendli-bot@users.noreply.github.com>
Date: Fri, 8 Nov 2024 10:41:56 +0900
Subject: [PATCH] Update 2024-11-08 (#16)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
.speakeasy/gen.lock | 80 +-
.speakeasy/gen.yaml | 8 +-
.speakeasy/workflow.lock | 35 +-
.speakeasy/workflow.yaml | 13 +-
README.md | 326 ++--
RELEASES.md | 6 +-
USAGE.md | 58 +-
...onrequestbody.md => chatcompletionbody.md} | 4 +-
docs/models/chatcompletionrequest.md | 8 +-
docs/models/chatcompletionresponse.md | 19 +-
docs/models/chatcompletionresponse1.md | 17 -
docs/models/chatcompletionresult.md | 10 +
docs/models/completionbody.md | 17 +
...hprompt.md => completionbodywithprompt.md} | 2 +-
...htokens.md => completionbodywithtokens.md} | 2 +-
docs/models/completionrequest.md | 8 +-
docs/models/completionrequestbody.md | 17 -
docs/models/completionresponse.md | 18 +-
docs/models/completionresponse1.md | 17 -
docs/models/completionresult.md | 9 +
...onrequestbody.md => detokenizationbody.md} | 2 +-
docs/models/detokenizationrequest.md | 8 +-
...ionresponse.md => detokenizationresult.md} | 2 +-
docs/models/security.md | 2 +-
...nse.md => streamedchatcompletionresult.md} | 2 +-
docs/models/streamedcompletionresponse.md | 8 -
docs/models/streamedcompletionresult.md | 8 +
...ata.md => streamedcompletionresultdata.md} | 2 +-
...eamedtoolassistedchatcompletionresponse.md | 10 -
...treamedtoolassistedchatcompletionresult.md | 10 +
...edtoolassistedchatcompletionresultdata.md} | 2 +-
...tionrequestbody.md => tokenizationbody.md} | 2 +-
docs/models/tokenizationrequest.md | 8 +-
...ationresponse.md => tokenizationresult.md} | 2 +-
.../toolassistedchatcompletionrequest.md | 8 +-
.../toolassistedchatcompletionresponse.md | 8 +-
...tbody.md => toolassistedcompletionbody.md} | 6 +-
...> toolassistedcompletionbodytoolchoice.md} | 2 +-
...sistedcompletionbodytoolchoicefunction.md} | 2 +-
...olassistedcompletionbodytoolchoicetype.md} | 2 +-
docs/models/toolchoiceobject.md | 8 +-
docs/sdks/inference/README.md | 134 +-
docs/sdks/serverless/README.md | 74 +-
openapi.yaml | 1412 -----------------
pyproject.toml | 2 +-
src/friendli/_version.py | 2 +-
src/friendli/inference.py | 400 +++--
src/friendli/models/__init__.py | 192 ++-
src/friendli/models/assistantmessage.py | 16 +-
...onrequestbody.py => chatcompletionbody.py} | 13 +-
src/friendli/models/chatcompletionchoice.py | 9 +-
src/friendli/models/chatcompletionop.py | 44 +-
...ionresponse.py => chatcompletionresult.py} | 4 +-
src/friendli/models/completionbody.py | 20 +
...hprompt.py => completionbodywithprompt.py} | 4 +-
...htokens.py => completionbodywithtokens.py} | 4 +-
src/friendli/models/completionop.py | 38 +-
src/friendli/models/completionrequestbody.py | 22 -
...pletionresponse.py => completionresult.py} | 4 +-
...onrequestbody.py => detokenizationbody.py} | 4 +-
src/friendli/models/detokenizationop.py | 17 +-
...ionresponse.py => detokenizationresult.py} | 4 +-
src/friendli/models/filebuiltintool.py | 9 +-
src/friendli/models/functiontool.py | 8 +-
src/friendli/models/otherbuiltintool.py | 20 +-
src/friendli/models/security.py | 4 +-
.../models/streamedchatcompletionchoice.py | 9 +-
...nse.py => streamedchatcompletionresult.py} | 4 +-
...esponse.py => streamedcompletionresult.py} | 12 +-
.../models/streamedcompletiontokencomplete.py | 9 +-
.../models/streamedcompletiontokensampled.py | 8 +-
...reamedtoolassistedchatcompletionresult.py} | 12 +-
src/friendli/models/systemmessage.py | 9 +-
src/friendli/models/textresponseformat.py | 11 +-
...tionrequestbody.py => tokenizationbody.py} | 4 +-
src/friendli/models/tokenizationop.py | 17 +-
...ationresponse.py => tokenizationresult.py} | 4 +-
src/friendli/models/tool.py | 8 +-
.../models/toolassistedchatcompletionop.py | 43 +-
...tbody.py => toolassistedcompletionbody.py} | 37 +-
src/friendli/models/toolmessage.py | 9 +-
src/friendli/models/usermessage.py | 9 +-
src/friendli/sdk.py | 16 +-
src/friendli/sdkconfiguration.py | 29 +-
src/friendli/serverless.py | 208 ++-
src/friendli/utils/security.py | 4 +-
86 files changed, 1231 insertions(+), 2469 deletions(-)
rename docs/models/{chatcompletionrequestbody.md => chatcompletionbody.md} (99%)
delete mode 100644 docs/models/chatcompletionresponse1.md
create mode 100644 docs/models/chatcompletionresult.md
create mode 100644 docs/models/completionbody.md
rename docs/models/{completionrequestbodywithprompt.md => completionbodywithprompt.md} (99%)
rename docs/models/{completionrequestbodywithtokens.md => completionbodywithtokens.md} (99%)
delete mode 100644 docs/models/completionrequestbody.md
delete mode 100644 docs/models/completionresponse1.md
create mode 100644 docs/models/completionresult.md
rename docs/models/{detokenizationrequestbody.md => detokenizationbody.md} (99%)
rename docs/models/{detokenizationresponse.md => detokenizationresult.md} (93%)
rename docs/models/{streamedchatcompletionresponse.md => streamedchatcompletionresult.md} (93%)
delete mode 100644 docs/models/streamedcompletionresponse.md
create mode 100644 docs/models/streamedcompletionresult.md
rename docs/models/{streamedcompletionresponsedata.md => streamedcompletionresultdata.md} (89%)
delete mode 100644 docs/models/streamedtoolassistedchatcompletionresponse.md
create mode 100644 docs/models/streamedtoolassistedchatcompletionresult.md
rename docs/models/{streamedtoolassistedchatcompletionresponsedata.md => streamedtoolassistedchatcompletionresultdata.md} (96%)
rename docs/models/{tokenizationrequestbody.md => tokenizationbody.md} (99%)
rename docs/models/{tokenizationresponse.md => tokenizationresult.md} (93%)
rename docs/models/{toolassistedcompletionrequestbody.md => toolassistedcompletionbody.md} (99%)
rename docs/models/{toolassistedcompletionrequestbodytoolchoice.md => toolassistedcompletionbodytoolchoice.md} (93%)
rename docs/models/{toolassistedcompletionrequestbodytoolchoicefunction.md => toolassistedcompletionbodytoolchoicefunction.md} (96%)
rename docs/models/{toolassistedcompletionrequestbodytoolchoicetype.md => toolassistedcompletionbodytoolchoicetype.md} (76%)
delete mode 100644 openapi.yaml
rename src/friendli/models/{chatcompletionrequestbody.py => chatcompletionbody.py} (98%)
rename src/friendli/models/{chatcompletionresponse.py => chatcompletionresult.py} (88%)
create mode 100644 src/friendli/models/completionbody.py
rename src/friendli/models/{completionrequestbodywithprompt.py => completionbodywithprompt.py} (99%)
rename src/friendli/models/{completionrequestbodywithtokens.py => completionbodywithtokens.py} (99%)
delete mode 100644 src/friendli/models/completionrequestbody.py
rename src/friendli/models/{completionresponse.py => completionresult.py} (84%)
rename src/friendli/models/{detokenizationrequestbody.py => detokenizationbody.py} (88%)
rename src/friendli/models/{detokenizationresponse.py => detokenizationresult.py} (83%)
rename src/friendli/models/{streamedchatcompletionresponse.py => streamedchatcompletionresult.py} (90%)
rename src/friendli/models/{streamedcompletionresponse.py => streamedcompletionresult.py} (75%)
rename src/friendli/models/{streamedtoolassistedchatcompletionresponse.py => streamedtoolassistedchatcompletionresult.py} (65%)
rename src/friendli/models/{tokenizationrequestbody.py => tokenizationbody.py} (87%)
rename src/friendli/models/{tokenizationresponse.py => tokenizationresult.py} (84%)
rename src/friendli/models/{toolassistedcompletionrequestbody.py => toolassistedcompletionbody.py} (94%)
diff --git a/.speakeasy/gen.lock b/.speakeasy/gen.lock
index d630928..1668437 100644
--- a/.speakeasy/gen.lock
+++ b/.speakeasy/gen.lock
@@ -1,15 +1,14 @@
lockVersion: 2.0.0
-id: 2ab1f3f0-3683-4414-9916-f8a387d2b0b7
+id: a1c5fd64-140c-4c5e-a0d3-b834f17979db
management:
- docChecksum: 3579e3e4e8b9edab91ef382d35f00937
+ docChecksum: 2c70ec12761d35ff903605a8858a9bb2
docVersion: v1
- speakeasyVersion: 1.434.0
+ speakeasyVersion: 1.434.7
generationVersion: 2.452.0
- releaseVersion: 0.1.4
- configChecksum: fa378ee66fbbc9b0ac1661e270f06bf3
+ releaseVersion: 0.6.0
+ configChecksum: 5b594f923a7a03f579a9965704666db8
repoURL: https://github.com/friendliai/friendli-python-internal.git
installationURL: https://github.com/friendliai/friendli-python-internal.git
- published: true
features:
python:
acceptHeaders: 3.0.0
@@ -19,6 +18,8 @@ features:
defaultEnabledRetries: 0.2.0
enumUnions: 0.1.0
envVarSecurityUsage: 0.3.2
+ examples: 3.0.0
+ flatRequests: 1.0.1
flattening: 3.1.0
globalSecurity: 3.0.2
globalSecurityCallbacks: 1.0.0
@@ -32,6 +33,7 @@ features:
sdkHooks: 1.0.0
serverEvents: 1.0.4
serverEventsSentinels: 0.1.0
+ serverIDs: 3.0.0
tests: 1.6.0
unions: 3.0.3
generatedFiles:
@@ -44,28 +46,28 @@ generatedFiles:
- docs/models/assistantmessagefunction.md
- docs/models/assistantmessagerole.md
- docs/models/assistantmessagetype.md
+ - docs/models/chatcompletionbody.md
- docs/models/chatcompletionchoice.md
- docs/models/chatcompletionchoicefunction.md
- docs/models/chatcompletionchoicemessage.md
- docs/models/chatcompletionchoicetoolcalls.md
- docs/models/chatcompletionchoicetype.md
- docs/models/chatcompletionrequest.md
- - docs/models/chatcompletionrequestbody.md
- docs/models/chatcompletionresponse.md
- - docs/models/chatcompletionresponse1.md
+ - docs/models/chatcompletionresult.md
+ - docs/models/completionbody.md
+ - docs/models/completionbodywithprompt.md
+ - docs/models/completionbodywithtokens.md
- docs/models/completionchoice.md
- docs/models/completionrequest.md
- - docs/models/completionrequestbody.md
- - docs/models/completionrequestbodywithprompt.md
- - docs/models/completionrequestbodywithtokens.md
- docs/models/completionresponse.md
- - docs/models/completionresponse1.md
+ - docs/models/completionresult.md
- docs/models/content.md
- docs/models/data.md
- docs/models/delta.md
+ - docs/models/detokenizationbody.md
- docs/models/detokenizationrequest.md
- - docs/models/detokenizationrequestbody.md
- - docs/models/detokenizationresponse.md
+ - docs/models/detokenizationresult.md
- docs/models/event.md
- docs/models/filebuiltintool.md
- docs/models/filebuiltintooltype.md
@@ -85,28 +87,28 @@ generatedFiles:
- docs/models/streamedchatcompletionchoicefunction.md
- docs/models/streamedchatcompletionchoicetoolcalls.md
- docs/models/streamedchatcompletionchoicetype.md
- - docs/models/streamedchatcompletionresponse.md
- - docs/models/streamedcompletionresponse.md
- - docs/models/streamedcompletionresponsedata.md
+ - docs/models/streamedchatcompletionresult.md
+ - docs/models/streamedcompletionresult.md
+ - docs/models/streamedcompletionresultdata.md
- docs/models/streamedcompletiontokencomplete.md
- docs/models/streamedcompletiontokencompleteevent.md
- docs/models/streamedcompletiontokensampled.md
- - docs/models/streamedtoolassistedchatcompletionresponse.md
- - docs/models/streamedtoolassistedchatcompletionresponsedata.md
+ - docs/models/streamedtoolassistedchatcompletionresult.md
+ - docs/models/streamedtoolassistedchatcompletionresultdata.md
- docs/models/streamoptions.md
- docs/models/systemmessage.md
- docs/models/textresponseformat.md
+ - docs/models/tokenizationbody.md
- docs/models/tokenizationrequest.md
- - docs/models/tokenizationrequestbody.md
- - docs/models/tokenizationresponse.md
+ - docs/models/tokenizationresult.md
- docs/models/tokensequence.md
- docs/models/tool.md
- docs/models/toolassistedchatcompletionrequest.md
- docs/models/toolassistedchatcompletionresponse.md
- - docs/models/toolassistedcompletionrequestbody.md
- - docs/models/toolassistedcompletionrequestbodytoolchoice.md
- - docs/models/toolassistedcompletionrequestbodytoolchoicefunction.md
- - docs/models/toolassistedcompletionrequestbodytoolchoicetype.md
+ - docs/models/toolassistedcompletionbody.md
+ - docs/models/toolassistedcompletionbodytoolchoice.md
+ - docs/models/toolassistedcompletionbodytoolchoicefunction.md
+ - docs/models/toolassistedcompletionbodytoolchoicetype.md
- docs/models/toolcalls.md
- docs/models/toolchoice.md
- docs/models/toolchoicefunction.md
@@ -141,19 +143,19 @@ generatedFiles:
- src/friendli/inference.py
- src/friendli/models/__init__.py
- src/friendli/models/assistantmessage.py
+ - src/friendli/models/chatcompletionbody.py
- src/friendli/models/chatcompletionchoice.py
- src/friendli/models/chatcompletionop.py
- - src/friendli/models/chatcompletionrequestbody.py
- - src/friendli/models/chatcompletionresponse.py
+ - src/friendli/models/chatcompletionresult.py
+ - src/friendli/models/completionbody.py
+ - src/friendli/models/completionbodywithprompt.py
+ - src/friendli/models/completionbodywithtokens.py
- src/friendli/models/completionchoice.py
- src/friendli/models/completionop.py
- - src/friendli/models/completionrequestbody.py
- - src/friendli/models/completionrequestbodywithprompt.py
- - src/friendli/models/completionrequestbodywithtokens.py
- - src/friendli/models/completionresponse.py
+ - src/friendli/models/completionresult.py
+ - src/friendli/models/detokenizationbody.py
- src/friendli/models/detokenizationop.py
- - src/friendli/models/detokenizationrequestbody.py
- - src/friendli/models/detokenizationresponse.py
+ - src/friendli/models/detokenizationresult.py
- src/friendli/models/filebuiltintool.py
- src/friendli/models/function.py
- src/friendli/models/functiontool.py
@@ -163,20 +165,20 @@ generatedFiles:
- src/friendli/models/sdkerror.py
- src/friendli/models/security.py
- src/friendli/models/streamedchatcompletionchoice.py
- - src/friendli/models/streamedchatcompletionresponse.py
- - src/friendli/models/streamedcompletionresponse.py
+ - src/friendli/models/streamedchatcompletionresult.py
+ - src/friendli/models/streamedcompletionresult.py
- src/friendli/models/streamedcompletiontokencomplete.py
- src/friendli/models/streamedcompletiontokensampled.py
- - src/friendli/models/streamedtoolassistedchatcompletionresponse.py
+ - src/friendli/models/streamedtoolassistedchatcompletionresult.py
- src/friendli/models/systemmessage.py
- src/friendli/models/textresponseformat.py
+ - src/friendli/models/tokenizationbody.py
- src/friendli/models/tokenizationop.py
- - src/friendli/models/tokenizationrequestbody.py
- - src/friendli/models/tokenizationresponse.py
+ - src/friendli/models/tokenizationresult.py
- src/friendli/models/tokensequence.py
- src/friendli/models/tool.py
- src/friendli/models/toolassistedchatcompletionop.py
- - src/friendli/models/toolassistedcompletionrequestbody.py
+ - src/friendli/models/toolassistedcompletionbody.py
- src/friendli/models/toolfortoolassistedchat.py
- src/friendli/models/toolmessage.py
- src/friendli/models/usage.py
diff --git a/.speakeasy/gen.yaml b/.speakeasy/gen.yaml
index f033237..20fb2ff 100644
--- a/.speakeasy/gen.yaml
+++ b/.speakeasy/gen.yaml
@@ -13,7 +13,7 @@ generation:
oAuth2ClientCredentialsEnabled: true
oAuth2PasswordEnabled: true
python:
- version: 0.1.4
+ version: 0.6.0
additionalDependencies:
dev: {}
main: {}
@@ -21,12 +21,12 @@ python:
- Speakeasy
clientServerStatusCodesAsErrors: true
description: Python Client SDK Generated by Speakeasy.
- enumFormat: enum
+ enumFormat: union
envVarPrefix: FRIENDLI
fixFlags:
responseRequiredSep2024: true
flattenGlobalSecurity: true
- flattenRequests: false
+ flattenRequests: true
flatteningOrder: parameters-first
imports:
option: openapi
@@ -37,7 +37,7 @@ python:
shared: ""
webhooks: ""
inputModelSuffix: input
- maxMethodParams: 4
+ maxMethodParams: 6
methodArguments: infer-optional-args
outputModelSuffix: output
packageName: friendli
diff --git a/.speakeasy/workflow.lock b/.speakeasy/workflow.lock
index 9b365d0..6b046dd 100644
--- a/.speakeasy/workflow.lock
+++ b/.speakeasy/workflow.lock
@@ -1,36 +1,33 @@
-speakeasyVersion: 1.434.0
+speakeasyVersion: 1.434.7
sources:
- Friendli-OpenAPI:
- sourceNamespace: friendli-openapi
- sourceRevisionDigest: sha256:e6f072ee17efc4b8618034865bb6339a12b3738326329a3a342afe1d9fedfa8e
- sourceBlobDigest: sha256:5df0dde8834747719e800ff32696ecce9371b361ec137cd188c1db8dc0c5b12b
+ Friendli-OpenAPI-Schema:
+ sourceNamespace: friendli-openapi-schema
+ sourceRevisionDigest: sha256:84cd2a7d92b5dbca01117ad887b99f6f91c245d8978d96fadb0c1a05ab821b31
+ sourceBlobDigest: sha256:d3be7cb16fdb7bf9d84980231b6b2e84979423922d27b681789f290093c4289b
tags:
- latest
targets:
friendli:
- source: Friendli-OpenAPI
- sourceNamespace: friendli-openapi
- sourceRevisionDigest: sha256:e6f072ee17efc4b8618034865bb6339a12b3738326329a3a342afe1d9fedfa8e
- sourceBlobDigest: sha256:5df0dde8834747719e800ff32696ecce9371b361ec137cd188c1db8dc0c5b12b
- codeSamplesNamespace: friendli-openapi-code-samples
- codeSamplesRevisionDigest: sha256:fe072119b3ddd638214c15da1390893008997a4bcd0f57812599b2c6005f3a48
+ source: Friendli-OpenAPI-Schema
+ sourceNamespace: friendli-openapi-schema
+ sourceRevisionDigest: sha256:84cd2a7d92b5dbca01117ad887b99f6f91c245d8978d96fadb0c1a05ab821b31
+ sourceBlobDigest: sha256:d3be7cb16fdb7bf9d84980231b6b2e84979423922d27b681789f290093c4289b
+ codeSamplesNamespace: friendli-openapi-schema-code-samples
+ codeSamplesRevisionDigest: sha256:2be646193bd0354d3e5e797150bbf02ef9d4008d1fc142ef90947b6a04dd6722
workflow:
workflowVersion: 1.0.0
speakeasyVersion: latest
sources:
- Friendli-OpenAPI:
+ Friendli-OpenAPI-Schema:
inputs:
- - location: registry.speakeasyapi.dev/friendliai/friendliai/friendli-openapi
+ - location: registry.speakeasyapi.dev/friendliai/friendliai/friendli-openapi-schema
registry:
- location: registry.speakeasyapi.dev/friendliai/friendliai/friendli-openapi
+ location: registry.speakeasyapi.dev/friendliai/friendliai/friendli-openapi-schema
targets:
friendli:
target: python
- source: Friendli-OpenAPI
- publish:
- pypi:
- token: $pypi_token
+ source: Friendli-OpenAPI-Schema
codeSamples:
registry:
- location: registry.speakeasyapi.dev/friendliai/friendliai/friendli-openapi-code-samples
+ location: registry.speakeasyapi.dev/friendliai/friendliai/friendli-openapi-schema-code-samples
blocking: false
diff --git a/.speakeasy/workflow.yaml b/.speakeasy/workflow.yaml
index 3973230..616ba65 100644
--- a/.speakeasy/workflow.yaml
+++ b/.speakeasy/workflow.yaml
@@ -1,19 +1,16 @@
workflowVersion: 1.0.0
speakeasyVersion: latest
sources:
- Friendli-OpenAPI:
+ Friendli-OpenAPI-Schema:
inputs:
- - location: registry.speakeasyapi.dev/friendliai/friendliai/friendli-openapi
+ - location: registry.speakeasyapi.dev/friendliai/friendliai/friendli-openapi-schema
registry:
- location: registry.speakeasyapi.dev/friendliai/friendliai/friendli-openapi
+ location: registry.speakeasyapi.dev/friendliai/friendliai/friendli-openapi-schema
targets:
friendli:
target: python
- source: Friendli-OpenAPI
- publish:
- pypi:
- token: $pypi_token
+ source: Friendli-OpenAPI-Schema
codeSamples:
registry:
- location: registry.speakeasyapi.dev/friendliai/friendliai/friendli-openapi-code-samples
+ location: registry.speakeasyapi.dev/friendliai/friendliai/friendli-openapi-schema-code-samples
blocking: false
diff --git a/README.md b/README.md
index fbc799b..04b4ef5 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# friendli
-Developer-friendly & type-safe Python SDK specifically catered to leverage _friendli_ API.
+Developer-friendly & type-safe Python SDK specifically catered to leverage *friendli* API.
@@ -9,6 +9,11 @@ Developer-friendly & type-safe Python SDK specifically catered to leverage _frie
+
+
+> [!IMPORTANT]
+> This SDK is not yet ready for production use. To complete setup, please follow the steps outlined in your [workspace](https://app.speakeasy.com/org/friendliai/friendliai). Delete this section before publishing to a package manager.
+
## Summary
@@ -41,7 +46,7 @@ The SDK can be installed with either *pip* or *poetry* package managers.
*PIP* is the default package installer for Python, enabling easy installation and management of packages from PyPI via the command line.
```bash
-pip install friendli
+pip install git+https://github.com/friendliai/friendli-python-internal.git
```
### Poetry
@@ -49,7 +54,7 @@ pip install friendli
*Poetry* is a modern tool that simplifies dependency management and package publishing by using a single `pyproject.toml` file to handle project metadata and dependencies.
```bash
-poetry add friendli
+poetry add git+https://github.com/friendliai/friendli-python-internal.git
```
@@ -66,32 +71,29 @@ Generally, the SDK will work well with most IDEs out of the box. However, when u
## SDK Example Usage
-### Example
+### Chat completion
+
+Given a list of messages forming a conversation, the model generates a response.
```python
# Synchronous Example
-import friendli
from friendli import Friendli
import os
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.inference.chat_completion(chat_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
-})
+res = s.inference.chat_completion(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+], max_tokens=200)
if res is not None:
for event in res:
@@ -105,28 +107,23 @@ The same SDK client can also be used to make asynchronous requests by importing a
```python
# Asynchronous Example
import asyncio
-import friendli
from friendli import Friendli
import os
async def main():
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
- res = await s.inference.chat_completion_async(chat_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
- })
+ res = await s.inference.chat_completion_async(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+ ], max_tokens=200)
if res is not None:
for event in res:
# handle event
@@ -167,28 +164,23 @@ terminate when the server no longer has any events to send and closes the
underlying connection.
```python
-import friendli
from friendli import Friendli
import os
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.inference.chat_completion(chat_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
-})
+res = s.inference.chat_completion(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+], max_tokens=200)
if res is not None:
for event in res:
@@ -208,29 +200,24 @@ Some of the endpoints in this SDK support retries. If you use the SDK without an
To change the default retry strategy for a single API call, simply provide a `RetryConfig` object to the call:
```python
-import friendli
from friendli import Friendli
from friendli.utils import BackoffStrategy, RetryConfig
import os
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.inference.chat_completion(chat_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
-},
+res = s.inference.chat_completion(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+], max_tokens=200,
RetryConfig("backoff", BackoffStrategy(1, 50, 1.1, 100), False))
if res is not None:
@@ -242,30 +229,25 @@ if res is not None:
If you'd like to override the default retry strategy for all operations that support retries, you can use the `retry_config` optional parameter when initializing the SDK:
```python
-import friendli
from friendli import Friendli
from friendli.utils import BackoffStrategy, RetryConfig
import os
s = Friendli(
retry_config=RetryConfig("backoff", BackoffStrategy(1, 50, 1.1, 100), False),
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.inference.chat_completion(chat_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
-})
+res = s.inference.chat_completion(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+], max_tokens=200)
if res is not None:
for event in res:
@@ -298,30 +280,25 @@ When custom error responses are specified for an operation, the SDK may also rai
### Example
```python
-import friendli
from friendli import Friendli, models
import os
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
res = None
try:
- res = s.inference.chat_completion(chat_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
- })
+ res = s.inference.chat_completion(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+ ], max_tokens=200)
if res is not None:
for event in res:
@@ -337,41 +314,36 @@ except models.SDKError as e:
## Server Selection
-### Select Server by Index
+### Select Server by Name
-You can override the default server globally by passing a server index to the `server_idx: int` optional parameter when initializing the SDK client instance. The selected server will then be used as the default on the operations that use it. This table lists the indexes associated with the available servers:
+You can override the default server globally by passing a server name to the `server: str` optional parameter when initializing the SDK client instance. The selected server will then be used as the default on the operations that use it. This table lists the names associated with the available servers:
-| # | Server | Variables |
-| - | ------ | --------- |
-| 0 | `https://inference.friendli.ai` | None |
-| 1 | `https://inference.friendli.ai/dedicated` | None |
+| Name | Server | Variables |
+| ----- | ------ | --------- |
+| `serverless` | `https://inference.friendli.ai` | None |
+| `dedicated` | `https://inference.friendli.ai/dedicated` | None |
#### Example
```python
-import friendli
from friendli import Friendli
import os
s = Friendli(
- server_idx=1,
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ server="dedicated",
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.inference.chat_completion(chat_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
-})
+res = s.inference.chat_completion(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+], max_tokens=200)
if res is not None:
for event in res:
@@ -385,29 +357,24 @@ if res is not None:
The default server can also be overridden globally by passing a URL to the `server_url: str` optional parameter when initializing the SDK client instance. For example:
```python
-import friendli
from friendli import Friendli
import os
s = Friendli(
server_url="https://inference.friendli.ai",
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.inference.chat_completion(chat_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
-})
+res = s.inference.chat_completion(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+], max_tokens=200)
if res is not None:
for event in res:
@@ -420,36 +387,30 @@ if res is not None:
The server URL can also be overridden on a per-operation basis, provided a server list was specified for the operation. For example:
```python
-import friendli
from friendli import Friendli
import os
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.serverless.tool_assisted_chat_completion(tool_assisted_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
- "tools": [
- {
- "type": friendli.OtherBuiltInToolType.MATH_CALCULATOR,
- },
- {
- "type": friendli.OtherBuiltInToolType.WEB_URL,
- },
- ],
-}, server_url="https://inference.friendli.ai")
+res = s.serverless.tool_assisted_chat_completion(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+], max_tokens=200, tools=[
+ {
+ "type": "math:calculator",
+ },
+ {
+ "type": "web:url",
+ },
+], server_url="https://inference.friendli.ai")
if res is not None:
for event in res:
@@ -547,34 +508,29 @@ s = Friendli(async_client=CustomClient(httpx.AsyncClient()))
This SDK supports the following security scheme globally:
-| Name | Type | Scheme | Environment Variable |
-| ---------------------- | ---------------------- | ---------------------- | ---------------------- |
-| `bearer_auth` | http | HTTP Bearer | `FRIENDLI_BEARER_AUTH` |
+| Name | Type | Scheme | Environment Variable |
+| -------------------- | -------------------- | -------------------- | -------------------- |
+| `token` | http | HTTP Bearer | `FRIENDLI_TOKEN` |
-To authenticate with the API the `bearer_auth` parameter must be set when initializing the SDK client instance. For example:
+To authenticate with the API, the `token` parameter must be set when initializing the SDK client instance. For example:
```python
-import friendli
from friendli import Friendli
import os
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.inference.chat_completion(chat_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
-})
+res = s.inference.chat_completion(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+], max_tokens=200)
if res is not None:
for event in res:
@@ -613,7 +569,7 @@ looking for the latest version.
## Contributions
-While we value open-source contributions to this SDK, this library is generated programmatically. Any manual changes added to internal files will be overwritten on the next generation.
-We look forward to hearing your feedback. Feel free to open a PR or an issue with a proof of concept and we'll do our best to include it in a future release.
+While we value open-source contributions to this SDK, this library is generated programmatically. Any manual changes added to internal files will be overwritten on the next generation.
+We look forward to hearing your feedback. Feel free to open a PR or an issue with a proof of concept and we'll do our best to include it in a future release.
### SDK Created by [Speakeasy](https://www.speakeasy.com/?utm_source=friendli&utm_campaign=python)
diff --git a/RELEASES.md b/RELEASES.md
index 293af2f..3035cb8 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -1,11 +1,9 @@
-## 2024-11-07 04:43:57
+## 2024-11-07 07:15:28
### Changes
Based on:
- OpenAPI Doc
- Speakeasy CLI 1.434.0 (2.452.0) https://github.com/speakeasy-api/speakeasy
### Generated
-- [python v0.1.3] .
-### Releases
-- [PyPI v0.1.3] https://pypi.org/project/friendli/0.1.3 - .
\ No newline at end of file
+- [python v0.5.0] .
\ No newline at end of file
diff --git a/USAGE.md b/USAGE.md
index 9e8df02..f68c656 100644
--- a/USAGE.md
+++ b/USAGE.md
@@ -1,28 +1,27 @@
+### Chat completion
+
+Given a list of messages forming a conversation, the model generates a response.
+
```python
# Synchronous Example
-import friendli
from friendli import Friendli
import os
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.inference.chat_completion(chat_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
-})
+res = s.inference.chat_completion(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+], max_tokens=200)
if res is not None:
for event in res:
@@ -36,28 +35,23 @@ The same SDK client can also be used to make asynchronous requests by importing a
```python
# Asynchronous Example
import asyncio
-import friendli
from friendli import Friendli
import os
async def main():
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
- res = await s.inference.chat_completion_async(chat_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
- })
+ res = await s.inference.chat_completion_async(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+ ], max_tokens=200)
if res is not None:
for event in res:
# handle event
diff --git a/docs/models/chatcompletionrequestbody.md b/docs/models/chatcompletionbody.md
similarity index 99%
rename from docs/models/chatcompletionrequestbody.md
rename to docs/models/chatcompletionbody.md
index 023745c..a67c04e 100644
--- a/docs/models/chatcompletionrequestbody.md
+++ b/docs/models/chatcompletionbody.md
@@ -1,4 +1,4 @@
-# ChatCompletionRequestBody
+# ChatCompletionBody
## Fields
@@ -6,7 +6,7 @@
| Field | Type | Required | Description | Example |
| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `model` | *str* | :heavy_check_mark: | Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models). | meta-llama-3.1-8b-instruct |
-| `messages` | List[[models.Message](../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello"
}
] |
+| `messages` | List[[models.Message](../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
] |
| `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | |
| `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | |
| `logit_bias` | [OptionalNullable[models.LogitBias]](../models/logitbias.md) | :heavy_minus_sign: | Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. | |
diff --git a/docs/models/chatcompletionrequest.md b/docs/models/chatcompletionrequest.md
index 68667f1..34edc5c 100644
--- a/docs/models/chatcompletionrequest.md
+++ b/docs/models/chatcompletionrequest.md
@@ -3,7 +3,7 @@
## Fields
-| Field | Type | Required | Description |
-| ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ |
-| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
-| `chat_completion_request_body` | [Optional[models.ChatCompletionRequestBody]](../models/chatcompletionrequestbody.md) | :heavy_minus_sign: | N/A |
\ No newline at end of file
+| Field | Type | Required | Description |
+| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| `chat_completion_body` | [models.ChatCompletionBody](../models/chatcompletionbody.md) | :heavy_check_mark: | N/A |
+| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
\ No newline at end of file
diff --git a/docs/models/chatcompletionresponse.md b/docs/models/chatcompletionresponse.md
index 93545d4..98bc918 100644
--- a/docs/models/chatcompletionresponse.md
+++ b/docs/models/chatcompletionresponse.md
@@ -1,10 +1,17 @@
# ChatCompletionResponse
-## Fields
+## Supported Types
+
+### `models.ChatCompletionResult`
+
+```python
+value: models.ChatCompletionResult = /* values here */
+```
+
+### `Union[Generator[models.StreamedChatCompletionResult, None, None], AsyncGenerator[models.StreamedChatCompletionResult, None]]`
+
+```python
+value: Union[Generator[models.StreamedChatCompletionResult, None, None], AsyncGenerator[models.StreamedChatCompletionResult, None]] = /* values here */
+```
-| Field | Type | Required | Description |
-| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- |
-| `choices` | List[[models.ChatCompletionChoice](../models/chatcompletionchoice.md)] | :heavy_check_mark: | N/A |
-| `usage` | [models.Usage](../models/usage.md) | :heavy_check_mark: | N/A |
-| `created` | *Optional[int]* | :heavy_minus_sign: | The Unix timestamp (in seconds) for when the generation completed. |
\ No newline at end of file
diff --git a/docs/models/chatcompletionresponse1.md b/docs/models/chatcompletionresponse1.md
deleted file mode 100644
index 56e29b0..0000000
--- a/docs/models/chatcompletionresponse1.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# ChatCompletionResponse1
-
-
-## Supported Types
-
-### `models.ChatCompletionResponse`
-
-```python
-value: models.ChatCompletionResponse = /* values here */
-```
-
-### `Union[Generator[models.StreamedChatCompletionResponse, None, None], AsyncGenerator[models.StreamedChatCompletionResponse, None]]`
-
-```python
-value: Union[Generator[models.StreamedChatCompletionResponse, None, None], AsyncGenerator[models.StreamedChatCompletionResponse, None]] = /* values here */
-```
-
diff --git a/docs/models/chatcompletionresult.md b/docs/models/chatcompletionresult.md
new file mode 100644
index 0000000..a3f0c99
--- /dev/null
+++ b/docs/models/chatcompletionresult.md
@@ -0,0 +1,10 @@
+# ChatCompletionResult
+
+
+## Fields
+
+| Field | Type | Required | Description |
+| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- |
+| `choices` | List[[models.ChatCompletionChoice](../models/chatcompletionchoice.md)] | :heavy_check_mark: | N/A |
+| `usage` | [models.Usage](../models/usage.md) | :heavy_check_mark: | N/A |
+| `created` | *Optional[int]* | :heavy_minus_sign: | The Unix timestamp (in seconds) for when the generation completed. |
\ No newline at end of file
diff --git a/docs/models/completionbody.md b/docs/models/completionbody.md
new file mode 100644
index 0000000..3367409
--- /dev/null
+++ b/docs/models/completionbody.md
@@ -0,0 +1,17 @@
+# CompletionBody
+
+
+## Supported Types
+
+### `models.CompletionBodyWithPrompt`
+
+```python
+value: models.CompletionBodyWithPrompt = /* values here */
+```
+
+### `models.CompletionBodyWithTokens`
+
+```python
+value: models.CompletionBodyWithTokens = /* values here */
+```
+
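The new `CompletionBody` page describes a union of the prompt-based and token-based request bodies. A hedged sketch of the two shapes, assuming the variants are distinguished by `prompt` vs. `tokens` fields (field names inferred from the variant model names, not confirmed by this diff):

```python
# Field names below are assumptions inferred from CompletionBodyWithPrompt /
# CompletionBodyWithTokens; check the generated model docs before relying on them.

# Prompt variant: plain text input.
body_with_prompt = {
    "model": "meta-llama-3.1-8b-instruct",
    "prompt": "Say hello.",
    "max_tokens": 50,
}

# Tokens variant: pre-tokenized input instead of a prompt string.
body_with_tokens = {
    "model": "meta-llama-3.1-8b-instruct",
    "tokens": [1, 2, 3],
    "max_tokens": 50,
}
```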
diff --git a/docs/models/completionrequestbodywithprompt.md b/docs/models/completionbodywithprompt.md
similarity index 99%
rename from docs/models/completionrequestbodywithprompt.md
rename to docs/models/completionbodywithprompt.md
index 701bf5f..1a356f8 100644
--- a/docs/models/completionrequestbodywithprompt.md
+++ b/docs/models/completionbodywithprompt.md
@@ -1,4 +1,4 @@
-# CompletionRequestBodyWithPrompt
+# CompletionBodyWithPrompt
## Fields
diff --git a/docs/models/completionrequestbodywithtokens.md b/docs/models/completionbodywithtokens.md
similarity index 99%
rename from docs/models/completionrequestbodywithtokens.md
rename to docs/models/completionbodywithtokens.md
index 4143baf..97cb343 100644
--- a/docs/models/completionrequestbodywithtokens.md
+++ b/docs/models/completionbodywithtokens.md
@@ -1,4 +1,4 @@
-# CompletionRequestBodyWithTokens
+# CompletionBodyWithTokens
## Fields
diff --git a/docs/models/completionrequest.md b/docs/models/completionrequest.md
index afa00e1..6683670 100644
--- a/docs/models/completionrequest.md
+++ b/docs/models/completionrequest.md
@@ -3,7 +3,7 @@
## Fields
-| Field | Type | Required | Description |
-| ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- |
-| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
-| `completion_request_body` | [Optional[models.CompletionRequestBody]](../models/completionrequestbody.md) | :heavy_minus_sign: | N/A |
\ No newline at end of file
+| Field | Type | Required | Description |
+| ---------------------------------------------------- | ---------------------------------------------------- | ---------------------------------------------------- | ---------------------------------------------------- |
+| `completion_body` | [models.CompletionBody](../models/completionbody.md) | :heavy_check_mark: | N/A |
+| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
\ No newline at end of file
diff --git a/docs/models/completionrequestbody.md b/docs/models/completionrequestbody.md
deleted file mode 100644
index 5b21796..0000000
--- a/docs/models/completionrequestbody.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# CompletionRequestBody
-
-
-## Supported Types
-
-### `models.CompletionRequestBodyWithPrompt`
-
-```python
-value: models.CompletionRequestBodyWithPrompt = /* values here */
-```
-
-### `models.CompletionRequestBodyWithTokens`
-
-```python
-value: models.CompletionRequestBodyWithTokens = /* values here */
-```
-
diff --git a/docs/models/completionresponse.md b/docs/models/completionresponse.md
index 0b00ef2..1b6f2b2 100644
--- a/docs/models/completionresponse.md
+++ b/docs/models/completionresponse.md
@@ -1,9 +1,17 @@
# CompletionResponse
-## Fields
+## Supported Types
+
+### `models.CompletionResult`
+
+```python
+value: models.CompletionResult = /* values here */
+```
+
+### `Union[Generator[models.StreamedCompletionResult, None, None], AsyncGenerator[models.StreamedCompletionResult, None]]`
+
+```python
+value: Union[Generator[models.StreamedCompletionResult, None, None], AsyncGenerator[models.StreamedCompletionResult, None]] = /* values here */
+```
-| Field | Type | Required | Description |
-| -------------------------------------------------------------- | -------------------------------------------------------------- | -------------------------------------------------------------- | -------------------------------------------------------------- |
-| `choices` | List[[models.CompletionChoice](../models/completionchoice.md)] | :heavy_check_mark: | N/A |
-| `usage` | [models.Usage](../models/usage.md) | :heavy_check_mark: | N/A |
\ No newline at end of file
diff --git a/docs/models/completionresponse1.md b/docs/models/completionresponse1.md
deleted file mode 100644
index 45f7928..0000000
--- a/docs/models/completionresponse1.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# CompletionResponse1
-
-
-## Supported Types
-
-### `models.CompletionResponse`
-
-```python
-value: models.CompletionResponse = /* values here */
-```
-
-### `Union[Generator[models.StreamedCompletionResponse, None, None], AsyncGenerator[models.StreamedCompletionResponse, None]]`
-
-```python
-value: Union[Generator[models.StreamedCompletionResponse, None, None], AsyncGenerator[models.StreamedCompletionResponse, None]] = /* values here */
-```
-
diff --git a/docs/models/completionresult.md b/docs/models/completionresult.md
new file mode 100644
index 0000000..9a23b0d
--- /dev/null
+++ b/docs/models/completionresult.md
@@ -0,0 +1,9 @@
+# CompletionResult
+
+
+## Fields
+
+| Field | Type | Required | Description |
+| -------------------------------------------------------------- | -------------------------------------------------------------- | -------------------------------------------------------------- | -------------------------------------------------------------- |
+| `choices` | List[[models.CompletionChoice](../models/completionchoice.md)] | :heavy_check_mark: | N/A |
+| `usage` | [models.Usage](../models/usage.md) | :heavy_check_mark: | N/A |
\ No newline at end of file
diff --git a/docs/models/detokenizationrequestbody.md b/docs/models/detokenizationbody.md
similarity index 99%
rename from docs/models/detokenizationrequestbody.md
rename to docs/models/detokenizationbody.md
index 5ecc69a..0f2e202 100644
--- a/docs/models/detokenizationrequestbody.md
+++ b/docs/models/detokenizationbody.md
@@ -1,4 +1,4 @@
-# DetokenizationRequestBody
+# DetokenizationBody
## Fields
diff --git a/docs/models/detokenizationrequest.md b/docs/models/detokenizationrequest.md
index 90d6e5a..f691cc2 100644
--- a/docs/models/detokenizationrequest.md
+++ b/docs/models/detokenizationrequest.md
@@ -3,7 +3,7 @@
## Fields
-| Field | Type | Required | Description |
-| ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ |
-| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
-| `detokenization_request_body` | [Optional[models.DetokenizationRequestBody]](../models/detokenizationrequestbody.md) | :heavy_minus_sign: | N/A |
\ No newline at end of file
+| Field | Type | Required | Description |
+| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| `detokenization_body` | [models.DetokenizationBody](../models/detokenizationbody.md) | :heavy_check_mark: | N/A |
+| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
\ No newline at end of file
diff --git a/docs/models/detokenizationresponse.md b/docs/models/detokenizationresult.md
similarity index 93%
rename from docs/models/detokenizationresponse.md
rename to docs/models/detokenizationresult.md
index 241d9c7..f432cdb 100644
--- a/docs/models/detokenizationresponse.md
+++ b/docs/models/detokenizationresult.md
@@ -1,4 +1,4 @@
-# DetokenizationResponse
+# DetokenizationResult
Successfully detokenized the tokens.
diff --git a/docs/models/security.md b/docs/models/security.md
index 8ca6325..4750d59 100644
--- a/docs/models/security.md
+++ b/docs/models/security.md
@@ -5,4 +5,4 @@
| Field | Type | Required | Description |
| ------------------ | ------------------ | ------------------ | ------------------ |
-| `bearer_auth` | *Optional[str]* | :heavy_minus_sign: | N/A |
\ No newline at end of file
+| `token` | *Optional[str]* | :heavy_minus_sign: | N/A |
\ No newline at end of file
diff --git a/docs/models/streamedchatcompletionresponse.md b/docs/models/streamedchatcompletionresult.md
similarity index 93%
rename from docs/models/streamedchatcompletionresponse.md
rename to docs/models/streamedchatcompletionresult.md
index 445dc64..945b66d 100644
--- a/docs/models/streamedchatcompletionresponse.md
+++ b/docs/models/streamedchatcompletionresult.md
@@ -1,4 +1,4 @@
-# StreamedChatCompletionResponse
+# StreamedChatCompletionResult
A server-sent event containing chat completion content.
diff --git a/docs/models/streamedcompletionresponse.md b/docs/models/streamedcompletionresponse.md
deleted file mode 100644
index da44019..0000000
--- a/docs/models/streamedcompletionresponse.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# StreamedCompletionResponse
-
-
-## Fields
-
-| Field | Type | Required | Description |
-| ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ |
-| `data` | [models.StreamedCompletionResponseData](../models/streamedcompletionresponsedata.md) | :heavy_check_mark: | N/A |
\ No newline at end of file
diff --git a/docs/models/streamedcompletionresult.md b/docs/models/streamedcompletionresult.md
new file mode 100644
index 0000000..1145055
--- /dev/null
+++ b/docs/models/streamedcompletionresult.md
@@ -0,0 +1,8 @@
+# StreamedCompletionResult
+
+
+## Fields
+
+| Field | Type | Required | Description |
+| -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- |
+| `data` | [models.StreamedCompletionResultData](../models/streamedcompletionresultdata.md) | :heavy_check_mark: | N/A |
\ No newline at end of file
diff --git a/docs/models/streamedcompletionresponsedata.md b/docs/models/streamedcompletionresultdata.md
similarity index 89%
rename from docs/models/streamedcompletionresponsedata.md
rename to docs/models/streamedcompletionresultdata.md
index 2d3826b..c7bb2b4 100644
--- a/docs/models/streamedcompletionresponsedata.md
+++ b/docs/models/streamedcompletionresultdata.md
@@ -1,4 +1,4 @@
-# StreamedCompletionResponseData
+# StreamedCompletionResultData
## Supported Types
diff --git a/docs/models/streamedtoolassistedchatcompletionresponse.md b/docs/models/streamedtoolassistedchatcompletionresponse.md
deleted file mode 100644
index 32e306d..0000000
--- a/docs/models/streamedtoolassistedchatcompletionresponse.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# StreamedToolAssistedChatCompletionResponse
-
-A server-sent event containing chat completion content.
-
-
-## Fields
-
-| Field | Type | Required | Description |
-| -------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- |
-| `data` | [models.StreamedToolAssistedChatCompletionResponseData](../models/streamedtoolassistedchatcompletionresponsedata.md) | :heavy_check_mark: | N/A |
\ No newline at end of file
diff --git a/docs/models/streamedtoolassistedchatcompletionresult.md b/docs/models/streamedtoolassistedchatcompletionresult.md
new file mode 100644
index 0000000..d11445c
--- /dev/null
+++ b/docs/models/streamedtoolassistedchatcompletionresult.md
@@ -0,0 +1,10 @@
+# StreamedToolAssistedChatCompletionResult
+
+A server-sent event containing chat completion content.
+
+
+## Fields
+
+| Field | Type | Required | Description |
+| ---------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- |
+| `data` | [models.StreamedToolAssistedChatCompletionResultData](../models/streamedtoolassistedchatcompletionresultdata.md) | :heavy_check_mark: | N/A |
\ No newline at end of file
diff --git a/docs/models/streamedtoolassistedchatcompletionresponsedata.md b/docs/models/streamedtoolassistedchatcompletionresultdata.md
similarity index 96%
rename from docs/models/streamedtoolassistedchatcompletionresponsedata.md
rename to docs/models/streamedtoolassistedchatcompletionresultdata.md
index f399110..0657bed 100644
--- a/docs/models/streamedtoolassistedchatcompletionresponsedata.md
+++ b/docs/models/streamedtoolassistedchatcompletionresultdata.md
@@ -1,4 +1,4 @@
-# StreamedToolAssistedChatCompletionResponseData
+# StreamedToolAssistedChatCompletionResultData
## Fields
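
The streamed-result models renamed above each carry a single required `data` field. As a rough sketch of how that field surfaces when iterating a stream, the following reuses the keyword-argument call style introduced later in this patch; passing `stream=True` and reading `event.data` are assumptions based on the parameter and field tables, not code taken from the SDK examples.

```python
import os

from friendli import Friendli

# Minimal sketch: consume a streamed result and read the `data` field
# documented above. The internal layout of `data` is not shown in this patch,
# so only the field name itself is relied upon here.
client = Friendli(token=os.getenv("FRIENDLI_TOKEN", ""))

stream = client.inference.chat_completion(
    model="meta-llama-3.1-8b-instruct",
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=200,
    stream=True,  # assumption: the `stream` flag yields the streamed variant
)

if stream is not None:
    for event in stream:
        print(event.data)  # models.StreamedChatCompletionResultData
```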
diff --git a/docs/models/tokenizationrequestbody.md b/docs/models/tokenizationbody.md
similarity index 99%
rename from docs/models/tokenizationrequestbody.md
rename to docs/models/tokenizationbody.md
index 56c3aa9..ed1f3dd 100644
--- a/docs/models/tokenizationrequestbody.md
+++ b/docs/models/tokenizationbody.md
@@ -1,4 +1,4 @@
-# TokenizationRequestBody
+# TokenizationBody
## Fields
diff --git a/docs/models/tokenizationrequest.md b/docs/models/tokenizationrequest.md
index 7ae04ff..3708807 100644
--- a/docs/models/tokenizationrequest.md
+++ b/docs/models/tokenizationrequest.md
@@ -3,7 +3,7 @@
## Fields
-| Field | Type | Required | Description |
-| -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- |
-| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
-| `tokenization_request_body` | [Optional[models.TokenizationRequestBody]](../models/tokenizationrequestbody.md) | :heavy_minus_sign: | N/A |
\ No newline at end of file
+| Field | Type | Required | Description |
+| -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- |
+| `tokenization_body` | [models.TokenizationBody](../models/tokenizationbody.md) | :heavy_check_mark: | N/A |
+| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
\ No newline at end of file
diff --git a/docs/models/tokenizationresponse.md b/docs/models/tokenizationresult.md
similarity index 93%
rename from docs/models/tokenizationresponse.md
rename to docs/models/tokenizationresult.md
index f767575..0d8c708 100644
--- a/docs/models/tokenizationresponse.md
+++ b/docs/models/tokenizationresult.md
@@ -1,4 +1,4 @@
-# TokenizationResponse
+# TokenizationResult
Successfully tokenized the text.
diff --git a/docs/models/toolassistedchatcompletionrequest.md b/docs/models/toolassistedchatcompletionrequest.md
index e10b174..b1434e7 100644
--- a/docs/models/toolassistedchatcompletionrequest.md
+++ b/docs/models/toolassistedchatcompletionrequest.md
@@ -3,7 +3,7 @@
## Fields
-| Field | Type | Required | Description |
-| ---------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- |
-| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
-| `tool_assisted_completion_request_body` | [Optional[models.ToolAssistedCompletionRequestBody]](../models/toolassistedcompletionrequestbody.md) | :heavy_minus_sign: | N/A |
\ No newline at end of file
+| Field | Type | Required | Description |
+| ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- |
+| `tool_assisted_completion_body` | [models.ToolAssistedCompletionBody](../models/toolassistedcompletionbody.md) | :heavy_check_mark: | N/A |
+| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
\ No newline at end of file
diff --git a/docs/models/toolassistedchatcompletionresponse.md b/docs/models/toolassistedchatcompletionresponse.md
index a1bbe76..05f74cd 100644
--- a/docs/models/toolassistedchatcompletionresponse.md
+++ b/docs/models/toolassistedchatcompletionresponse.md
@@ -3,15 +3,15 @@
## Supported Types
-### `models.ChatCompletionResponse`
+### `models.ChatCompletionResult`
```python
-value: models.ChatCompletionResponse = /* values here */
+value: models.ChatCompletionResult = /* values here */
```
-### `Union[Generator[models.StreamedToolAssistedChatCompletionResponse, None, None], AsyncGenerator[models.StreamedToolAssistedChatCompletionResponse, None]]`
+### `Union[Generator[models.StreamedToolAssistedChatCompletionResult, None, None], AsyncGenerator[models.StreamedToolAssistedChatCompletionResult, None]]`
```python
-value: Union[Generator[models.StreamedToolAssistedChatCompletionResponse, None, None], AsyncGenerator[models.StreamedToolAssistedChatCompletionResponse, None]] = /* values here */
+value: Union[Generator[models.StreamedToolAssistedChatCompletionResult, None, None], AsyncGenerator[models.StreamedToolAssistedChatCompletionResult, None]] = /* values here */
```
diff --git a/docs/models/toolassistedcompletionrequestbody.md b/docs/models/toolassistedcompletionbody.md
similarity index 99%
rename from docs/models/toolassistedcompletionrequestbody.md
rename to docs/models/toolassistedcompletionbody.md
index 17a409d..7a3b79e 100644
--- a/docs/models/toolassistedcompletionrequestbody.md
+++ b/docs/models/toolassistedcompletionbody.md
@@ -1,4 +1,4 @@
-# ToolAssistedCompletionRequestBody
+# ToolAssistedCompletionBody
## Fields
@@ -6,7 +6,7 @@
| Field | Type | Required | Description | Example |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `model` | *str* | :heavy_check_mark: | Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models). | meta-llama-3.1-8b-instruct |
-| `messages` | List[[models.Message](../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello"
}
] |
+| `messages` | List[[models.Message](../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
] |
| `eos_token` | List[*int*] | :heavy_minus_sign: | A list of endpoint sentence tokens. | |
| `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | |
| `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 |
@@ -22,7 +22,7 @@
| `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.
**Caution: `stream: false` is unsupported now.**
| |
| `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | |
| `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout. | |
-| `tool_choice` | [OptionalNullable[models.ToolAssistedCompletionRequestBodyToolChoice]](../models/toolassistedcompletionrequestbodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| |
+| `tool_choice` | [OptionalNullable[models.ToolAssistedCompletionBodyToolChoice]](../models/toolassistedcompletionbodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| |
| `tools` | List[[models.ToolForToolAssistedChat](../models/toolfortoolassistedchat.md)] | :heavy_minus_sign: | A list of tools the model may call.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.
For more detailed information about each tool, please refer [here](https://docs.friendli.ai/guides/serverless_endpoints/tools/built_in_tools).
**When `tools` are specified, `min_tokens` field is unsupported.**
| |
| `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | |
| `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument. | |
\ No newline at end of file
diff --git a/docs/models/toolassistedcompletionrequestbodytoolchoice.md b/docs/models/toolassistedcompletionbodytoolchoice.md
similarity index 93%
rename from docs/models/toolassistedcompletionrequestbodytoolchoice.md
rename to docs/models/toolassistedcompletionbodytoolchoice.md
index d73408d..76312fc 100644
--- a/docs/models/toolassistedcompletionrequestbodytoolchoice.md
+++ b/docs/models/toolassistedcompletionbodytoolchoice.md
@@ -1,4 +1,4 @@
-# ToolAssistedCompletionRequestBodyToolChoice
+# ToolAssistedCompletionBodyToolChoice
Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
diff --git a/docs/models/toolassistedcompletionrequestbodytoolchoicefunction.md b/docs/models/toolassistedcompletionbodytoolchoicefunction.md
similarity index 96%
rename from docs/models/toolassistedcompletionrequestbodytoolchoicefunction.md
rename to docs/models/toolassistedcompletionbodytoolchoicefunction.md
index 1ae5e1e..68303df 100644
--- a/docs/models/toolassistedcompletionrequestbodytoolchoicefunction.md
+++ b/docs/models/toolassistedcompletionbodytoolchoicefunction.md
@@ -1,4 +1,4 @@
-# ToolAssistedCompletionRequestBodyToolChoiceFunction
+# ToolAssistedCompletionBodyToolChoiceFunction
## Fields
diff --git a/docs/models/toolassistedcompletionrequestbodytoolchoicetype.md b/docs/models/toolassistedcompletionbodytoolchoicetype.md
similarity index 76%
rename from docs/models/toolassistedcompletionrequestbodytoolchoicetype.md
rename to docs/models/toolassistedcompletionbodytoolchoicetype.md
index 2a41b39..6963e5f 100644
--- a/docs/models/toolassistedcompletionrequestbodytoolchoicetype.md
+++ b/docs/models/toolassistedcompletionbodytoolchoicetype.md
@@ -1,4 +1,4 @@
-# ToolAssistedCompletionRequestBodyToolChoiceType
+# ToolAssistedCompletionBodyToolChoiceType
The type of the tool. Currently, only `function` is supported.
diff --git a/docs/models/toolchoiceobject.md b/docs/models/toolchoiceobject.md
index 0eb5d21..3fbc38b 100644
--- a/docs/models/toolchoiceobject.md
+++ b/docs/models/toolchoiceobject.md
@@ -3,7 +3,7 @@
## Fields
-| Field | Type | Required | Description |
-| ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------ |
-| `type` | [models.ToolAssistedCompletionRequestBodyToolChoiceType](../models/toolassistedcompletionrequestbodytoolchoicetype.md) | :heavy_check_mark: | The type of the tool. Currently, only `function` is supported. |
-| `function` | [models.ToolAssistedCompletionRequestBodyToolChoiceFunction](../models/toolassistedcompletionrequestbodytoolchoicefunction.md) | :heavy_check_mark: | N/A |
\ No newline at end of file
+| Field | Type | Required | Description |
+| ---------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- |
+| `type` | [models.ToolAssistedCompletionBodyToolChoiceType](../models/toolassistedcompletionbodytoolchoicetype.md) | :heavy_check_mark: | The type of the tool. Currently, only `function` is supported. |
+| `function` | [models.ToolAssistedCompletionBodyToolChoiceFunction](../models/toolassistedcompletionbodytoolchoicefunction.md) | :heavy_check_mark: | N/A |
\ No newline at end of file
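
The `ToolAssistedCompletionBodyToolChoice*` renames above all describe the single `tool_choice` argument. A minimal sketch of the two forms it documents, assuming the flattened keyword-argument call style introduced later in this patch and that plain strings/dicts are accepted for the generated models; `my_function` is the hypothetical tool name used in the docs text itself.

```python
import os

from friendli import Friendli

client = Friendli(token=os.getenv("FRIENDLI_TOKEN", ""))

# String mode: "none", "auto" (the default), or "required".
res = client.serverless.tool_assisted_chat_completion(
    model="meta-llama-3.1-8b-instruct",
    messages=[{"role": "user", "content": "What is 3 ** 7?"}],
    max_tokens=200,
    tools=[{"type": "math:calculator"}],  # built-in tool, as in the README example
    tool_choice="required",  # force at least one tool call before responding
    # Object form documented in ToolChoiceObject (hypothetical tool name):
    # tool_choice={"type": "function", "function": {"name": "my_function"}},
)

if res is not None:
    for event in res:
        print(event, flush=True)
```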
diff --git a/docs/sdks/inference/README.md b/docs/sdks/inference/README.md
index 769b209..63d08cd 100644
--- a/docs/sdks/inference/README.md
+++ b/docs/sdks/inference/README.md
@@ -17,28 +17,23 @@ Given a list of messages forming a conversation, the model generates a response.
### Example Usage
```python
-import friendli
from friendli import Friendli
import os
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.inference.chat_completion(chat_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
-})
+res = s.inference.chat_completion(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+], max_tokens=200)
if res is not None:
for event in res:
@@ -49,15 +44,38 @@ if res is not None:
### Parameters
-| Parameter | Type | Required | Description |
-| --------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- |
-| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
-| `chat_completion_request_body` | [Optional[models.ChatCompletionRequestBody]](../../models/chatcompletionrequestbody.md) | :heavy_minus_sign: | N/A |
-| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. |
+| Parameter | Type | Required | Description | Example |
+| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model` | *str* | :heavy_check_mark: | Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models). | meta-llama-3.1-8b-instruct |
+| `messages` | List[[models.Message](../../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
] |
+| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | |
+| `eos_token` | List[*int*] | :heavy_minus_sign: | A list of end-of-sentence tokens. | |
+| `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | |
+| `logit_bias` | [OptionalNullable[models.LogitBias]](../../models/logitbias.md) | :heavy_minus_sign: | Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model. | |
+| `logprobs` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to return log probabilities of the output tokens or not. | |
+| `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 |
+| `min_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.
**This field is unsupported when `tools` are specified.**
| |
+| `n` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument. | |
+| `parallel_tool_calls` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to enable parallel function calling. | |
+| `presence_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text. | |
+| `repetition_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument. | |
+| `response_format` | [OptionalNullable[models.TextResponseFormat]](../../models/textresponseformat.md) | :heavy_minus_sign: | The enforced format of the model's output.
Note that the content of the output message may be truncated if it exceeds the `max_tokens`.
You can check this by verifying that the `finish_reason` of the output message is `length`.
***Important***
You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`).
Otherwise, the model may produce an unending stream of whitespace or other characters.
| |
+| `seed` | List[*int*] | :heavy_minus_sign: | Seed to control the random procedure. If nothing is given, a random seed is used for sampling, and the seed is returned along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations. | |
+| `stop` | List[*str*] | :heavy_minus_sign: | When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list. | |
+| `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream the generation result. When set to `true`, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. | |
+| `stream_options` | [OptionalNullable[models.StreamOptions]](../../models/streamoptions.md) | :heavy_minus_sign: | Options related to stream.
It can only be used when `stream: true`.
| |
+| `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | |
+| `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout in microseconds. The API responds with an `HTTP 429 Too Many Requests` status code when the timeout is exceeded. Default behavior is no timeout. | |
+| `tool_choice` | [OptionalNullable[models.ToolChoice]](../../models/toolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| |
+| `tools` | List[[models.Tool](../../models/tool.md)] | :heavy_minus_sign: | A list of tools the model may call.
Currently, only functions are supported as a tool.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.
**When `tools` are specified, `min_tokens` field is unsupported.**
| |
+| `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | |
+| `top_logprobs` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used. | |
+| `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument. | |
+| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | |
### Response
-**[models.ChatCompletionResponse1](../../models/chatcompletionresponse1.md)**
+**[models.ChatCompletionResponse](../../models/chatcompletionresponse.md)**
### Errors
@@ -76,10 +94,10 @@ from friendli import Friendli
import os
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.inference.completion(completion_request_body={
+res = s.inference.completion(completion_body={
"prompt": "Say this is a test!",
"model": "meta-llama-3.1-8b-instruct",
"max_tokens": 200,
@@ -95,15 +113,15 @@ if res is not None:
### Parameters
-| Parameter | Type | Required | Description |
-| ------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- |
-| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
-| `completion_request_body` | [Optional[models.CompletionRequestBody]](../../models/completionrequestbody.md) | :heavy_minus_sign: | N/A |
-| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. |
+| Parameter | Type | Required | Description |
+| ------------------------------------------------------------------- | ------------------------------------------------------------------- | ------------------------------------------------------------------- | ------------------------------------------------------------------- |
+| `completion_body` | [models.CompletionBody](../../models/completionbody.md) | :heavy_check_mark: | N/A |
+| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
+| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. |
### Response
-**[models.CompletionResponse1](../../models/completionresponse1.md)**
+**[models.CompletionResponse](../../models/completionresponse.md)**
### Errors
@@ -122,13 +140,10 @@ from friendli import Friendli
import os
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.inference.tokenization(tokenization_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "prompt": "What is generative AI?",
-})
+res = s.inference.tokenization(model="meta-llama-3.1-8b-instruct", prompt="What is generative AI?")
if res is not None:
# handle response
@@ -138,15 +153,16 @@ if res is not None:
### Parameters
-| Parameter | Type | Required | Description |
-| ----------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
-| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
-| `tokenization_request_body` | [Optional[models.TokenizationRequestBody]](../../models/tokenizationrequestbody.md) | :heavy_minus_sign: | N/A |
-| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. |
+| Parameter | Type | Required | Description | Example |
+| ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model` | *str* | :heavy_check_mark: | Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models). | meta-llama-3.1-8b-instruct |
+| `prompt` | *str* | :heavy_check_mark: | Input text prompt to tokenize. | What is generative AI? |
+| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | |
+| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | |
### Response
-**[models.TokenizationResponse](../../models/tokenizationresponse.md)**
+**[models.TokenizationResult](../../models/tokenizationresult.md)**
### Errors
@@ -165,21 +181,18 @@ from friendli import Friendli
import os
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.inference.detokenization(detokenization_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "tokens": [
- 128000,
- 3923,
- 374,
- 1803,
- 1413,
- 15592,
- 30,
- ],
-})
+res = s.inference.detokenization(model="meta-llama-3.1-8b-instruct", tokens=[
+ 128000,
+ 3923,
+ 374,
+ 1803,
+ 1413,
+ 15592,
+ 30,
+])
if res is not None:
# handle response
@@ -189,15 +202,16 @@ if res is not None:
### Parameters
-| Parameter | Type | Required | Description |
-| --------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- |
-| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
-| `detokenization_request_body` | [Optional[models.DetokenizationRequestBody]](../../models/detokenizationrequestbody.md) | :heavy_minus_sign: | N/A |
-| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. |
+| Parameter | Type | Required | Description | Example |
+| ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ |
+| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | |
+| `model` | *Optional[str]* | :heavy_minus_sign: | Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models). | meta-llama-3.1-8b-instruct |
+| `tokens` | List[*int*] | :heavy_minus_sign: | A token sequence to detokenize. | [
128000,
3923,
374,
1803,
1413,
15592,
30
] |
+| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | |
### Response
-**[models.DetokenizationResponse](../../models/detokenizationresponse.md)**
+**[models.DetokenizationResult](../../models/detokenizationresult.md)**
### Errors
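
Tokenization and detokenization are inverse operations, so a round trip built from the two calls shown above may be a useful complement to the separate examples. This is only a sketch: the `tokens` attribute on the tokenization result is an assumption, so check `models.TokenizationResult` for the actual field carrying the token IDs.

```python
import os

from friendli import Friendli

client = Friendli(token=os.getenv("FRIENDLI_TOKEN", ""))

tokenized = client.inference.tokenization(
    model="meta-llama-3.1-8b-instruct",
    prompt="What is generative AI?",
)

if tokenized is not None:
    detokenized = client.inference.detokenization(
        model="meta-llama-3.1-8b-instruct",
        tokens=tokenized.tokens,  # assumed field name; see TokenizationResult
    )
    print(detokenized)
```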
diff --git a/docs/sdks/serverless/README.md b/docs/sdks/serverless/README.md
index 7aa53e2..c1e4bbd 100644
--- a/docs/sdks/serverless/README.md
+++ b/docs/sdks/serverless/README.md
@@ -14,36 +14,30 @@ Given a list of messages forming a conversation, the model generates a response.
### Example Usage
```python
-import friendli
from friendli import Friendli
import os
s = Friendli(
- bearer_auth=os.getenv("FRIENDLI_BEARER_AUTH", ""),
+ token=os.getenv("FRIENDLI_TOKEN", ""),
)
-res = s.serverless.tool_assisted_chat_completion(tool_assisted_completion_request_body={
- "model": "meta-llama-3.1-8b-instruct",
- "messages": [
- {
- "role": friendli.Role.SYSTEM,
- "content": "You are a helpful assistant.",
- },
- {
- "role": friendli.UserMessageRole.USER,
- "content": "Hello!",
- },
- ],
- "max_tokens": 200,
- "tools": [
- {
- "type": friendli.OtherBuiltInToolType.MATH_CALCULATOR,
- },
- {
- "type": friendli.OtherBuiltInToolType.WEB_URL,
- },
- ],
-})
+res = s.serverless.tool_assisted_chat_completion(model="meta-llama-3.1-8b-instruct", messages=[
+ {
+ "role": "system",
+ "content": "You are a helpful assistant.",
+ },
+ {
+ "role": "user",
+ "content": "Hello!",
+ },
+], max_tokens=200, tools=[
+ {
+ "type": "math:calculator",
+ },
+ {
+ "type": "web:url",
+ },
+])
if res is not None:
for event in res:
@@ -54,12 +48,32 @@ if res is not None:
### Parameters
-| Parameter | Type | Required | Description |
-| ------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- |
-| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). |
-| `tool_assisted_completion_request_body` | [Optional[models.ToolAssistedCompletionRequestBody]](../../models/toolassistedcompletionrequestbody.md) | :heavy_minus_sign: | N/A |
-| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. |
-| `server_url` | *Optional[str]* | :heavy_minus_sign: | An optional server URL to use. |
+| Parameter | Type | Required | Description | Example |
+| -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `model` | *str* | :heavy_check_mark: | Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models). | meta-llama-3.1-8b-instruct |
+| `messages` | List[[models.Message](../../models/message.md)] | :heavy_check_mark: | A list of messages comprising the conversation so far. | [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
] |
+| `x_friendli_team` | *Optional[str]* | :heavy_minus_sign: | ID of team to run requests as (optional parameter). | |
+| `eos_token` | List[*int*] | :heavy_minus_sign: | A list of end-of-sentence tokens. | |
+| `frequency_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim. | |
+| `max_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument. | 200 |
+| `min_tokens` | *OptionalNullable[int]* | :heavy_minus_sign: | The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.
**This field is unsupported when `tools` are specified.**
| |
+| `n` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument. | |
+| `parallel_tool_calls` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to enable parallel function calling. | |
+| `presence_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text. | |
+| `repetition_penalty` | *OptionalNullable[float]* | :heavy_minus_sign: | Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument. | |
+| `response_format` | [OptionalNullable[models.TextResponseFormat]](../../models/textresponseformat.md) | :heavy_minus_sign: | The enforced format of the model's output.
Note that the content of the output message may be truncated if it exceeds the `max_tokens`.
You can check this by verifying that the `finish_reason` of the output message is `length`.
***Important***
You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`).
Otherwise, the model may produce an unending stream of whitespace or other characters.
| |
+| `resume_generation` | *OptionalNullable[bool]* | :heavy_minus_sign: | Enable this option to continue text generation even after an error occurs during a tool call.
Note that enabling this option may use more tokens, as the system generates additional content to handle errors gracefully.
However, if the system fails more than 8 times, the generation will stop regardless.
***Tip***
This is useful in scenarios where you want to maintain text generation flow despite errors, such as when generating long-form content.
The user will not be interrupted by tool call issues, ensuring a smoother experience.
| |
+| `seed` | List[*int*] | :heavy_minus_sign: | Seed to control the random procedure. If nothing is given, a random seed is used for sampling, and the seed is returned along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations. | |
+| `stop` | List[*str*] | :heavy_minus_sign: | When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list. | |
+| `stream` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to stream the generation result. When set to `true`, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.
**Caution: `stream: false` is currently unsupported.**
| |
+| `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument. | |
+| `timeout_microseconds` | *OptionalNullable[int]* | :heavy_minus_sign: | Request timeout in microseconds. The API responds with an `HTTP 429 Too Many Requests` status code when the timeout is exceeded. Default behavior is no timeout. | |
+| `tool_choice` | [OptionalNullable[models.ToolAssistedCompletionBodyToolChoice]](../../models/toolassistedcompletionbodytoolchoice.md) | :heavy_minus_sign: | Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
| |
+| `tools` | List[[models.ToolForToolAssistedChat](../../models/toolfortoolassistedchat.md)] | :heavy_minus_sign: | A list of tools the model may call.
A maximum of 128 functions is supported.
Use this to provide a list of functions the model may generate JSON inputs for.
For more detailed information about each tool, please refer [here](https://docs.friendli.ai/guides/serverless_endpoints/tools/built_in_tools).
**When `tools` are specified, `min_tokens` field is unsupported.**
| |
+| `top_k` | *OptionalNullable[int]* | :heavy_minus_sign: | The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument. | |
+| `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument. | |
+| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | |
+| `server_url` | *Optional[str]* | :heavy_minus_sign: | An optional server URL to use. | http://localhost:8080 |
### Response
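
Two of the newly documented parameters, `resume_generation` and `server_url`, do not appear in the example usage above. A minimal sketch of passing them, reusing the same call shape; the local URL is only the placeholder value from the parameter table, and accepting these as plain keyword arguments is an assumption based on that table.

```python
import os

from friendli import Friendli

client = Friendli(token=os.getenv("FRIENDLI_TOKEN", ""))

res = client.serverless.tool_assisted_chat_completion(
    model="meta-llama-3.1-8b-instruct",
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=200,
    tools=[{"type": "web:url"}],
    resume_generation=True,               # keep generating if a tool call errors out
    server_url="http://localhost:8080",   # optional per-request server override
)

if res is not None:
    for event in res:
        print(event, flush=True)
```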
diff --git a/openapi.yaml b/openapi.yaml
deleted file mode 100644
index e5e0074..0000000
--- a/openapi.yaml
+++ /dev/null
@@ -1,1412 +0,0 @@
-openapi: 3.1.0
-
-servers:
- - url: https://inference.friendli.ai
- description: Friendli Serverless Endpoints.
- - url: https://inference.friendli.ai/dedicated
- description: Friendli Dedicated Endpoints.
-
-tags:
- - name: Inference
- - name: Serverless
-
-info:
- title: Friendli Endpoints API Reference
- version: v1
- termsOfService: https://friendli.ai/terms-of-service
- description: This is an OpenAPI reference of Friendli Endpoints API.
- contact:
- name: FriendliAI Support Team
- email: support@friendli.ai
-
-paths:
- /v1/chat/completions:
- post:
- tags: [Inference]
- summary: Chat completion
- description: Given a list of messages forming a conversation, the model generates a response.
- operationId: ChatCompletion
- parameters:
- - $ref: '#/components/parameters/XFriendliTeam'
- requestBody:
- $ref: '#/components/requestBodies/ChatCompletion'
- responses:
- '200':
- $ref: '#/components/responses/ChatCompletionSuccess'
-
- /tools/v1/chat/completions:
- post:
- servers:
- - url: https://inference.friendli.ai
- description: Friendli Serverless Endpoints.
- tags: [Serverless]
- summary: Tool assisted chat completion
- description: Given a list of messages forming a conversation, the model generates a response. Additionally, the model can utilize built-in tools for tool calls, enhancing its capability to provide more comprehensive and actionable responses.
- operationId: ToolAssistedChatCompletion
- parameters:
- - $ref: '#/components/parameters/XFriendliTeam'
- requestBody:
- $ref: '#/components/requestBodies/ToolAssistedChatCompletion'
- responses:
- '200':
- $ref: '#/components/responses/ToolAssistedChatCompletionSuccess'
-
- /v1/completions:
- post:
- tags: [Inference]
- summary: Completion
- description: Generate text based on the given text prompt.
- operationId: Completion
- parameters:
- - $ref: '#/components/parameters/XFriendliTeam'
- requestBody:
- $ref: '#/components/requestBodies/Completion'
- responses:
- '200':
- $ref: '#/components/responses/CompletionSuccess'
-
- /v1/tokenize:
- post:
- tags: [Inference]
- summary: Tokenization
- description: By giving a text input, generate a tokenized output of token IDs.
- operationId: Tokenization
- parameters:
- - $ref: '#/components/parameters/XFriendliTeam'
- requestBody:
- $ref: '#/components/requestBodies/Tokenization'
- responses:
- '200':
- $ref: '#/components/responses/TokenizationSuccess'
-
- /v1/detokenize:
- post:
- tags: [Inference]
- summary: Detokenization
- description: By giving a list of tokens, generate a detokenized output text string.
- operationId: Detokenization
- parameters:
- - $ref: '#/components/parameters/XFriendliTeam'
- requestBody:
- $ref: '#/components/requestBodies/Detokenization'
- responses:
- '200':
- $ref: '#/components/responses/DetokenizationSuccess'
-
-components:
- schemas:
- ChatCompletionRequestBody:
- type: object
- properties:
- model:
- type: string
- description: Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models).
- examples: ['meta-llama-3.1-8b-instruct']
- messages:
- type: array
- items:
- $ref: '#/components/schemas/Message'
- description: A list of messages comprising the conversation so far.
- example:
- - role: system
- content: You are a helpful assistant.
- - role: user
- content: Hello!
- eos_token:
- type: ['array', 'null']
- items:
- type: integer
- description: A list of endpoint sentence tokens.
- frequency_penalty:
- type: ['number', 'null']
- description: Number between -2.0 and 2.0. Positive values penalizes tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.
- logit_bias:
- type: ['object', 'null']
- description: Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.
- logprobs:
- type: ['boolean', 'null']
- description: Whether to return log probabilities of the output tokens or not.
- max_tokens:
- type: ['integer', 'null']
- description: The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.
- examples: [200]
- min_tokens:
- type: ['integer', 'null']
- description: |
- The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.
-
- **This field is unsupported when `tools` are specified.**
- default: 0
- n:
- type: ['integer', 'null']
- description: The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.
- default: 1
- parallel_tool_calls:
- type: ['boolean', 'null']
- description: Whether to enable parallel function calling.
- presence_penalty:
- type: ['number', 'null']
-          description: Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text.
- repetition_penalty:
- type: ['number', 'null']
-          description: Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.
- response_format:
- $ref: '#/components/schemas/TextResponseFormat'
- seed:
- type: ['array', 'null']
- items:
- type: integer
-          description: Seed to control the random procedure. If nothing is given, a random seed is used for sampling, and the seed is returned along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.
- stop:
- type: ['array', 'null']
- items:
- type: string
- description: When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list.
- stream:
- type: ['boolean', 'null']
-          description: Whether to stream the generation result. When set to true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.
- stream_options:
- type: ['object', 'null']
- properties:
- include_usage:
- type: ['boolean', 'null']
- description: |
- When set to `true`,
- the number of tokens used will be included at the end of the stream result in the form of
- `"usage": {"completion_tokens": number, "prompt_tokens": number, "total_tokens": number}`.
- description: |
- Options related to stream.
- It can only be used when `stream: true`.
- temperature:
- type: ['number', 'null']
- description: Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.
- default: 1.0
- timeout_microseconds:
- type: ['integer', 'null']
-          description: Request timeout in microseconds. Returns the `HTTP 429 Too Many Requests` response status code on timeout. Default behavior is no timeout.
- tool_choice:
- oneOf:
- - title: string
- type: ['string', 'null']
- - title: object
- type: ['object', 'null']
- properties:
- type:
- type: string
- enum:
- - function
- description: The type of the tool. Currently, only `function` is supported.
- function:
- type: object
- properties:
- name:
- type: string
- description: The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
- required:
- - name
- required:
- - type
- - function
- description: |
- Determines the tool calling behavior of the model.
- When set to `none`, the model will bypass tool execution and generate a response directly.
- In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
- Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
- You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
- tools:
- type: ['array', 'null']
- items:
- $ref: '#/components/schemas/Tool'
- description: |
- A list of tools the model may call.
- Currently, only functions are supported as a tool.
- A maximum of 128 functions is supported.
- Use this to provide a list of functions the model may generate JSON inputs for.
-
- **When `tools` are specified, `min_tokens` field is unsupported.**
- top_k:
- type: ['integer', 'null']
- description: The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.
- default: 0
- top_logprobs:
- type: ['integer', 'null']
- description: The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used.
- top_p:
- type: ['number', 'null']
- description: Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.
- default: 1.0
- required:
- - model
- - messages
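Taken together, a ChatCompletionRequestBody that exercises the common fields above might look like the following sketch (values are illustrative only; `response_format.schema` is a serialized JSON schema string, as described in the TextResponseFormat schema further down):

```python
# Illustrative ChatCompletionRequestBody; field values are examples, not recommendations.
chat_completion_body = {
    "model": "meta-llama-3.1-8b-instruct",
    "messages": [
        {"role": "system", "content": "You are an API generating a valid JSON as output."},
        {"role": "user", "content": "Describe today's weather in Paris as JSON."},
    ],
    "max_tokens": 200,
    "temperature": 0.3,
    "top_p": 0.9,
    "stream": False,
    "response_format": {
        "type": "json_object",
        # For json_object, `schema` is a serialized JSON schema string.
        "schema": '{"type": "object", "properties": {"city": {"type": "string"}, '
                  '"temp_c": {"type": "number"}}, "required": ["city", "temp_c"]}',
    },
}
```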
- ToolAssistedCompletionRequestBody:
- type: object
- properties:
- model:
- type: string
- description: Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models).
- examples: ['meta-llama-3.1-8b-instruct']
- messages:
- type: array
- items:
- $ref: '#/components/schemas/Message'
- description: A list of messages comprising the conversation so far.
- example:
- - role: system
- content: You are a helpful assistant.
- - role: user
- content: Hello!
- eos_token:
- type: ['array', 'null']
- items:
- type: integer
-          description: A list of end-of-sentence (EOS) tokens.
- frequency_penalty:
- type: ['number', 'null']
-          description: Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.
- max_tokens:
- type: ['integer', 'null']
- description: The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.
- examples: [200]
- min_tokens:
- type: ['integer', 'null']
- description: |
- The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.
-
- **This field is unsupported when `tools` are specified.**
- default: 0
- n:
- type: ['integer', 'null']
- description: The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.
- default: 1
- parallel_tool_calls:
- type: ['boolean', 'null']
- description: Whether to enable parallel function calling.
- presence_penalty:
- type: ['number', 'null']
-          description: Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text.
- repetition_penalty:
- type: ['number', 'null']
-          description: Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.
- response_format:
- $ref: '#/components/schemas/TextResponseFormat'
- resume_generation:
- type: ['boolean', 'null']
- description: |
- Enable to continue text generation even after an error occurs during a tool call.
-
- Note that enabling this option may use more tokens, as the system generates additional content to handle errors gracefully.
- However, if the system fails more than 8 times, the generation will stop regardless.
-
- ***Tip***
- This is useful in scenarios where you want to maintain text generation flow despite errors, such as when generating long-form content.
- The user will not be interrupted by tool call issues, ensuring a smoother experience.
- seed:
- type: ['array', 'null']
- items:
- type: integer
-          description: Seed to control the random procedure. If nothing is given, a random seed is used for sampling, and the seed is returned along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.
- stop:
- type: ['array', 'null']
- items:
- type: string
- description: When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list.
- stream:
- type: ['boolean', 'null']
- description: |
-            Whether to stream the generation result. When set to true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.
-
- **Caution: `stream: false` is unsupported now.**
- temperature:
- type: ['number', 'null']
- description: Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.
- default: 1.0
- timeout_microseconds:
- type: ['integer', 'null']
-          description: Request timeout in microseconds. Returns the `HTTP 429 Too Many Requests` response status code on timeout. Default behavior is no timeout.
- tool_choice:
- oneOf:
- - title: string
- type: ['string', 'null']
- - title: object
- type: ['object', 'null']
- properties:
- type:
- type: string
- enum:
- - function
- description: The type of the tool. Currently, only `function` is supported.
- function:
- type: object
- properties:
- name:
- type: string
- description: The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
- required:
- - name
- required:
- - type
- - function
- description: |
- Determines the tool calling behavior of the model.
- When set to `none`, the model will bypass tool execution and generate a response directly.
- In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
- Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user.
- You can also specify a particular tool by `{"type": "function", "function": {"name": "my_function"}}`.
- tools:
- type: ['array', 'null']
- items:
- $ref: '#/components/schemas/ToolForToolAssistedChat'
- description: |
- A list of tools the model may call.
- A maximum of 128 functions is supported.
- Use this to provide a list of functions the model may generate JSON inputs for.
- For more detailed information about each tool, please refer [here](https://docs.friendli.ai/guides/serverless_endpoints/tools/built_in_tools).
-
- **When `tools` are specified, `min_tokens` field is unsupported.**
- top_k:
- type: ['integer', 'null']
- description: The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.
- default: 0
- top_p:
- type: ['number', 'null']
- description: Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.
- default: 1.0
- required:
- - model
- - messages
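The tool-assisted body mirrors the one above, with two practical differences worth showing: `tools` takes built-in tool entries and, per the caution on `stream`, streaming must stay enabled. The exact shape of a built-in tool entry (`ToolForToolAssistedChat`) is not reproduced in this excerpt, so the `{"type": "web:search"}` form below is an assumption based on the tool names listed in the status-event schema further down.

```python
# Hedged sketch of a ToolAssistedCompletionRequestBody; built-in tool entry shape is assumed.
tool_assisted_body = {
    "model": "meta-llama-3.1-8b-instruct",
    "messages": [
        {"role": "user", "content": "What is 123 * 456, and what happened in AI news today?"},
    ],
    "tools": [
        {"type": "math:calculator"},
        {"type": "web:search"},
    ],
    "stream": True,             # `stream: false` is unsupported for this endpoint
    "resume_generation": True,  # keep generating even if a tool call errors
}
```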
- CompletionRequestBody:
- oneOf:
- - title: prompt
- $ref: '#/components/schemas/CompletionRequestBodyWithPrompt'
- - title: tokens
- $ref: '#/components/schemas/CompletionRequestBodyWithTokens'
- TokenizationRequestBody:
- type: object
- properties:
- model:
- type: string
- description: Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models).
- example: 'meta-llama-3.1-8b-instruct'
- prompt:
- type: string
- description: Input text prompt to tokenize.
- example: 'What is generative AI?'
- required:
- - model
- - prompt
- DetokenizationRequestBody:
- type: object
- properties:
- model:
- type: string
- description: Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models).
- example: 'meta-llama-3.1-8b-instruct'
- tokens:
- type: array
- items:
- type: integer
- description: A token sequence to detokenize.
- example: [128000, 3923, 374, 1803, 1413, 15592, 30]
-
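Tokenization and detokenization are inverses, so the example values in the two schemas above already form a round trip; a sketch of the two request bodies:

```python
# Round-trip sketch built from the example values in the two schemas above.
tokenization_body = {
    "model": "meta-llama-3.1-8b-instruct",
    "prompt": "What is generative AI?",
}
# -> TokenizationResponse: {"tokens": [128000, 3923, 374, 1803, 1413, 15592, 30]}

detokenization_body = {
    "model": "meta-llama-3.1-8b-instruct",
    "tokens": [128000, 3923, 374, 1803, 1413, 15592, 30],
}
# -> DetokenizationResponse: {"text": "..."}  # roughly the original prompt text
```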
- ChatCompletionResponse:
- type: object
- required: [choices, usage]
- properties:
- choices:
- type: array
- items:
- $ref: '#/components/schemas/ChatCompletionChoice'
- usage:
- $ref: '#/components/schemas/Usage'
- created:
- type: integer
- description: The Unix timestamp (in seconds) for when the generation completed.
- StreamedChatCompletionResponse:
- description: A server-sent event containing chat completion content.
- type: object
- required: [data]
- properties:
- data:
- type: object
- required: [choices, created]
- properties:
- choices:
- type: array
- items:
- $ref: '#/components/schemas/StreamedChatCompletionChoice'
- usage:
- $ref: '#/components/schemas/Usage'
- created:
- type: integer
-              description: The Unix timestamp (in seconds) for when the token was sampled.
- StreamedToolAssistedChatCompletionResponse:
- description: A server-sent event containing chat completion content.
- type: object
- required: [data]
- properties:
- data:
- type: object
- required: [choices, created]
- properties:
- choices:
- type: array
- items:
- $ref: '#/components/schemas/StreamedChatCompletionChoice'
- created:
- type: integer
-              description: The Unix timestamp (in seconds) for when the token was sampled.
- CompletionResponse:
- type: object
- required: [choices, usage]
- properties:
- choices:
- type: array
- items:
- $ref: '#/components/schemas/CompletionChoice'
- usage:
- $ref: '#/components/schemas/Usage'
- StreamedCompletionResponse:
- type: object
- required: [data]
- properties:
- data:
- oneOf:
- - title: token_sampled
- $ref: '#/components/schemas/StreamedCompletionTokenSampled'
- - title: complete
- $ref: '#/components/schemas/StreamedCompletionTokenComplete'
- discriminator:
- propertyName: event
- mapping:
- token_sampled: '#/components/schemas/StreamedCompletionTokenSampled'
- complete: '#/components/schemas/StreamedCompletionTokenComplete'
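Because streamed completion frames carry an `event` discriminator, a consumer can dispatch on it directly; a minimal sketch for one decoded SSE `data:` payload:

```python
import json


def handle_completion_frame(raw: str) -> None:
    """Dispatch one decoded SSE `data:` payload on the `event` discriminator."""
    frame = json.loads(raw)
    if frame["event"] == "token_sampled":
        # StreamedCompletionTokenSampled: one sampled token of one choice.
        print(frame["text"], end="", flush=True)
    elif frame["event"] == "complete":
        # StreamedCompletionTokenComplete: final choices plus usage.
        print()
        print("usage:", frame["usage"])
```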
- TokenizationResponse:
- type: object
- properties:
- tokens:
- type: array
- items:
- type: integer
- description: A token ID.
- description: A list of token IDs.
- DetokenizationResponse:
- type: object
- properties:
- text:
- type: string
- description: Detokenized text output.
-
- CompletionRequestBodyWithPrompt:
- allOf:
- - type: object
- properties:
- prompt:
- type: string
- description: The prompt (i.e., input text) to generate completion for. Either `prompt` or `tokens` field is required.
- examples: ['Say this is a test!']
- required:
- - prompt
- - $ref: '#/components/schemas/CommonCompletionRequestBody'
- CompletionRequestBodyWithTokens:
- allOf:
- - type: object
- properties:
- tokens:
- type: array
- items:
- type: integer
- description: The tokenized prompt (i.e., input tokens). Either `prompt` or `tokens` field is required.
- required:
- - tokens
- - $ref: '#/components/schemas/CommonCompletionRequestBody'
- CommonCompletionRequestBody:
- type: object
- properties:
- model:
- type: string
- description: Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models).
- examples: ['meta-llama-3.1-8b-instruct']
- bad_word_tokens:
- type: ['array', 'null']
- items:
- $ref: '#/components/schemas/TokenSequence'
-          description: Same as the `bad_words` field below, but receives token sequences instead of text phrases. This is similar to Hugging Face's [`bad_words_ids`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.bad_words_ids) argument.
- bad_words:
- type: ['array', 'null']
- items:
- type: string
- description: |
- Text phrases that should not be generated.
-            For a bad word phrase that contains N tokens, if the first N-1 tokens appear at the end of the generated result, the logit for the last token of the phrase is set to -inf.
-            Before checking whether a bad word is included in the result, the word is converted into tokens.
- We recommend using `bad_word_tokens` because it is clearer.
- For example, after tokenization, phrases "clear" and " clear" can result in different token sequences due to the prepended space character.
- Defaults to empty list.
- beam_compat_no_post_normalization:
- type: ['boolean', 'null']
- beam_compat_pre_normalization:
- type: ['boolean', 'null']
- beam_search_type:
- type: ['string', 'null']
-          description: One of `DETERMINISTIC`, `NAIVE_SAMPLING`, and `STOCHASTIC`. Which beam search type to use. `DETERMINISTIC` means the standard, deterministic beam search, which is similar to Hugging Face's [`beam_search`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_search). Arguments for controlling random sampling such as `top_k` and `top_p` are not allowed for this option. `NAIVE_SAMPLING` is similar to Hugging Face's [`beam_sample`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationMixin.beam_sample). `STOCHASTIC` means stochastic beam search (more details in [Kool et al. (2019)](https://proceedings.mlr.press/v97/kool19a.html)). This option is ignored if `num_beams` is not provided. Defaults to `DETERMINISTIC`.
- default: 'DETERMINISTIC'
- early_stopping:
- type: ['boolean', 'null']
- description: Whether to stop the beam search when at least `num_beams` beams are finished with the EOS token. Only allowed for beam search. Defaults to false. This is similar to Hugging Face's [`early_stopping`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.early_stopping) argument.
- default: false
- embedding_to_replace:
- type: ['array', 'null']
- items:
- type: number
- description: A list of flattened embedding vectors used for replacing the tokens at the specified indices provided via `token_index_to_replace`.
- encoder_no_repeat_ngram:
- type: ['integer', 'null']
- description: If this exceeds 1, every ngram of that size occurring in the input token sequence cannot appear in the generated result. 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Only allowed for encoder-decoder models. Defaults to 1. This is similar to Hugging Face's [`encoder_no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_no_repeat_ngram_size) argument.
- default: 1
- encoder_repetition_penalty:
- type: ['number', 'null']
- description: Penalizes tokens that have already appeared in the input tokens. Should be greater than or equal to 1.0. 1.0 means no penalty. Only allowed for encoder-decoder models. See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`encoder_repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.encoder_repetition_penalty) argument.
- eos_token:
- type: ['array', 'null']
- items:
- type: integer
-          description: A list of end-of-sentence (EOS) tokens.
- forced_output_tokens:
- type: ['array', 'null']
- items:
- type: integer
- description: A token sequence that is enforced as a generation output. This option can be used when evaluating the model for the datasets with multi-choice problems (e.g., [HellaSwag](https://huggingface.co/datasets/hellaswag), [MMLU](https://huggingface.co/datasets/cais/mmlu)). Use this option with `include_output_logprobs` to get logprobs for the evaluation.
- frequency_penalty:
- type: ['number', 'null']
-          description: Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.
- include_output_logits:
- type: ['boolean', 'null']
- description: Whether to include the output logits to the generation output.
- include_output_logprobs:
- type: ['boolean', 'null']
- description: Whether to include the output logprobs to the generation output.
- length_penalty:
- type: ['number', 'null']
- description: Coefficient for exponential length penalty that is used with beam search. Only allowed for beam search. Defaults to 1.0. This is similar to Hugging Face's [`length_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.length_penalty) argument.
- max_tokens:
- type: ['integer', 'null']
- description: The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.
- examples: [200]
- max_total_tokens:
- type: ['integer', 'null']
- description: The maximum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `max_tokens` and `max_total_tokens` is allowed. Default value is the model's maximum length. This is similar to Hugging Face's [`max_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_length) argument.
- min_tokens:
- type: ['integer', 'null']
- description: The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument.
- default: 0
- min_total_tokens:
- type: ['integer', 'null']
- description: The minimum number of tokens including both the generated result and the input tokens. Only allowed for decoder-only models. Only one argument between `min_tokens` and `min_total_tokens` is allowed. This is similar to Hugging Face's [`min_length`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.min_length) argument.
- n:
- type: ['integer', 'null']
- description: The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.
- default: 1
- no_repeat_ngram:
- type: ['integer', 'null']
- description: If this exceeds 1, every ngram of that size can only occur once among the generated result (plus the input tokens for decoder-only models). 1 means that this mechanism is disabled (i.e., you cannot prevent 1-gram from being generated repeatedly). Defaults to 1. This is similar to Hugging Face's [`no_repeat_ngram_size`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.no_repeat_ngram_size) argument.
- default: 1
- num_beams:
- type: ['integer', 'null']
- description: Number of beams for beam search. Numbers between 1 and 31 (both inclusive) are allowed. Default behavior is no beam search. This is similar to Hugging Face's [`num_beams`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_beams) argument.
- presence_penalty:
- type: ['number', 'null']
-          description: Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text.
- repetition_penalty:
- type: ['number', 'null']
-          description: Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.
- response_format:
- $ref: '#/components/schemas/TextResponseFormat'
- seed:
- type: ['array', 'null']
- items:
- type: integer
-          description: Seed to control the random procedure. If nothing is given, the API generates the seed randomly, uses it for sampling, and returns the seed along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.
- stop:
- type: ['array', 'null']
- items:
- type: string
- description: |
- When one of the stop phrases appears in the generation result, the API will stop generation.
- The stop phrases are excluded from the result.
- This option is incompatible with beam search (specified by `num_beams`); use `stop_tokens` for that case instead.
- Defaults to empty list.
- stop_tokens:
- type: ['array', 'null']
- items:
- $ref: '#/components/schemas/TokenSequence'
- description: |
-            Stop generating further tokens when a generated token corresponds to any of the tokens in the sequence.
- If beam search is enabled, all of the active beams should contain the stop token to terminate generation.
- stream:
- type: ['boolean', 'null']
-          description: Whether to stream the generation result. When set to true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. Not supported when using beam search.
- temperature:
- type: ['number', 'null']
- description: Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.
- default: 1.0
- timeout_microseconds:
- type: ['integer', 'null']
-          description: Request timeout in microseconds. Returns the `HTTP 429 Too Many Requests` response status code on timeout. Default behavior is no timeout.
- token_index_to_replace:
- type: ['array', 'null']
- items:
- type: integer
- description: A list of token indices where to replace the embeddings of input tokens provided via either `tokens` or `prompt`.
- top_k:
- type: ['integer', 'null']
- description: The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.
- examples: [1]
- default: 0
- top_p:
- type: ['number', 'null']
- description: Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.
- default: 1.0
- required:
- - model
-
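A completion request therefore combines the shared fields above with exactly one of `prompt` or `tokens`; the two shapes side by side (values illustrative, taken from the field examples above):

```python
# Either shape satisfies CompletionRequestBody; do not send both `prompt` and `tokens`.
completion_with_prompt = {
    "model": "meta-llama-3.1-8b-instruct",
    "prompt": "Say this is a test!",
    "max_tokens": 200,
    "top_k": 1,
}

completion_with_tokens = {
    "model": "meta-llama-3.1-8b-instruct",
    "tokens": [128000, 2028, 374, 13118, 264, 1296],
    "max_tokens": 200,
}
```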
- ChatCompletionChoice:
- type: object
- required: [index, message, finish_reason]
- properties:
- index:
- type: integer
- description: The index of the choice in the list of generated choices.
- examples: [0]
- message:
- type: object
- required: [role]
- properties:
- role:
- type: string
- description: Role of the generated message author, in this case `assistant`.
- content:
- type: string
- description: The contents of the assistant message.
- tool_calls:
- type: array
- items:
- type: object
- required: [id, type, function]
- properties:
- id:
- type: string
- description: The ID of the tool call.
- type:
- type: string
- enum:
- - function
- description: The type of the tool.
- function:
- type: object
- required: [name, arguments]
- properties:
- name:
- type: string
- description: The name of the function to call.
- arguments:
- type: string
- description: |
- The arguments for calling the function, generated by the model in JSON format.
- Ensure to validate these arguments in your code before invoking the function since the model may not always produce valid JSON.
- finish_reason:
- type: string
- description: |
- Termination condition of the generation. `stop` means the API returned the full chat completion generated by the model without running into any limits.
- `length` means the generation exceeded `max_tokens` or the conversation exceeded the max context length.
- `tool_calls` means the API has generated tool calls.
- logprobs:
- $ref: '#/components/schemas/Logprobs'
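When `finish_reason` is `tool_calls`, the caller is expected to execute the named functions and feed the results back; a sketch of that dispatch step, where the `{"role": "tool", ...}` reply shape follows the usual OpenAI-compatible convention rather than anything shown in this excerpt:

```python
import json
from typing import Any, Callable, Dict, List


def dispatch_tool_calls(choice: dict, functions: Dict[str, Callable[..., Any]]) -> List[dict]:
    """Run locally registered functions for each tool call in a finished choice."""
    replies: List[dict] = []
    if choice.get("finish_reason") != "tool_calls":
        return replies
    for call in choice["message"].get("tool_calls", []):
        fn = functions[call["function"]["name"]]
        # Arguments are model-generated JSON; validate before use, as the spec warns.
        args = json.loads(call["function"]["arguments"])
        replies.append({
            "role": "tool",                  # assumed tool-message shape
            "tool_call_id": call["id"],
            "content": str(fn(**args)),
        })
    return replies
```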
- StreamedChatCompletionChoice:
- type: object
- required: [index, delta]
- properties:
- index:
- type: integer
- description: The index of the choice in the list of generated choices.
- examples: [0]
- delta:
- type: object
- properties:
- role:
- type: string
- description: Role of the generated message author, in this case `assistant`.
- content:
- type: string
- description: The contents of the assistant message.
- tool_calls:
- type: object
- required: [index, id, type, function]
- properties:
- index:
- type: integer
-                  description: The index of the tool call being generated.
- id:
- type: string
- description: The ID of the tool call.
- type:
- type: string
- enum:
- - function
- description: The type of the tool.
- function:
- type: object
- required: [name, arguments]
- properties:
- name:
- type: string
- description: The name of the function to call.
- arguments:
- type: string
- description: |
- The arguments for calling the function, generated by the model in JSON format.
- Ensure to validate these arguments in your code before invoking the function since the model may not always produce valid JSON.
- finish_reason:
- type: ['string', 'null']
- description: Termination condition of the generation. `stop` means the API returned the full chat completion generated by the model without running into any limits. `length` means the generation exceeded `max_tokens` or the conversation exceeded the max context length.
- logprobs:
- $ref: '#/components/schemas/Logprobs'
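Streamed chat choices arrive as incremental `delta` fragments, so the client reassembles the message itself; a sketch that concatenates `delta.content` from decoded payloads, using the `[DONE]` sentinel that the SDK code below also relies on:

```python
import json
from typing import Iterable


def accumulate_chat_stream(payloads: Iterable[str]) -> str:
    """Concatenate `delta.content` pieces from decoded SSE payloads into one message."""
    parts = []
    for raw in payloads:
        if raw.strip() == "[DONE]":
            break
        frame = json.loads(raw)
        for choice in frame["choices"]:
            content = choice.get("delta", {}).get("content")
            if content:
                parts.append(content)
    return "".join(parts)
```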
- ToolAssistedChatToolStatusEvent:
- type: object
- required: [tool_call_id, name, status, parameters, result, timestamp]
- properties:
- tool_call_id:
- type: string
- description: The ID of the tool call.
- name:
- type: string
- enum:
- - math:calculator
- - math:statistics
- - math:calendar
- - web:search
- - web:url
- - code:python-interpreter
- - file:text
- description: The name of the built-in tool.
- status:
- type: string
- enum:
- - STARTED
- - UPDATING
- - ENDED
- - ERRORED
- description: Indicates the current execution status of the tool.
- parameters:
- type: array
- items:
- type: object
- required: [name, value]
- properties:
- name:
- type: string
- description: The name of the tool’s function parameter.
- value:
- type: string
- description: The value of the tool’s function parameter.
- result:
- type: string
- description: The output from the tool’s execution.
- files:
- type: array
- items:
- type: object
- required: [name, url]
- properties:
- name:
- type: string
- description: The name of the file generated by the tool’s execution.
- url:
- type: string
- description: URL of the file generated by the tool’s execution.
- message:
- type: string
- description: Message generated by the tool’s execution.
- error:
- type: object
- required: [type, msg]
- properties:
- type:
- type: string
- description: The type of error encountered during the tool’s execution.
- msg:
- type: string
-                  description: The error message.
- timestamp:
- type: number
- description: The Unix timestamp (in seconds) for when the event occurred.
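Tool status events are what a UI would surface while a built-in tool runs; a small formatter sketch over the fields above:

```python
def render_tool_status(event: dict) -> str:
    """Render one ToolAssistedChatToolStatusEvent as a short progress line."""
    params = ", ".join(f"{p['name']}={p['value']}" for p in event.get("parameters", []))
    line = f"[{event['status']}] {event['name']}({params})"
    if event["status"] == "ENDED" and event.get("result"):
        line += f" -> {event['result']!r}"
    elif event["status"] == "ERRORED" and "error" in event:
        line += f" !! {event['error']['type']}: {event['error']['msg']}"
    return line
```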
- CompletionChoice:
- type: object
- required: [index, seed, text, tokens]
- properties:
- index:
- type: integer
- description: The index of the choice in the list of generated choices.
- examples: [0]
- seed:
- type: integer
- description: Random seed used for the generation.
- examples: [42]
- text:
- type: string
- description: Generated text output.
- examples: ['This is indeed a test']
- tokens:
- type: array
- items:
- type: integer
- description: Generated output tokens.
- example: [128000, 2028, 374, 13118, 264, 1296]
- StreamedCompletionTokenSampled:
- type: object
- required: [event, index, text, token]
- properties:
- event:
- type: string
- enum: [token_sampled]
- description: Type of server-sent event.
- index:
- type: integer
- description: The index of the choice in the list of generated choices.
- text:
- type: string
- description: Generated text output.
- token:
- type: integer
- description: Generated output token.
- StreamedCompletionTokenComplete:
- type: object
- required: [event, choices, usage]
- properties:
- event:
- type: string
- enum: [complete]
- description: Type of server-sent event.
- choices:
- type: array
- items:
- $ref: '#/components/schemas/CompletionChoice'
- usage:
- $ref: '#/components/schemas/Usage'
-
- TokenSequence:
- type: object
- properties:
- tokens:
- type: array
- items:
- type: integer
-          description: A list of token IDs.
- TextResponseFormat:
- type: ['object', 'null']
- description: |
- The enforced format of the model's output.
-
- Note that the content of the output message may be truncated if it exceeds the `max_tokens`.
- You can check this by verifying that the `finish_reason` of the output message is `length`.
-
- ***Important***
- You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`).
-        Otherwise, the model may produce an unending stream of whitespace or other characters.
- properties:
- type:
- type: string
- enum:
- - text
- - json_object
- - regex
- description: Type of the response format.
- schema:
- type: string
- description: |
- The schema of the output. For `{ "type": "json_object" }`, `schema` should be a serialized string of JSON schema. For `{ "type": "regex" }`, `schema` should be a regex pattern.
-
- ***Caveat***
- For the JSON object type, recursive definitions are not supported. Optional properties are also not supported; all properties of `{ "type": "object" }` are generated regardless of whether they are listed in the `required` field.
-            For the regex type, lookaheads/lookbehinds (e.g., `\a`, `\z`, `^`, `$`, `(?=)`, `(?!)`, `(?<=...)`, `(?<!...)`) are not supported.
-    ) -> models.ChatCompletionResponse1:
+ ) -> models.ChatCompletionResponse:
r"""Chat completion
Given a list of messages forming a conversation, the model generates a response.
+ :param model: Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models).
+ :param messages: A list of messages comprising the conversation so far.
:param x_friendli_team: ID of team to run requests as (optional parameter).
- :param chat_completion_request_body:
+        :param eos_token: A list of end-of-sentence (EOS) tokens.
+        :param frequency_penalty: Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.
+ :param logit_bias: Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.
+ :param logprobs: Whether to return log probabilities of the output tokens or not.
+ :param max_tokens: The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.
+ :param min_tokens: The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument. **This field is unsupported when `tools` are specified.**
+ :param n: The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.
+ :param parallel_tool_calls: Whether to enable parallel function calling.
+        :param presence_penalty: Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text.
+        :param repetition_penalty: Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.
+        :param response_format: The enforced format of the model's output. Note that the content of the output message may be truncated if it exceeds the `max_tokens`. You can check this by verifying that the `finish_reason` of the output message is `length`. ***Important*** You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`). Otherwise, the model may produce an unending stream of whitespace or other characters.
+        :param seed: Seed to control the random procedure. If nothing is given, a random seed is used for sampling, and the seed is returned along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.
+ :param stop: When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list.
+        :param stream: Whether to stream the generation result. When set to true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.
+ :param stream_options: Options related to stream. It can only be used when `stream: true`.
+ :param temperature: Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.
+        :param timeout_microseconds: Request timeout in microseconds. Returns the `HTTP 429 Too Many Requests` response status code on timeout. Default behavior is no timeout.
+ :param tool_choice: Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. You can also specify a particular tool by `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}`.
+ :param tools: A list of tools the model may call. Currently, only functions are supported as a tool. A maximum of 128 functions is supported. Use this to provide a list of functions the model may generate JSON inputs for. **When `tools` are specified, `min_tokens` field is unsupported.**
+ :param top_k: The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.
+ :param top_logprobs: The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used.
+ :param top_p: Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.
:param retries: Override the default retry configuration for this method
:param server_url: Override the default server URL for this method
:param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -56,8 +107,41 @@ def chat_completion(
request = models.ChatCompletionRequest(
x_friendli_team=x_friendli_team,
- chat_completion_request_body=utils.get_pydantic_model(
- chat_completion_request_body, Optional[models.ChatCompletionRequestBody]
+ chat_completion_body=models.ChatCompletionBody(
+ model=model,
+ messages=utils.get_pydantic_model(messages, List[models.Message]),
+ eos_token=eos_token,
+ frequency_penalty=frequency_penalty,
+ logit_bias=utils.get_pydantic_model(
+ logit_bias, OptionalNullable[models.LogitBias]
+ ),
+ logprobs=logprobs,
+ max_tokens=max_tokens,
+ min_tokens=min_tokens,
+ n=n,
+ parallel_tool_calls=parallel_tool_calls,
+ presence_penalty=presence_penalty,
+ repetition_penalty=repetition_penalty,
+ response_format=utils.get_pydantic_model(
+ response_format, OptionalNullable[models.TextResponseFormat]
+ ),
+ seed=seed,
+ stop=stop,
+ stream=stream,
+ stream_options=utils.get_pydantic_model(
+ stream_options, OptionalNullable[models.StreamOptions]
+ ),
+ temperature=temperature,
+ timeout_microseconds=timeout_microseconds,
+ tool_choice=utils.get_pydantic_model(
+ tool_choice, OptionalNullable[models.ToolChoice]
+ ),
+ tools=utils.get_pydantic_model(
+ tools, OptionalNullable[List[models.Tool]]
+ ),
+ top_k=top_k,
+ top_logprobs=top_logprobs,
+ top_p=top_p,
),
)
@@ -67,7 +151,7 @@ def chat_completion(
base_url=base_url,
url_variables=url_variables,
request=request,
- request_body_required=False,
+ request_body_required=True,
request_has_path_params=False,
request_has_query_params=True,
user_agent_header="user-agent",
@@ -76,11 +160,11 @@ def chat_completion(
else "application/json;q=1, text/event-stream;q=0",
security=self.sdk_configuration.security,
get_serialized_body=lambda: utils.serialize_request_body(
- request.chat_completion_request_body,
+ request.chat_completion_body,
+ False,
False,
- True,
"json",
- Optional[models.ChatCompletionRequestBody],
+ models.ChatCompletionBody,
),
timeout_ms=timeout_ms,
)
@@ -109,14 +193,12 @@ def chat_completion(
if utils.match_response(http_res, "200", "application/json"):
http_response_text = utils.stream_to_text(http_res)
- return utils.unmarshal_json(
- http_response_text, models.ChatCompletionResponse
- )
+ return utils.unmarshal_json(http_response_text, models.ChatCompletionResult)
if utils.match_response(http_res, "200", "text/event-stream"):
return eventstreaming.stream_events(
http_res,
lambda raw: utils.unmarshal_json(
- raw, models.StreamedChatCompletionResponse
+ raw, models.StreamedChatCompletionResult
),
sentinel="[DONE]",
)
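With the request body flattened into keyword arguments as above, callers pass fields directly instead of building a request-body model; a usage sketch of the regenerated method, where the `Friendli` client class name, `token` constructor argument, and `client.inference` attribute path are assumptions about the generated SDK surface rather than something shown in this diff:

```python
# Hypothetical usage of the regenerated SDK; client class and auth kwarg are assumptions.
import os

from friendli import Friendli

client = Friendli(token=os.environ["FRIENDLI_TOKEN"])

result = client.inference.chat_completion(
    model="meta-llama-3.1-8b-instruct",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    max_tokens=200,
)
# Non-streaming requests unmarshal into ChatCompletionResult.
print(result.choices[0].message.content)
```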
@@ -138,24 +220,75 @@ def chat_completion(
async def chat_completion_async(
self,
*,
+ model: str,
+ messages: Union[List[models.Message], List[models.MessageTypedDict]],
x_friendli_team: Optional[str] = None,
- chat_completion_request_body: Optional[
- Union[
- models.ChatCompletionRequestBody,
- models.ChatCompletionRequestBodyTypedDict,
- ]
- ] = None,
+ eos_token: OptionalNullable[List[int]] = UNSET,
+ frequency_penalty: OptionalNullable[float] = UNSET,
+ logit_bias: OptionalNullable[
+ Union[models.LogitBias, models.LogitBiasTypedDict]
+ ] = UNSET,
+ logprobs: OptionalNullable[bool] = UNSET,
+ max_tokens: OptionalNullable[int] = UNSET,
+ min_tokens: OptionalNullable[int] = 0,
+ n: OptionalNullable[int] = 1,
+ parallel_tool_calls: OptionalNullable[bool] = UNSET,
+ presence_penalty: OptionalNullable[float] = UNSET,
+ repetition_penalty: OptionalNullable[float] = UNSET,
+ response_format: OptionalNullable[
+ Union[models.TextResponseFormat, models.TextResponseFormatTypedDict]
+ ] = UNSET,
+ seed: OptionalNullable[List[int]] = UNSET,
+ stop: OptionalNullable[List[str]] = UNSET,
+ stream: OptionalNullable[bool] = UNSET,
+ stream_options: OptionalNullable[
+ Union[models.StreamOptions, models.StreamOptionsTypedDict]
+ ] = UNSET,
+ temperature: OptionalNullable[float] = 1,
+ timeout_microseconds: OptionalNullable[int] = UNSET,
+ tool_choice: OptionalNullable[
+ Union[models.ToolChoice, models.ToolChoiceTypedDict]
+ ] = UNSET,
+ tools: OptionalNullable[
+ Union[List[models.Tool], List[models.ToolTypedDict]]
+ ] = UNSET,
+ top_k: OptionalNullable[int] = 0,
+ top_logprobs: OptionalNullable[int] = UNSET,
+ top_p: OptionalNullable[float] = 1,
retries: OptionalNullable[utils.RetryConfig] = UNSET,
server_url: Optional[str] = None,
timeout_ms: Optional[int] = None,
accept_header_override: Optional[ChatCompletionAcceptEnum] = None,
- ) -> models.ChatCompletionResponse1:
+ ) -> models.ChatCompletionResponse:
r"""Chat completion
Given a list of messages forming a conversation, the model generates a response.
+ :param model: Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models).
+ :param messages: A list of messages comprising the conversation so far.
:param x_friendli_team: ID of team to run requests as (optional parameter).
- :param chat_completion_request_body:
+        :param eos_token: A list of end-of-sentence (EOS) tokens.
+        :param frequency_penalty: Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.
+ :param logit_bias: Accepts a JSON object that maps tokens to an associated bias value. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model.
+ :param logprobs: Whether to return log probabilities of the output tokens or not.
+ :param max_tokens: The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.
+ :param min_tokens: The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument. **This field is unsupported when `tools` are specified.**
+ :param n: The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.
+ :param parallel_tool_calls: Whether to enable parallel function calling.
+        :param presence_penalty: Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text.
+        :param repetition_penalty: Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [Keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.
+        :param response_format: The enforced format of the model's output. Note that the content of the output message may be truncated if it exceeds the `max_tokens`. You can check this by verifying that the `finish_reason` of the output message is `length`. ***Important*** You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`). Otherwise, the model may produce an unending stream of whitespace or other characters.
+        :param seed: Seed to control the random procedure. If nothing is given, a random seed is used for sampling, and the seed is returned along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.
+ :param stop: When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list.
+        :param stream: Whether to stream the generation result. When set to true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated.
+ :param stream_options: Options related to stream. It can only be used when `stream: true`.
+ :param temperature: Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.
+        :param timeout_microseconds: Request timeout in microseconds. Returns the `HTTP 429 Too Many Requests` response status code on timeout. Default behavior is no timeout.
+ :param tool_choice: Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. You can also specify a particular tool by `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}`.
+ :param tools: A list of tools the model may call. Currently, only functions are supported as a tool. A maximum of 128 functions is supported. Use this to provide a list of functions the model may generate JSON inputs for. **When `tools` are specified, `min_tokens` field is unsupported.**
+ :param top_k: The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.
+ :param top_logprobs: The number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used.
+ :param top_p: Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.
:param retries: Override the default retry configuration for this method
:param server_url: Override the default server URL for this method
:param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -171,8 +304,41 @@ async def chat_completion_async(
request = models.ChatCompletionRequest(
x_friendli_team=x_friendli_team,
- chat_completion_request_body=utils.get_pydantic_model(
- chat_completion_request_body, Optional[models.ChatCompletionRequestBody]
+ chat_completion_body=models.ChatCompletionBody(
+ model=model,
+ messages=utils.get_pydantic_model(messages, List[models.Message]),
+ eos_token=eos_token,
+ frequency_penalty=frequency_penalty,
+ logit_bias=utils.get_pydantic_model(
+ logit_bias, OptionalNullable[models.LogitBias]
+ ),
+ logprobs=logprobs,
+ max_tokens=max_tokens,
+ min_tokens=min_tokens,
+ n=n,
+ parallel_tool_calls=parallel_tool_calls,
+ presence_penalty=presence_penalty,
+ repetition_penalty=repetition_penalty,
+ response_format=utils.get_pydantic_model(
+ response_format, OptionalNullable[models.TextResponseFormat]
+ ),
+ seed=seed,
+ stop=stop,
+ stream=stream,
+ stream_options=utils.get_pydantic_model(
+ stream_options, OptionalNullable[models.StreamOptions]
+ ),
+ temperature=temperature,
+ timeout_microseconds=timeout_microseconds,
+ tool_choice=utils.get_pydantic_model(
+ tool_choice, OptionalNullable[models.ToolChoice]
+ ),
+ tools=utils.get_pydantic_model(
+ tools, OptionalNullable[List[models.Tool]]
+ ),
+ top_k=top_k,
+ top_logprobs=top_logprobs,
+ top_p=top_p,
),
)
@@ -182,7 +348,7 @@ async def chat_completion_async(
base_url=base_url,
url_variables=url_variables,
request=request,
- request_body_required=False,
+ request_body_required=True,
request_has_path_params=False,
request_has_query_params=True,
user_agent_header="user-agent",
@@ -191,11 +357,11 @@ async def chat_completion_async(
else "application/json;q=1, text/event-stream;q=0",
security=self.sdk_configuration.security,
get_serialized_body=lambda: utils.serialize_request_body(
- request.chat_completion_request_body,
+ request.chat_completion_body,
+ False,
False,
- True,
"json",
- Optional[models.ChatCompletionRequestBody],
+ models.ChatCompletionBody,
),
timeout_ms=timeout_ms,
)
@@ -224,14 +390,12 @@ async def chat_completion_async(
if utils.match_response(http_res, "200", "application/json"):
http_response_text = await utils.stream_to_text_async(http_res)
- return utils.unmarshal_json(
- http_response_text, models.ChatCompletionResponse
- )
+ return utils.unmarshal_json(http_response_text, models.ChatCompletionResult)
if utils.match_response(http_res, "200", "text/event-stream"):
return eventstreaming.stream_events_async(
http_res,
lambda raw: utils.unmarshal_json(
- raw, models.StreamedChatCompletionResponse
+ raw, models.StreamedChatCompletionResult
),
sentinel="[DONE]",
)
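
When `stream=True`, the async variant above returns a stream of `StreamedChatCompletionResult` events terminated by the `[DONE]` sentinel. A hedged consumer sketch, reusing the assumed `client` from the earlier example:

    import asyncio

    async def stream_chat(client):
        # Assumes the call yields an async generator when stream=True.
        events = await client.inference.chat_completion_async(
            model="meta-llama-3.1-8b-instruct",  # placeholder model code
            messages=[{"role": "user", "content": "Tell me a joke."}],
            stream=True,
        )
        async for event in events:
            print(event.data)  # StreamedChatCompletionResult.data

    # asyncio.run(stream_chat(client))
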
@@ -253,21 +417,19 @@ async def chat_completion_async(
def completion(
self,
*,
+ completion_body: Union[models.CompletionBody, models.CompletionBodyTypedDict],
x_friendli_team: Optional[str] = None,
- completion_request_body: Optional[
- Union[models.CompletionRequestBody, models.CompletionRequestBodyTypedDict]
- ] = None,
retries: OptionalNullable[utils.RetryConfig] = UNSET,
server_url: Optional[str] = None,
timeout_ms: Optional[int] = None,
accept_header_override: Optional[CompletionAcceptEnum] = None,
- ) -> models.CompletionResponse1:
+ ) -> models.CompletionResponse:
r"""Completion
Generate text based on the given text prompt.
+ :param completion_body:
:param x_friendli_team: ID of team to run requests as (optional parameter).
- :param completion_request_body:
:param retries: Override the default retry configuration for this method
:param server_url: Override the default server URL for this method
:param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -283,8 +445,8 @@ def completion(
request = models.CompletionRequest(
x_friendli_team=x_friendli_team,
- completion_request_body=utils.get_pydantic_model(
- completion_request_body, Optional[models.CompletionRequestBody]
+ completion_body=utils.get_pydantic_model(
+ completion_body, models.CompletionBody
),
)
@@ -294,7 +456,7 @@ def completion(
base_url=base_url,
url_variables=url_variables,
request=request,
- request_body_required=False,
+ request_body_required=True,
request_has_path_params=False,
request_has_query_params=True,
user_agent_header="user-agent",
@@ -303,11 +465,7 @@ def completion(
else "application/json;q=1, text/event-stream;q=0",
security=self.sdk_configuration.security,
get_serialized_body=lambda: utils.serialize_request_body(
- request.completion_request_body,
- False,
- True,
- "json",
- Optional[models.CompletionRequestBody],
+ request.completion_body, False, False, "json", models.CompletionBody
),
timeout_ms=timeout_ms,
)
@@ -336,13 +494,11 @@ def completion(
if utils.match_response(http_res, "200", "application/json"):
http_response_text = utils.stream_to_text(http_res)
- return utils.unmarshal_json(http_response_text, models.CompletionResponse)
+ return utils.unmarshal_json(http_response_text, models.CompletionResult)
if utils.match_response(http_res, "200", "text/event-stream"):
return eventstreaming.stream_events(
http_res,
- lambda raw: utils.unmarshal_json(
- raw, models.StreamedCompletionResponse
- ),
+ lambda raw: utils.unmarshal_json(raw, models.StreamedCompletionResult),
sentinel="[DONE]",
)
if utils.match_response(http_res, ["4XX", "5XX"], "*"):
@@ -363,21 +519,19 @@ def completion(
async def completion_async(
self,
*,
+ completion_body: Union[models.CompletionBody, models.CompletionBodyTypedDict],
x_friendli_team: Optional[str] = None,
- completion_request_body: Optional[
- Union[models.CompletionRequestBody, models.CompletionRequestBodyTypedDict]
- ] = None,
retries: OptionalNullable[utils.RetryConfig] = UNSET,
server_url: Optional[str] = None,
timeout_ms: Optional[int] = None,
accept_header_override: Optional[CompletionAcceptEnum] = None,
- ) -> models.CompletionResponse1:
+ ) -> models.CompletionResponse:
r"""Completion
Generate text based on the given text prompt.
+ :param completion_body:
:param x_friendli_team: ID of team to run requests as (optional parameter).
- :param completion_request_body:
:param retries: Override the default retry configuration for this method
:param server_url: Override the default server URL for this method
:param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -393,8 +547,8 @@ async def completion_async(
request = models.CompletionRequest(
x_friendli_team=x_friendli_team,
- completion_request_body=utils.get_pydantic_model(
- completion_request_body, Optional[models.CompletionRequestBody]
+ completion_body=utils.get_pydantic_model(
+ completion_body, models.CompletionBody
),
)
@@ -404,7 +558,7 @@ async def completion_async(
base_url=base_url,
url_variables=url_variables,
request=request,
- request_body_required=False,
+ request_body_required=True,
request_has_path_params=False,
request_has_query_params=True,
user_agent_header="user-agent",
@@ -413,11 +567,7 @@ async def completion_async(
else "application/json;q=1, text/event-stream;q=0",
security=self.sdk_configuration.security,
get_serialized_body=lambda: utils.serialize_request_body(
- request.completion_request_body,
- False,
- True,
- "json",
- Optional[models.CompletionRequestBody],
+ request.completion_body, False, False, "json", models.CompletionBody
),
timeout_ms=timeout_ms,
)
@@ -446,13 +596,11 @@ async def completion_async(
if utils.match_response(http_res, "200", "application/json"):
http_response_text = await utils.stream_to_text_async(http_res)
- return utils.unmarshal_json(http_response_text, models.CompletionResponse)
+ return utils.unmarshal_json(http_response_text, models.CompletionResult)
if utils.match_response(http_res, "200", "text/event-stream"):
return eventstreaming.stream_events_async(
http_res,
- lambda raw: utils.unmarshal_json(
- raw, models.StreamedCompletionResponse
- ),
+ lambda raw: utils.unmarshal_json(raw, models.StreamedCompletionResult),
sentinel="[DONE]",
)
if utils.match_response(http_res, ["4XX", "5XX"], "*"):
@@ -473,22 +621,20 @@ async def completion_async(
def tokenization(
self,
*,
+ model: str,
+ prompt: str,
x_friendli_team: Optional[str] = None,
- tokenization_request_body: Optional[
- Union[
- models.TokenizationRequestBody, models.TokenizationRequestBodyTypedDict
- ]
- ] = None,
retries: OptionalNullable[utils.RetryConfig] = UNSET,
server_url: Optional[str] = None,
timeout_ms: Optional[int] = None,
- ) -> models.TokenizationResponse:
+ ) -> models.TokenizationResult:
r"""Tokenization
By giving a text input, generate a tokenized output of token IDs.
+ :param model: Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models).
+ :param prompt: Input text prompt to tokenize.
:param x_friendli_team: ID of team to run requests as (optional parameter).
- :param tokenization_request_body:
:param retries: Override the default retry configuration for this method
:param server_url: Override the default server URL for this method
:param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -503,8 +649,9 @@ def tokenization(
request = models.TokenizationRequest(
x_friendli_team=x_friendli_team,
- tokenization_request_body=utils.get_pydantic_model(
- tokenization_request_body, Optional[models.TokenizationRequestBody]
+ tokenization_body=models.TokenizationBody(
+ model=model,
+ prompt=prompt,
),
)
@@ -514,18 +661,14 @@ def tokenization(
base_url=base_url,
url_variables=url_variables,
request=request,
- request_body_required=False,
+ request_body_required=True,
request_has_path_params=False,
request_has_query_params=True,
user_agent_header="user-agent",
accept_header_value="application/json",
security=self.sdk_configuration.security,
get_serialized_body=lambda: utils.serialize_request_body(
- request.tokenization_request_body,
- False,
- True,
- "json",
- Optional[models.TokenizationRequestBody],
+ request.tokenization_body, False, False, "json", models.TokenizationBody
),
timeout_ms=timeout_ms,
)
@@ -552,7 +695,7 @@ def tokenization(
)
if utils.match_response(http_res, "200", "application/json"):
- return utils.unmarshal_json(http_res.text, models.TokenizationResponse)
+ return utils.unmarshal_json(http_res.text, models.TokenizationResult)
if utils.match_response(http_res, ["4XX", "5XX"], "*"):
http_res_text = utils.stream_to_text(http_res)
raise models.SDKError(
@@ -571,22 +714,20 @@ def tokenization(
async def tokenization_async(
self,
*,
+ model: str,
+ prompt: str,
x_friendli_team: Optional[str] = None,
- tokenization_request_body: Optional[
- Union[
- models.TokenizationRequestBody, models.TokenizationRequestBodyTypedDict
- ]
- ] = None,
retries: OptionalNullable[utils.RetryConfig] = UNSET,
server_url: Optional[str] = None,
timeout_ms: Optional[int] = None,
- ) -> models.TokenizationResponse:
+ ) -> models.TokenizationResult:
r"""Tokenization
By giving a text input, generate a tokenized output of token IDs.
+ :param model: Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models).
+ :param prompt: Input text prompt to tokenize.
:param x_friendli_team: ID of team to run requests as (optional parameter).
- :param tokenization_request_body:
:param retries: Override the default retry configuration for this method
:param server_url: Override the default server URL for this method
:param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -601,8 +742,9 @@ async def tokenization_async(
request = models.TokenizationRequest(
x_friendli_team=x_friendli_team,
- tokenization_request_body=utils.get_pydantic_model(
- tokenization_request_body, Optional[models.TokenizationRequestBody]
+ tokenization_body=models.TokenizationBody(
+ model=model,
+ prompt=prompt,
),
)
@@ -612,18 +754,14 @@ async def tokenization_async(
base_url=base_url,
url_variables=url_variables,
request=request,
- request_body_required=False,
+ request_body_required=True,
request_has_path_params=False,
request_has_query_params=True,
user_agent_header="user-agent",
accept_header_value="application/json",
security=self.sdk_configuration.security,
get_serialized_body=lambda: utils.serialize_request_body(
- request.tokenization_request_body,
- False,
- True,
- "json",
- Optional[models.TokenizationRequestBody],
+ request.tokenization_body, False, False, "json", models.TokenizationBody
),
timeout_ms=timeout_ms,
)
@@ -650,7 +788,7 @@ async def tokenization_async(
)
if utils.match_response(http_res, "200", "application/json"):
- return utils.unmarshal_json(http_res.text, models.TokenizationResponse)
+ return utils.unmarshal_json(http_res.text, models.TokenizationResult)
if utils.match_response(http_res, ["4XX", "5XX"], "*"):
http_res_text = await utils.stream_to_text_async(http_res)
raise models.SDKError(
@@ -670,22 +808,19 @@ def detokenization(
self,
*,
x_friendli_team: Optional[str] = None,
- detokenization_request_body: Optional[
- Union[
- models.DetokenizationRequestBody,
- models.DetokenizationRequestBodyTypedDict,
- ]
- ] = None,
+ model: Optional[str] = None,
+ tokens: Optional[List[int]] = None,
retries: OptionalNullable[utils.RetryConfig] = UNSET,
server_url: Optional[str] = None,
timeout_ms: Optional[int] = None,
- ) -> models.DetokenizationResponse:
+ ) -> models.DetokenizationResult:
r"""Detokenization
By giving a list of tokens, generate a detokenized output text string.
:param x_friendli_team: ID of team to run requests as (optional parameter).
- :param detokenization_request_body:
+ :param model: Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models).
+ :param tokens: A token sequence to detokenize.
:param retries: Override the default retry configuration for this method
:param server_url: Override the default server URL for this method
:param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -700,8 +835,9 @@ def detokenization(
request = models.DetokenizationRequest(
x_friendli_team=x_friendli_team,
- detokenization_request_body=utils.get_pydantic_model(
- detokenization_request_body, Optional[models.DetokenizationRequestBody]
+ detokenization_body=models.DetokenizationBody(
+ model=model,
+ tokens=tokens,
),
)
@@ -711,18 +847,18 @@ def detokenization(
base_url=base_url,
url_variables=url_variables,
request=request,
- request_body_required=False,
+ request_body_required=True,
request_has_path_params=False,
request_has_query_params=True,
user_agent_header="user-agent",
accept_header_value="application/json",
security=self.sdk_configuration.security,
get_serialized_body=lambda: utils.serialize_request_body(
- request.detokenization_request_body,
+ request.detokenization_body,
+ False,
False,
- True,
"json",
- Optional[models.DetokenizationRequestBody],
+ models.DetokenizationBody,
),
timeout_ms=timeout_ms,
)
@@ -749,7 +885,7 @@ def detokenization(
)
if utils.match_response(http_res, "200", "application/json"):
- return utils.unmarshal_json(http_res.text, models.DetokenizationResponse)
+ return utils.unmarshal_json(http_res.text, models.DetokenizationResult)
if utils.match_response(http_res, ["4XX", "5XX"], "*"):
http_res_text = utils.stream_to_text(http_res)
raise models.SDKError(
@@ -769,22 +905,19 @@ async def detokenization_async(
self,
*,
x_friendli_team: Optional[str] = None,
- detokenization_request_body: Optional[
- Union[
- models.DetokenizationRequestBody,
- models.DetokenizationRequestBodyTypedDict,
- ]
- ] = None,
+ model: Optional[str] = None,
+ tokens: Optional[List[int]] = None,
retries: OptionalNullable[utils.RetryConfig] = UNSET,
server_url: Optional[str] = None,
timeout_ms: Optional[int] = None,
- ) -> models.DetokenizationResponse:
+ ) -> models.DetokenizationResult:
r"""Detokenization
By giving a list of tokens, generate a detokenized output text string.
:param x_friendli_team: ID of team to run requests as (optional parameter).
- :param detokenization_request_body:
+ :param model: Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models).
+ :param tokens: A token sequence to detokenize.
:param retries: Override the default retry configuration for this method
:param server_url: Override the default server URL for this method
:param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -799,8 +932,9 @@ async def detokenization_async(
request = models.DetokenizationRequest(
x_friendli_team=x_friendli_team,
- detokenization_request_body=utils.get_pydantic_model(
- detokenization_request_body, Optional[models.DetokenizationRequestBody]
+ detokenization_body=models.DetokenizationBody(
+ model=model,
+ tokens=tokens,
),
)
@@ -810,18 +944,18 @@ async def detokenization_async(
base_url=base_url,
url_variables=url_variables,
request=request,
- request_body_required=False,
+ request_body_required=True,
request_has_path_params=False,
request_has_query_params=True,
user_agent_header="user-agent",
accept_header_value="application/json",
security=self.sdk_configuration.security,
get_serialized_body=lambda: utils.serialize_request_body(
- request.detokenization_request_body,
+ request.detokenization_body,
+ False,
False,
- True,
"json",
- Optional[models.DetokenizationRequestBody],
+ models.DetokenizationBody,
),
timeout_ms=timeout_ms,
)
@@ -848,7 +982,7 @@ async def detokenization_async(
)
if utils.match_response(http_res, "200", "application/json"):
- return utils.unmarshal_json(http_res.text, models.DetokenizationResponse)
+ return utils.unmarshal_json(http_res.text, models.DetokenizationResult)
if utils.match_response(http_res, ["4XX", "5XX"], "*"):
http_res_text = await utils.stream_to_text_async(http_res)
raise models.SDKError(
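
Taken together, the inference.py changes replace the optional `*_request_body` arguments with required bodies or flattened keyword arguments. A round-trip sketch using the new tokenization/detokenization signatures (client construction assumed as above, model code is a placeholder):

    tokenized = client.inference.tokenization(
        model="meta-llama-3.1-8b-instruct",
        prompt="What is generative AI?",
    )
    print(tokenized.tokens)  # TokenizationResult.tokens: Optional[List[int]]

    detokenized = client.inference.detokenization(
        model="meta-llama-3.1-8b-instruct",
        tokens=tokenized.tokens,
    )
    print(detokenized.text)  # DetokenizationResult.text: Optional[str]
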
diff --git a/src/friendli/models/__init__.py b/src/friendli/models/__init__.py
index de85ff4..6f51210 100644
--- a/src/friendli/models/__init__.py
+++ b/src/friendli/models/__init__.py
@@ -10,6 +10,21 @@
ToolCalls,
ToolCallsTypedDict,
)
+from .chatcompletionbody import (
+ ChatCompletionBody,
+ ChatCompletionBodyTypedDict,
+ LogitBias,
+ LogitBiasTypedDict,
+ Object,
+ ObjectTypedDict,
+ StreamOptions,
+ StreamOptionsTypedDict,
+ ToolChoice,
+ ToolChoiceFunction,
+ ToolChoiceFunctionTypedDict,
+ ToolChoiceType,
+ ToolChoiceTypedDict,
+)
from .chatcompletionchoice import (
ChatCompletionChoice,
ChatCompletionChoiceFunction,
@@ -24,54 +39,30 @@
from .chatcompletionop import (
ChatCompletionRequest,
ChatCompletionRequestTypedDict,
- ChatCompletionResponse1,
- ChatCompletionResponse1TypedDict,
-)
-from .chatcompletionrequestbody import (
- ChatCompletionRequestBody,
- ChatCompletionRequestBodyTypedDict,
- LogitBias,
- LogitBiasTypedDict,
- Object,
- ObjectTypedDict,
- StreamOptions,
- StreamOptionsTypedDict,
- ToolChoice,
- ToolChoiceFunction,
- ToolChoiceFunctionTypedDict,
- ToolChoiceType,
- ToolChoiceTypedDict,
-)
-from .chatcompletionresponse import (
ChatCompletionResponse,
ChatCompletionResponseTypedDict,
)
+from .chatcompletionresult import ChatCompletionResult, ChatCompletionResultTypedDict
+from .completionbody import CompletionBody, CompletionBodyTypedDict
+from .completionbodywithprompt import (
+ CompletionBodyWithPrompt,
+ CompletionBodyWithPromptTypedDict,
+)
+from .completionbodywithtokens import (
+ CompletionBodyWithTokens,
+ CompletionBodyWithTokensTypedDict,
+)
from .completionchoice import CompletionChoice, CompletionChoiceTypedDict
from .completionop import (
CompletionRequest,
CompletionRequestTypedDict,
- CompletionResponse1,
- CompletionResponse1TypedDict,
-)
-from .completionrequestbody import CompletionRequestBody, CompletionRequestBodyTypedDict
-from .completionrequestbodywithprompt import (
- CompletionRequestBodyWithPrompt,
- CompletionRequestBodyWithPromptTypedDict,
+ CompletionResponse,
+ CompletionResponseTypedDict,
)
-from .completionrequestbodywithtokens import (
- CompletionRequestBodyWithTokens,
- CompletionRequestBodyWithTokensTypedDict,
-)
-from .completionresponse import CompletionResponse, CompletionResponseTypedDict
+from .completionresult import CompletionResult, CompletionResultTypedDict
+from .detokenizationbody import DetokenizationBody, DetokenizationBodyTypedDict
from .detokenizationop import DetokenizationRequest, DetokenizationRequestTypedDict
-from .detokenizationrequestbody import (
- DetokenizationRequestBody,
- DetokenizationRequestBodyTypedDict,
-)
-from .detokenizationresponse import (
- DetokenizationResponse,
- DetokenizationResponseTypedDict,
-)
+from .detokenizationresult import DetokenizationResult, DetokenizationResultTypedDict
from .filebuiltintool import (
FileBuiltInTool,
FileBuiltInToolType,
@@ -106,17 +97,17 @@
StreamedChatCompletionChoiceType,
StreamedChatCompletionChoiceTypedDict,
)
-from .streamedchatcompletionresponse import (
+from .streamedchatcompletionresult import (
Data,
DataTypedDict,
- StreamedChatCompletionResponse,
- StreamedChatCompletionResponseTypedDict,
+ StreamedChatCompletionResult,
+ StreamedChatCompletionResultTypedDict,
)
-from .streamedcompletionresponse import (
- StreamedCompletionResponse,
- StreamedCompletionResponseData,
- StreamedCompletionResponseDataTypedDict,
- StreamedCompletionResponseTypedDict,
+from .streamedcompletionresult import (
+ StreamedCompletionResult,
+ StreamedCompletionResultData,
+ StreamedCompletionResultDataTypedDict,
+ StreamedCompletionResultTypedDict,
)
from .streamedcompletiontokencomplete import (
StreamedCompletionTokenComplete,
@@ -128,20 +119,17 @@
StreamedCompletionTokenSampled,
StreamedCompletionTokenSampledTypedDict,
)
-from .streamedtoolassistedchatcompletionresponse import (
- StreamedToolAssistedChatCompletionResponse,
- StreamedToolAssistedChatCompletionResponseData,
- StreamedToolAssistedChatCompletionResponseDataTypedDict,
- StreamedToolAssistedChatCompletionResponseTypedDict,
+from .streamedtoolassistedchatcompletionresult import (
+ StreamedToolAssistedChatCompletionResult,
+ StreamedToolAssistedChatCompletionResultData,
+ StreamedToolAssistedChatCompletionResultDataTypedDict,
+ StreamedToolAssistedChatCompletionResultTypedDict,
)
from .systemmessage import Role, SystemMessage, SystemMessageTypedDict
from .textresponseformat import TextResponseFormat, TextResponseFormatTypedDict, Type
+from .tokenizationbody import TokenizationBody, TokenizationBodyTypedDict
from .tokenizationop import TokenizationRequest, TokenizationRequestTypedDict
-from .tokenizationrequestbody import (
- TokenizationRequestBody,
- TokenizationRequestBodyTypedDict,
-)
-from .tokenizationresponse import TokenizationResponse, TokenizationResponseTypedDict
+from .tokenizationresult import TokenizationResult, TokenizationResultTypedDict
from .tokensequence import TokenSequence, TokenSequenceTypedDict
from .tool import Tool, ToolType, ToolTypedDict
from .toolassistedchatcompletionop import (
@@ -151,14 +139,14 @@
ToolAssistedChatCompletionResponse,
ToolAssistedChatCompletionResponseTypedDict,
)
-from .toolassistedcompletionrequestbody import (
- ToolAssistedCompletionRequestBody,
- ToolAssistedCompletionRequestBodyToolChoice,
- ToolAssistedCompletionRequestBodyToolChoiceFunction,
- ToolAssistedCompletionRequestBodyToolChoiceFunctionTypedDict,
- ToolAssistedCompletionRequestBodyToolChoiceType,
- ToolAssistedCompletionRequestBodyToolChoiceTypedDict,
- ToolAssistedCompletionRequestBodyTypedDict,
+from .toolassistedcompletionbody import (
+ ToolAssistedCompletionBody,
+ ToolAssistedCompletionBodyToolChoice,
+ ToolAssistedCompletionBodyToolChoiceFunction,
+ ToolAssistedCompletionBodyToolChoiceFunctionTypedDict,
+ ToolAssistedCompletionBodyToolChoiceType,
+ ToolAssistedCompletionBodyToolChoiceTypedDict,
+ ToolAssistedCompletionBodyTypedDict,
ToolChoiceObject,
ToolChoiceObjectTypedDict,
)
@@ -177,6 +165,8 @@
"AssistantMessageRole",
"AssistantMessageType",
"AssistantMessageTypedDict",
+ "ChatCompletionBody",
+ "ChatCompletionBodyTypedDict",
"ChatCompletionChoice",
"ChatCompletionChoiceFunction",
"ChatCompletionChoiceFunctionTypedDict",
@@ -187,39 +177,37 @@
"ChatCompletionChoiceType",
"ChatCompletionChoiceTypedDict",
"ChatCompletionRequest",
- "ChatCompletionRequestBody",
- "ChatCompletionRequestBodyTypedDict",
"ChatCompletionRequestTypedDict",
"ChatCompletionResponse",
- "ChatCompletionResponse1",
- "ChatCompletionResponse1TypedDict",
"ChatCompletionResponseTypedDict",
+ "ChatCompletionResult",
+ "ChatCompletionResultTypedDict",
+ "CompletionBody",
+ "CompletionBodyTypedDict",
+ "CompletionBodyWithPrompt",
+ "CompletionBodyWithPromptTypedDict",
+ "CompletionBodyWithTokens",
+ "CompletionBodyWithTokensTypedDict",
"CompletionChoice",
"CompletionChoiceTypedDict",
"CompletionRequest",
- "CompletionRequestBody",
- "CompletionRequestBodyTypedDict",
- "CompletionRequestBodyWithPrompt",
- "CompletionRequestBodyWithPromptTypedDict",
- "CompletionRequestBodyWithTokens",
- "CompletionRequestBodyWithTokensTypedDict",
"CompletionRequestTypedDict",
"CompletionResponse",
- "CompletionResponse1",
- "CompletionResponse1TypedDict",
"CompletionResponseTypedDict",
+ "CompletionResult",
+ "CompletionResultTypedDict",
"Content",
"ContentTypedDict",
"Data",
"DataTypedDict",
"Delta",
"DeltaTypedDict",
+ "DetokenizationBody",
+ "DetokenizationBodyTypedDict",
"DetokenizationRequest",
- "DetokenizationRequestBody",
- "DetokenizationRequestBodyTypedDict",
"DetokenizationRequestTypedDict",
- "DetokenizationResponse",
- "DetokenizationResponseTypedDict",
+ "DetokenizationResult",
+ "DetokenizationResultTypedDict",
"Event",
"FileBuiltInTool",
"FileBuiltInToolType",
@@ -255,21 +243,21 @@
"StreamedChatCompletionChoiceToolCallsTypedDict",
"StreamedChatCompletionChoiceType",
"StreamedChatCompletionChoiceTypedDict",
- "StreamedChatCompletionResponse",
- "StreamedChatCompletionResponseTypedDict",
- "StreamedCompletionResponse",
- "StreamedCompletionResponseData",
- "StreamedCompletionResponseDataTypedDict",
- "StreamedCompletionResponseTypedDict",
+ "StreamedChatCompletionResult",
+ "StreamedChatCompletionResultTypedDict",
+ "StreamedCompletionResult",
+ "StreamedCompletionResultData",
+ "StreamedCompletionResultDataTypedDict",
+ "StreamedCompletionResultTypedDict",
"StreamedCompletionTokenComplete",
"StreamedCompletionTokenCompleteEvent",
"StreamedCompletionTokenCompleteTypedDict",
"StreamedCompletionTokenSampled",
"StreamedCompletionTokenSampledTypedDict",
- "StreamedToolAssistedChatCompletionResponse",
- "StreamedToolAssistedChatCompletionResponseData",
- "StreamedToolAssistedChatCompletionResponseDataTypedDict",
- "StreamedToolAssistedChatCompletionResponseTypedDict",
+ "StreamedToolAssistedChatCompletionResult",
+ "StreamedToolAssistedChatCompletionResultData",
+ "StreamedToolAssistedChatCompletionResultDataTypedDict",
+ "StreamedToolAssistedChatCompletionResultTypedDict",
"SystemMessage",
"SystemMessageTypedDict",
"TOOL_ASSISTED_CHAT_COMPLETION_OP_SERVERS",
@@ -277,24 +265,24 @@
"TextResponseFormatTypedDict",
"TokenSequence",
"TokenSequenceTypedDict",
+ "TokenizationBody",
+ "TokenizationBodyTypedDict",
"TokenizationRequest",
- "TokenizationRequestBody",
- "TokenizationRequestBodyTypedDict",
"TokenizationRequestTypedDict",
- "TokenizationResponse",
- "TokenizationResponseTypedDict",
+ "TokenizationResult",
+ "TokenizationResultTypedDict",
"Tool",
"ToolAssistedChatCompletionRequest",
"ToolAssistedChatCompletionRequestTypedDict",
"ToolAssistedChatCompletionResponse",
"ToolAssistedChatCompletionResponseTypedDict",
- "ToolAssistedCompletionRequestBody",
- "ToolAssistedCompletionRequestBodyToolChoice",
- "ToolAssistedCompletionRequestBodyToolChoiceFunction",
- "ToolAssistedCompletionRequestBodyToolChoiceFunctionTypedDict",
- "ToolAssistedCompletionRequestBodyToolChoiceType",
- "ToolAssistedCompletionRequestBodyToolChoiceTypedDict",
- "ToolAssistedCompletionRequestBodyTypedDict",
+ "ToolAssistedCompletionBody",
+ "ToolAssistedCompletionBodyToolChoice",
+ "ToolAssistedCompletionBodyToolChoiceFunction",
+ "ToolAssistedCompletionBodyToolChoiceFunctionTypedDict",
+ "ToolAssistedCompletionBodyToolChoiceType",
+ "ToolAssistedCompletionBodyToolChoiceTypedDict",
+ "ToolAssistedCompletionBodyTypedDict",
"ToolCalls",
"ToolCallsTypedDict",
"ToolChoice",
diff --git a/src/friendli/models/assistantmessage.py b/src/friendli/models/assistantmessage.py
index 5e24d10..2bef095 100644
--- a/src/friendli/models/assistantmessage.py
+++ b/src/friendli/models/assistantmessage.py
@@ -1,22 +1,16 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
from __future__ import annotations
-from enum import Enum
from friendli.types import BaseModel
-from typing import List, Optional
+from typing import List, Literal, Optional
from typing_extensions import NotRequired, TypedDict
-class AssistantMessageRole(str, Enum):
- r"""The role of the messages author."""
-
- ASSISTANT = "assistant"
-
-
-class AssistantMessageType(str, Enum):
- r"""The type of tool call."""
+AssistantMessageRole = Literal["assistant"]
+r"""The role of the messages author."""
- FUNCTION = "function"
+AssistantMessageType = Literal["function"]
+r"""The type of tool call."""
class AssistantMessageFunctionTypedDict(TypedDict):
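
The `Enum` to `Literal` migration shown here (and repeated across the model modules below) means role and type values are now plain strings checked statically rather than enum members. A small sketch of the difference:

    from friendli.models import AssistantMessageRole

    # Before: AssistantMessageRole.ASSISTANT (an Enum member)
    # After: the alias is Literal["assistant"], so the plain string type-checks.
    role: AssistantMessageRole = "assistant"
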
diff --git a/src/friendli/models/chatcompletionrequestbody.py b/src/friendli/models/chatcompletionbody.py
similarity index 98%
rename from src/friendli/models/chatcompletionrequestbody.py
rename to src/friendli/models/chatcompletionbody.py
index cebfb81..3bd7823 100644
--- a/src/friendli/models/chatcompletionrequestbody.py
+++ b/src/friendli/models/chatcompletionbody.py
@@ -4,10 +4,9 @@
from .message import Message, MessageTypedDict
from .textresponseformat import TextResponseFormat, TextResponseFormatTypedDict
from .tool import Tool, ToolTypedDict
-from enum import Enum
from friendli.types import BaseModel, Nullable, OptionalNullable, UNSET, UNSET_SENTINEL
from pydantic import model_serializer
-from typing import List, Union
+from typing import List, Literal, Union
from typing_extensions import NotRequired, TypedDict
@@ -77,10 +76,8 @@ def serialize_model(self, handler):
return m
-class ToolChoiceType(str, Enum):
- r"""The type of the tool. Currently, only `function` is supported."""
-
- FUNCTION = "function"
+ToolChoiceType = Literal["function"]
+r"""The type of the tool. Currently, only `function` is supported."""
class ToolChoiceFunctionTypedDict(TypedDict):
@@ -126,7 +123,7 @@ class Object(BaseModel):
"""
-class ChatCompletionRequestBodyTypedDict(TypedDict):
+class ChatCompletionBodyTypedDict(TypedDict):
model: str
r"""Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models)."""
messages: List[MessageTypedDict]
@@ -206,7 +203,7 @@ class ChatCompletionRequestBodyTypedDict(TypedDict):
r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument."""
-class ChatCompletionRequestBody(BaseModel):
+class ChatCompletionBody(BaseModel):
model: str
r"""Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models)."""
diff --git a/src/friendli/models/chatcompletionchoice.py b/src/friendli/models/chatcompletionchoice.py
index 7cf7d89..fbac503 100644
--- a/src/friendli/models/chatcompletionchoice.py
+++ b/src/friendli/models/chatcompletionchoice.py
@@ -2,16 +2,13 @@
from __future__ import annotations
from .logprobs import Logprobs, LogprobsTypedDict
-from enum import Enum
from friendli.types import BaseModel
-from typing import List, Optional
+from typing import List, Literal, Optional
from typing_extensions import NotRequired, TypedDict
-class ChatCompletionChoiceType(str, Enum):
- r"""The type of the tool."""
-
- FUNCTION = "function"
+ChatCompletionChoiceType = Literal["function"]
+r"""The type of the tool."""
class ChatCompletionChoiceFunctionTypedDict(TypedDict):
diff --git a/src/friendli/models/chatcompletionop.py b/src/friendli/models/chatcompletionop.py
index 8d3c67d..c428297 100644
--- a/src/friendli/models/chatcompletionop.py
+++ b/src/friendli/models/chatcompletionop.py
@@ -1,17 +1,11 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
from __future__ import annotations
-from .chatcompletionrequestbody import (
- ChatCompletionRequestBody,
- ChatCompletionRequestBodyTypedDict,
-)
-from .chatcompletionresponse import (
- ChatCompletionResponse,
- ChatCompletionResponseTypedDict,
-)
-from .streamedchatcompletionresponse import (
- StreamedChatCompletionResponse,
- StreamedChatCompletionResponseTypedDict,
+from .chatcompletionbody import ChatCompletionBody, ChatCompletionBodyTypedDict
+from .chatcompletionresult import ChatCompletionResult, ChatCompletionResultTypedDict
+from .streamedchatcompletionresult import (
+ StreamedChatCompletionResult,
+ StreamedChatCompletionResultTypedDict,
)
from friendli.types import BaseModel
from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata
@@ -21,12 +15,17 @@
class ChatCompletionRequestTypedDict(TypedDict):
+ chat_completion_body: ChatCompletionBodyTypedDict
x_friendli_team: NotRequired[str]
r"""ID of team to run requests as (optional parameter)."""
- chat_completion_request_body: NotRequired[ChatCompletionRequestBodyTypedDict]
class ChatCompletionRequest(BaseModel):
+ chat_completion_body: Annotated[
+ ChatCompletionBody,
+ FieldMetadata(request=RequestMetadata(media_type="application/json")),
+ ]
+
x_friendli_team: Annotated[
Optional[str],
pydantic.Field(alias="X-Friendli-Team"),
@@ -34,25 +33,20 @@ class ChatCompletionRequest(BaseModel):
] = None
r"""ID of team to run requests as (optional parameter)."""
- chat_completion_request_body: Annotated[
- Optional[ChatCompletionRequestBody],
- FieldMetadata(request=RequestMetadata(media_type="application/json")),
- ] = None
-
-ChatCompletionResponse1TypedDict = Union[
- ChatCompletionResponseTypedDict,
+ChatCompletionResponseTypedDict = Union[
+ ChatCompletionResultTypedDict,
Union[
- Generator[StreamedChatCompletionResponseTypedDict, None, None],
- AsyncGenerator[StreamedChatCompletionResponseTypedDict, None],
+ Generator[StreamedChatCompletionResultTypedDict, None, None],
+ AsyncGenerator[StreamedChatCompletionResultTypedDict, None],
],
]
-ChatCompletionResponse1 = Union[
- ChatCompletionResponse,
+ChatCompletionResponse = Union[
+ ChatCompletionResult,
Union[
- Generator[StreamedChatCompletionResponse, None, None],
- AsyncGenerator[StreamedChatCompletionResponse, None],
+ Generator[StreamedChatCompletionResult, None, None],
+ AsyncGenerator[StreamedChatCompletionResult, None],
],
]
diff --git a/src/friendli/models/chatcompletionresponse.py b/src/friendli/models/chatcompletionresult.py
similarity index 88%
rename from src/friendli/models/chatcompletionresponse.py
rename to src/friendli/models/chatcompletionresult.py
index b476131..2196cf9 100644
--- a/src/friendli/models/chatcompletionresponse.py
+++ b/src/friendli/models/chatcompletionresult.py
@@ -8,14 +8,14 @@
from typing_extensions import NotRequired, TypedDict
-class ChatCompletionResponseTypedDict(TypedDict):
+class ChatCompletionResultTypedDict(TypedDict):
choices: List[ChatCompletionChoiceTypedDict]
usage: UsageTypedDict
created: NotRequired[int]
r"""The Unix timestamp (in seconds) for when the generation completed."""
-class ChatCompletionResponse(BaseModel):
+class ChatCompletionResult(BaseModel):
choices: List[ChatCompletionChoice]
usage: Usage
diff --git a/src/friendli/models/completionbody.py b/src/friendli/models/completionbody.py
new file mode 100644
index 0000000..91f759a
--- /dev/null
+++ b/src/friendli/models/completionbody.py
@@ -0,0 +1,20 @@
+"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
+
+from __future__ import annotations
+from .completionbodywithprompt import (
+ CompletionBodyWithPrompt,
+ CompletionBodyWithPromptTypedDict,
+)
+from .completionbodywithtokens import (
+ CompletionBodyWithTokens,
+ CompletionBodyWithTokensTypedDict,
+)
+from typing import Union
+
+
+CompletionBodyTypedDict = Union[
+ CompletionBodyWithPromptTypedDict, CompletionBodyWithTokensTypedDict
+]
+
+
+CompletionBody = Union[CompletionBodyWithPrompt, CompletionBodyWithTokens]
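
`CompletionBody` is now a plain union of the prompt- and token-based bodies, so callers pass whichever variant applies. A sketch with TypedDict-style payloads (the model code is a placeholder, and the non-streaming result type is assumed):

    prompt_body = {"model": "meta-llama-3.1-8b-instruct", "prompt": "Say hello."}
    tokens_body = {"model": "meta-llama-3.1-8b-instruct", "tokens": [1, 2, 3]}

    # Either shape satisfies CompletionBodyTypedDict; exactly one of
    # `prompt` or `tokens` is expected per the field docstrings.
    result = client.inference.completion(completion_body=prompt_body)
    print(result.choices)  # CompletionResult assumed (no streaming requested)
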
diff --git a/src/friendli/models/completionrequestbodywithprompt.py b/src/friendli/models/completionbodywithprompt.py
similarity index 99%
rename from src/friendli/models/completionrequestbodywithprompt.py
rename to src/friendli/models/completionbodywithprompt.py
index 19ca565..56c9d1c 100644
--- a/src/friendli/models/completionrequestbodywithprompt.py
+++ b/src/friendli/models/completionbodywithprompt.py
@@ -9,7 +9,7 @@
from typing_extensions import NotRequired, TypedDict
-class CompletionRequestBodyWithPromptTypedDict(TypedDict):
+class CompletionBodyWithPromptTypedDict(TypedDict):
prompt: str
r"""The prompt (i.e., input text) to generate completion for. Either `prompt` or `tokens` field is required."""
model: str
@@ -106,7 +106,7 @@ class CompletionRequestBodyWithPromptTypedDict(TypedDict):
r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument."""
-class CompletionRequestBodyWithPrompt(BaseModel):
+class CompletionBodyWithPrompt(BaseModel):
prompt: str
r"""The prompt (i.e., input text) to generate completion for. Either `prompt` or `tokens` field is required."""
diff --git a/src/friendli/models/completionrequestbodywithtokens.py b/src/friendli/models/completionbodywithtokens.py
similarity index 99%
rename from src/friendli/models/completionrequestbodywithtokens.py
rename to src/friendli/models/completionbodywithtokens.py
index a04582f..0c1b502 100644
--- a/src/friendli/models/completionrequestbodywithtokens.py
+++ b/src/friendli/models/completionbodywithtokens.py
@@ -9,7 +9,7 @@
from typing_extensions import NotRequired, TypedDict
-class CompletionRequestBodyWithTokensTypedDict(TypedDict):
+class CompletionBodyWithTokensTypedDict(TypedDict):
tokens: List[int]
r"""The tokenized prompt (i.e., input tokens). Either `prompt` or `tokens` field is required."""
model: str
@@ -106,7 +106,7 @@ class CompletionRequestBodyWithTokensTypedDict(TypedDict):
r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument."""
-class CompletionRequestBodyWithTokens(BaseModel):
+class CompletionBodyWithTokens(BaseModel):
tokens: List[int]
r"""The tokenized prompt (i.e., input tokens). Either `prompt` or `tokens` field is required."""
diff --git a/src/friendli/models/completionop.py b/src/friendli/models/completionop.py
index f0e65b6..7343c13 100644
--- a/src/friendli/models/completionop.py
+++ b/src/friendli/models/completionop.py
@@ -1,11 +1,11 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
from __future__ import annotations
-from .completionrequestbody import CompletionRequestBody, CompletionRequestBodyTypedDict
-from .completionresponse import CompletionResponse, CompletionResponseTypedDict
-from .streamedcompletionresponse import (
- StreamedCompletionResponse,
- StreamedCompletionResponseTypedDict,
+from .completionbody import CompletionBody, CompletionBodyTypedDict
+from .completionresult import CompletionResult, CompletionResultTypedDict
+from .streamedcompletionresult import (
+ StreamedCompletionResult,
+ StreamedCompletionResultTypedDict,
)
from friendli.types import BaseModel
from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata
@@ -15,12 +15,17 @@
class CompletionRequestTypedDict(TypedDict):
+ completion_body: CompletionBodyTypedDict
x_friendli_team: NotRequired[str]
r"""ID of team to run requests as (optional parameter)."""
- completion_request_body: NotRequired[CompletionRequestBodyTypedDict]
class CompletionRequest(BaseModel):
+ completion_body: Annotated[
+ CompletionBody,
+ FieldMetadata(request=RequestMetadata(media_type="application/json")),
+ ]
+
x_friendli_team: Annotated[
Optional[str],
pydantic.Field(alias="X-Friendli-Team"),
@@ -28,25 +33,20 @@ class CompletionRequest(BaseModel):
] = None
r"""ID of team to run requests as (optional parameter)."""
- completion_request_body: Annotated[
- Optional[CompletionRequestBody],
- FieldMetadata(request=RequestMetadata(media_type="application/json")),
- ] = None
-
-CompletionResponse1TypedDict = Union[
- CompletionResponseTypedDict,
+CompletionResponseTypedDict = Union[
+ CompletionResultTypedDict,
Union[
- Generator[StreamedCompletionResponseTypedDict, None, None],
- AsyncGenerator[StreamedCompletionResponseTypedDict, None],
+ Generator[StreamedCompletionResultTypedDict, None, None],
+ AsyncGenerator[StreamedCompletionResultTypedDict, None],
],
]
-CompletionResponse1 = Union[
- CompletionResponse,
+CompletionResponse = Union[
+ CompletionResult,
Union[
- Generator[StreamedCompletionResponse, None, None],
- AsyncGenerator[StreamedCompletionResponse, None],
+ Generator[StreamedCompletionResult, None, None],
+ AsyncGenerator[StreamedCompletionResult, None],
],
]
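
Because `CompletionResponse` is now the union of a `CompletionResult` and a stream of `StreamedCompletionResult` events, callers that enable streaming can branch on the runtime type. A hedged sketch:

    from friendli.models import CompletionResult

    response = client.inference.completion(
        completion_body={
            "model": "meta-llama-3.1-8b-instruct",  # placeholder model code
            "prompt": "Write a haiku.",
            "stream": True,  # stream flag on the prompt body is assumed here
        },
    )
    if isinstance(response, CompletionResult):
        print(response.choices)
    else:
        for event in response:  # StreamedCompletionResult events until the "[DONE]" sentinel
            print(event.data)
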
diff --git a/src/friendli/models/completionrequestbody.py b/src/friendli/models/completionrequestbody.py
deleted file mode 100644
index 33417f5..0000000
--- a/src/friendli/models/completionrequestbody.py
+++ /dev/null
@@ -1,22 +0,0 @@
-"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
-
-from __future__ import annotations
-from .completionrequestbodywithprompt import (
- CompletionRequestBodyWithPrompt,
- CompletionRequestBodyWithPromptTypedDict,
-)
-from .completionrequestbodywithtokens import (
- CompletionRequestBodyWithTokens,
- CompletionRequestBodyWithTokensTypedDict,
-)
-from typing import Union
-
-
-CompletionRequestBodyTypedDict = Union[
- CompletionRequestBodyWithPromptTypedDict, CompletionRequestBodyWithTokensTypedDict
-]
-
-
-CompletionRequestBody = Union[
- CompletionRequestBodyWithPrompt, CompletionRequestBodyWithTokens
-]
diff --git a/src/friendli/models/completionresponse.py b/src/friendli/models/completionresult.py
similarity index 84%
rename from src/friendli/models/completionresponse.py
rename to src/friendli/models/completionresult.py
index a84d03c..ede42d6 100644
--- a/src/friendli/models/completionresponse.py
+++ b/src/friendli/models/completionresult.py
@@ -8,12 +8,12 @@
from typing_extensions import TypedDict
-class CompletionResponseTypedDict(TypedDict):
+class CompletionResultTypedDict(TypedDict):
choices: List[CompletionChoiceTypedDict]
usage: UsageTypedDict
-class CompletionResponse(BaseModel):
+class CompletionResult(BaseModel):
choices: List[CompletionChoice]
usage: Usage
diff --git a/src/friendli/models/detokenizationrequestbody.py b/src/friendli/models/detokenizationbody.py
similarity index 88%
rename from src/friendli/models/detokenizationrequestbody.py
rename to src/friendli/models/detokenizationbody.py
index e60e958..1f82208 100644
--- a/src/friendli/models/detokenizationrequestbody.py
+++ b/src/friendli/models/detokenizationbody.py
@@ -6,14 +6,14 @@
from typing_extensions import NotRequired, TypedDict
-class DetokenizationRequestBodyTypedDict(TypedDict):
+class DetokenizationBodyTypedDict(TypedDict):
model: NotRequired[str]
r"""Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models)."""
tokens: NotRequired[List[int]]
r"""A token sequence to detokenize."""
-class DetokenizationRequestBody(BaseModel):
+class DetokenizationBody(BaseModel):
model: Optional[str] = None
r"""Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models)."""
diff --git a/src/friendli/models/detokenizationop.py b/src/friendli/models/detokenizationop.py
index 33aec09..f83fc25 100644
--- a/src/friendli/models/detokenizationop.py
+++ b/src/friendli/models/detokenizationop.py
@@ -1,10 +1,7 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
from __future__ import annotations
-from .detokenizationrequestbody import (
- DetokenizationRequestBody,
- DetokenizationRequestBodyTypedDict,
-)
+from .detokenizationbody import DetokenizationBody, DetokenizationBodyTypedDict
from friendli.types import BaseModel
from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata
import pydantic
@@ -13,20 +10,20 @@
class DetokenizationRequestTypedDict(TypedDict):
+ detokenization_body: DetokenizationBodyTypedDict
x_friendli_team: NotRequired[str]
r"""ID of team to run requests as (optional parameter)."""
- detokenization_request_body: NotRequired[DetokenizationRequestBodyTypedDict]
class DetokenizationRequest(BaseModel):
+ detokenization_body: Annotated[
+ DetokenizationBody,
+ FieldMetadata(request=RequestMetadata(media_type="application/json")),
+ ]
+
x_friendli_team: Annotated[
Optional[str],
pydantic.Field(alias="X-Friendli-Team"),
FieldMetadata(header=HeaderMetadata(style="simple", explode=False)),
] = None
r"""ID of team to run requests as (optional parameter)."""
-
- detokenization_request_body: Annotated[
- Optional[DetokenizationRequestBody],
- FieldMetadata(request=RequestMetadata(media_type="application/json")),
- ] = None
diff --git a/src/friendli/models/detokenizationresponse.py b/src/friendli/models/detokenizationresult.py
similarity index 83%
rename from src/friendli/models/detokenizationresponse.py
rename to src/friendli/models/detokenizationresult.py
index d23631d..0c33b4d 100644
--- a/src/friendli/models/detokenizationresponse.py
+++ b/src/friendli/models/detokenizationresult.py
@@ -6,14 +6,14 @@
from typing_extensions import NotRequired, TypedDict
-class DetokenizationResponseTypedDict(TypedDict):
+class DetokenizationResultTypedDict(TypedDict):
r"""Successfully detokenized the tokens."""
text: NotRequired[str]
r"""Detokenized text output."""
-class DetokenizationResponse(BaseModel):
+class DetokenizationResult(BaseModel):
r"""Successfully detokenized the tokens."""
text: Optional[str] = None
diff --git a/src/friendli/models/filebuiltintool.py b/src/friendli/models/filebuiltintool.py
index 2b7c1b2..2eff7e4 100644
--- a/src/friendli/models/filebuiltintool.py
+++ b/src/friendli/models/filebuiltintool.py
@@ -1,16 +1,13 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
from __future__ import annotations
-from enum import Enum
from friendli.types import BaseModel
-from typing import List
+from typing import List, Literal
from typing_extensions import TypedDict
-class FileBuiltInToolType(str, Enum):
- r"""The type of the file parser tool. Only .txt and .pdf files are supported."""
-
- FILE_TEXT = "file:text"
+FileBuiltInToolType = Literal["file:text"]
+r"""The type of the file parser tool. Only .txt and .pdf files are supported."""
class FileBuiltInToolTypedDict(TypedDict):
diff --git a/src/friendli/models/functiontool.py b/src/friendli/models/functiontool.py
index a7b7cfb..f317662 100644
--- a/src/friendli/models/functiontool.py
+++ b/src/friendli/models/functiontool.py
@@ -2,15 +2,13 @@
from __future__ import annotations
from .function import Function, FunctionTypedDict
-from enum import Enum
from friendli.types import BaseModel
+from typing import Literal
from typing_extensions import TypedDict
-class FunctionToolType(str, Enum):
- r"""The type of the tool."""
-
- FUNCTION = "function"
+FunctionToolType = Literal["function"]
+r"""The type of the tool."""
class FunctionToolTypedDict(TypedDict):
diff --git a/src/friendli/models/otherbuiltintool.py b/src/friendli/models/otherbuiltintool.py
index b24e2bb..22c6679 100644
--- a/src/friendli/models/otherbuiltintool.py
+++ b/src/friendli/models/otherbuiltintool.py
@@ -1,20 +1,20 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
from __future__ import annotations
-from enum import Enum
from friendli.types import BaseModel
+from typing import Literal
from typing_extensions import TypedDict
-class OtherBuiltInToolType(str, Enum):
- r"""The type of the built-in tool."""
-
- MATH_CALCULATOR = "math:calculator"
- MATH_STATISTICS = "math:statistics"
- MATH_CALENDAR = "math:calendar"
- WEB_SEARCH = "web:search"
- WEB_URL = "web:url"
- CODE_PYTHON_INTERPRETER = "code:python-interpreter"
+OtherBuiltInToolType = Literal[
+ "math:calculator",
+ "math:statistics",
+ "math:calendar",
+ "web:search",
+ "web:url",
+ "code:python-interpreter",
+]
+r"""The type of the built-in tool."""
class OtherBuiltInToolTypedDict(TypedDict):
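
As with the other enums, the built-in tool types are now literal strings. For illustration:

    from friendli.models import OtherBuiltInToolType

    # Any of the listed literals is accepted, e.g. "web:search" or "math:calculator".
    search_tool_type: OtherBuiltInToolType = "web:search"
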
diff --git a/src/friendli/models/security.py b/src/friendli/models/security.py
index 6cb0961..eb2035a 100644
--- a/src/friendli/models/security.py
+++ b/src/friendli/models/security.py
@@ -8,11 +8,11 @@
class SecurityTypedDict(TypedDict):
- bearer_auth: NotRequired[str]
+ token: NotRequired[str]
class Security(BaseModel):
- bearer_auth: Annotated[
+ token: Annotated[
Optional[str],
FieldMetadata(
security=SecurityMetadata(
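
The security option is renamed from `bearer_auth` to `token`. Assuming the root client accepts this `Security` model (or an equivalent `token=` shortcut), a sketch:

    import os
    from friendli.models import Security

    # The environment variable name is illustrative, not defined by this patch.
    security = Security(token=os.environ.get("FRIENDLI_TOKEN", ""))
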
diff --git a/src/friendli/models/streamedchatcompletionchoice.py b/src/friendli/models/streamedchatcompletionchoice.py
index 97c4e03..63d844e 100644
--- a/src/friendli/models/streamedchatcompletionchoice.py
+++ b/src/friendli/models/streamedchatcompletionchoice.py
@@ -2,17 +2,14 @@
from __future__ import annotations
from .logprobs import Logprobs, LogprobsTypedDict
-from enum import Enum
from friendli.types import BaseModel, Nullable, OptionalNullable, UNSET, UNSET_SENTINEL
from pydantic import model_serializer
-from typing import Optional
+from typing import Literal, Optional
from typing_extensions import NotRequired, TypedDict
-class StreamedChatCompletionChoiceType(str, Enum):
- r"""The type of the tool."""
-
- FUNCTION = "function"
+StreamedChatCompletionChoiceType = Literal["function"]
+r"""The type of the tool."""
class StreamedChatCompletionChoiceFunctionTypedDict(TypedDict):
diff --git a/src/friendli/models/streamedchatcompletionresponse.py b/src/friendli/models/streamedchatcompletionresult.py
similarity index 90%
rename from src/friendli/models/streamedchatcompletionresponse.py
rename to src/friendli/models/streamedchatcompletionresult.py
index 0e81d09..e5ca89e 100644
--- a/src/friendli/models/streamedchatcompletionresponse.py
+++ b/src/friendli/models/streamedchatcompletionresult.py
@@ -27,13 +27,13 @@ class Data(BaseModel):
usage: Optional[Usage] = None
-class StreamedChatCompletionResponseTypedDict(TypedDict):
+class StreamedChatCompletionResultTypedDict(TypedDict):
r"""A server-sent event containing chat completion content."""
data: DataTypedDict
-class StreamedChatCompletionResponse(BaseModel):
+class StreamedChatCompletionResult(BaseModel):
r"""A server-sent event containing chat completion content."""
data: Data
diff --git a/src/friendli/models/streamedcompletionresponse.py b/src/friendli/models/streamedcompletionresult.py
similarity index 75%
rename from src/friendli/models/streamedcompletionresponse.py
rename to src/friendli/models/streamedcompletionresult.py
index dbf5df2..181ed8a 100644
--- a/src/friendli/models/streamedcompletionresponse.py
+++ b/src/friendli/models/streamedcompletionresult.py
@@ -16,12 +16,12 @@
from typing_extensions import Annotated, TypedDict
-StreamedCompletionResponseDataTypedDict = Union[
+StreamedCompletionResultDataTypedDict = Union[
StreamedCompletionTokenCompleteTypedDict, StreamedCompletionTokenSampledTypedDict
]
-StreamedCompletionResponseData = Annotated[
+StreamedCompletionResultData = Annotated[
Union[
Annotated[StreamedCompletionTokenSampled, Tag("token_sampled")],
Annotated[StreamedCompletionTokenComplete, Tag("complete")],
@@ -30,9 +30,9 @@
]
-class StreamedCompletionResponseTypedDict(TypedDict):
- data: StreamedCompletionResponseDataTypedDict
+class StreamedCompletionResultTypedDict(TypedDict):
+ data: StreamedCompletionResultDataTypedDict
-class StreamedCompletionResponse(BaseModel):
- data: StreamedCompletionResponseData
+class StreamedCompletionResult(BaseModel):
+ data: StreamedCompletionResultData
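
`StreamedCompletionResult.data` is a tagged union of the `token_sampled` and `complete` events. A hedged consumer sketch, assuming the `event` discriminator is present on both union members:

    def handle_completion_event(result):
        # result: StreamedCompletionResult
        data = result.data
        if data.event == "complete":
            print("final choices:", data.choices)  # choices/usage on the complete event are assumed
        else:  # "token_sampled"
            print("sampled:", data)
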
diff --git a/src/friendli/models/streamedcompletiontokencomplete.py b/src/friendli/models/streamedcompletiontokencomplete.py
index 5db449f..2b43b7b 100644
--- a/src/friendli/models/streamedcompletiontokencomplete.py
+++ b/src/friendli/models/streamedcompletiontokencomplete.py
@@ -3,16 +3,13 @@
from __future__ import annotations
from .completionchoice import CompletionChoice, CompletionChoiceTypedDict
from .usage import Usage, UsageTypedDict
-from enum import Enum
from friendli.types import BaseModel
-from typing import List
+from typing import List, Literal
from typing_extensions import TypedDict
-class StreamedCompletionTokenCompleteEvent(str, Enum):
- r"""Type of server-sent event."""
-
- COMPLETE = "complete"
+StreamedCompletionTokenCompleteEvent = Literal["complete"]
+r"""Type of server-sent event."""
class StreamedCompletionTokenCompleteTypedDict(TypedDict):
diff --git a/src/friendli/models/streamedcompletiontokensampled.py b/src/friendli/models/streamedcompletiontokensampled.py
index 04e16ed..725d311 100644
--- a/src/friendli/models/streamedcompletiontokensampled.py
+++ b/src/friendli/models/streamedcompletiontokensampled.py
@@ -1,15 +1,13 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
from __future__ import annotations
-from enum import Enum
from friendli.types import BaseModel
+from typing import Literal
from typing_extensions import TypedDict
-class Event(str, Enum):
- r"""Type of server-sent event."""
-
- TOKEN_SAMPLED = "token_sampled"
+Event = Literal["token_sampled"]
+r"""Type of server-sent event."""
class StreamedCompletionTokenSampledTypedDict(TypedDict):
diff --git a/src/friendli/models/streamedtoolassistedchatcompletionresponse.py b/src/friendli/models/streamedtoolassistedchatcompletionresult.py
similarity index 65%
rename from src/friendli/models/streamedtoolassistedchatcompletionresponse.py
rename to src/friendli/models/streamedtoolassistedchatcompletionresult.py
index 9b58dc6..5e54a37 100644
--- a/src/friendli/models/streamedtoolassistedchatcompletionresponse.py
+++ b/src/friendli/models/streamedtoolassistedchatcompletionresult.py
@@ -10,26 +10,26 @@
from typing_extensions import TypedDict
-class StreamedToolAssistedChatCompletionResponseDataTypedDict(TypedDict):
+class StreamedToolAssistedChatCompletionResultDataTypedDict(TypedDict):
choices: List[StreamedChatCompletionChoiceTypedDict]
created: int
r"""The Unix timestamp (in seconds) for when the token sampled."""
-class StreamedToolAssistedChatCompletionResponseData(BaseModel):
+class StreamedToolAssistedChatCompletionResultData(BaseModel):
choices: List[StreamedChatCompletionChoice]
created: int
r"""The Unix timestamp (in seconds) for when the token sampled."""
-class StreamedToolAssistedChatCompletionResponseTypedDict(TypedDict):
+class StreamedToolAssistedChatCompletionResultTypedDict(TypedDict):
r"""A server-sent event containing chat completion content."""
- data: StreamedToolAssistedChatCompletionResponseDataTypedDict
+ data: StreamedToolAssistedChatCompletionResultDataTypedDict
-class StreamedToolAssistedChatCompletionResponse(BaseModel):
+class StreamedToolAssistedChatCompletionResult(BaseModel):
r"""A server-sent event containing chat completion content."""
- data: StreamedToolAssistedChatCompletionResponseData
+ data: StreamedToolAssistedChatCompletionResultData
diff --git a/src/friendli/models/systemmessage.py b/src/friendli/models/systemmessage.py
index 69c863f..3dcba85 100644
--- a/src/friendli/models/systemmessage.py
+++ b/src/friendli/models/systemmessage.py
@@ -1,16 +1,13 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
from __future__ import annotations
-from enum import Enum
from friendli.types import BaseModel
-from typing import Optional
+from typing import Literal, Optional
from typing_extensions import NotRequired, TypedDict
-class Role(str, Enum):
- r"""The role of the messages author."""
-
- SYSTEM = "system"
+Role = Literal["system"]
+r"""The role of the messages author."""
class SystemMessageTypedDict(TypedDict):
diff --git a/src/friendli/models/textresponseformat.py b/src/friendli/models/textresponseformat.py
index ed20a28..8aab8e2 100644
--- a/src/friendli/models/textresponseformat.py
+++ b/src/friendli/models/textresponseformat.py
@@ -1,19 +1,14 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
from __future__ import annotations
-from enum import Enum
from friendli.types import BaseModel
import pydantic
-from typing import Optional
+from typing import Literal, Optional
from typing_extensions import Annotated, NotRequired, TypedDict
-class Type(str, Enum):
- r"""Type of the response format."""
-
- TEXT = "text"
- JSON_OBJECT = "json_object"
- REGEX = "regex"
+Type = Literal["text", "json_object", "regex"]
+r"""Type of the response format."""
class TextResponseFormatTypedDict(TypedDict):
diff --git a/src/friendli/models/tokenizationrequestbody.py b/src/friendli/models/tokenizationbody.py
similarity index 87%
rename from src/friendli/models/tokenizationrequestbody.py
rename to src/friendli/models/tokenizationbody.py
index d3d026c..5057ed3 100644
--- a/src/friendli/models/tokenizationrequestbody.py
+++ b/src/friendli/models/tokenizationbody.py
@@ -5,14 +5,14 @@
from typing_extensions import TypedDict
-class TokenizationRequestBodyTypedDict(TypedDict):
+class TokenizationBodyTypedDict(TypedDict):
model: str
r"""Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models)."""
prompt: str
r"""Input text prompt to tokenize."""
-class TokenizationRequestBody(BaseModel):
+class TokenizationBody(BaseModel):
model: str
r"""Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models)."""
diff --git a/src/friendli/models/tokenizationop.py b/src/friendli/models/tokenizationop.py
index 7d626b8..055a9fd 100644
--- a/src/friendli/models/tokenizationop.py
+++ b/src/friendli/models/tokenizationop.py
@@ -1,10 +1,7 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
from __future__ import annotations
-from .tokenizationrequestbody import (
- TokenizationRequestBody,
- TokenizationRequestBodyTypedDict,
-)
+from .tokenizationbody import TokenizationBody, TokenizationBodyTypedDict
from friendli.types import BaseModel
from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata
import pydantic
@@ -13,20 +10,20 @@
class TokenizationRequestTypedDict(TypedDict):
+ tokenization_body: TokenizationBodyTypedDict
x_friendli_team: NotRequired[str]
r"""ID of team to run requests as (optional parameter)."""
- tokenization_request_body: NotRequired[TokenizationRequestBodyTypedDict]
class TokenizationRequest(BaseModel):
+ tokenization_body: Annotated[
+ TokenizationBody,
+ FieldMetadata(request=RequestMetadata(media_type="application/json")),
+ ]
+
x_friendli_team: Annotated[
Optional[str],
pydantic.Field(alias="X-Friendli-Team"),
FieldMetadata(header=HeaderMetadata(style="simple", explode=False)),
] = None
r"""ID of team to run requests as (optional parameter)."""
-
- tokenization_request_body: Annotated[
- Optional[TokenizationRequestBody],
- FieldMetadata(request=RequestMetadata(media_type="application/json")),
- ] = None
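With this change the JSON body becomes a required, renamed field of the request model instead of an optional trailing one. A minimal sketch of the new request shape, assuming the renamed classes are re-exported from `friendli.models` (the model code below is a placeholder):

    from friendli import models

    req = models.TokenizationRequest(
        tokenization_body=models.TokenizationBody(
            model="meta-llama-3.1-8b-instruct",  # placeholder model code
            prompt="What is generative AI?",
        ),
        # The X-Friendli-Team header stays optional.
    )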
diff --git a/src/friendli/models/tokenizationresponse.py b/src/friendli/models/tokenizationresult.py
similarity index 84%
rename from src/friendli/models/tokenizationresponse.py
rename to src/friendli/models/tokenizationresult.py
index 106d90a..01f186f 100644
--- a/src/friendli/models/tokenizationresponse.py
+++ b/src/friendli/models/tokenizationresult.py
@@ -6,14 +6,14 @@
from typing_extensions import NotRequired, TypedDict
-class TokenizationResponseTypedDict(TypedDict):
+class TokenizationResultTypedDict(TypedDict):
r"""Successfully tokenized the text."""
tokens: NotRequired[List[int]]
r"""A list of token IDs."""
-class TokenizationResponse(BaseModel):
+class TokenizationResult(BaseModel):
r"""Successfully tokenized the text."""
tokens: Optional[List[int]] = None
diff --git a/src/friendli/models/tool.py b/src/friendli/models/tool.py
index 011da5f..f7e44d9 100644
--- a/src/friendli/models/tool.py
+++ b/src/friendli/models/tool.py
@@ -2,15 +2,13 @@
from __future__ import annotations
from .function import Function, FunctionTypedDict
-from enum import Enum
from friendli.types import BaseModel
+from typing import Literal
from typing_extensions import TypedDict
-class ToolType(str, Enum):
- r"""The type of the tool. Currently, only `function` is supported."""
-
- FUNCTION = "function"
+ToolType = Literal["function"]
+r"""The type of the tool. Currently, only `function` is supported."""
class ToolTypedDict(TypedDict):
diff --git a/src/friendli/models/toolassistedchatcompletionop.py b/src/friendli/models/toolassistedchatcompletionop.py
index 4aae700..8a7426f 100644
--- a/src/friendli/models/toolassistedchatcompletionop.py
+++ b/src/friendli/models/toolassistedchatcompletionop.py
@@ -1,17 +1,14 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
from __future__ import annotations
-from .chatcompletionresponse import (
- ChatCompletionResponse,
- ChatCompletionResponseTypedDict,
+from .chatcompletionresult import ChatCompletionResult, ChatCompletionResultTypedDict
+from .streamedtoolassistedchatcompletionresult import (
+ StreamedToolAssistedChatCompletionResult,
+ StreamedToolAssistedChatCompletionResultTypedDict,
)
-from .streamedtoolassistedchatcompletionresponse import (
- StreamedToolAssistedChatCompletionResponse,
- StreamedToolAssistedChatCompletionResponseTypedDict,
-)
-from .toolassistedcompletionrequestbody import (
- ToolAssistedCompletionRequestBody,
- ToolAssistedCompletionRequestBodyTypedDict,
+from .toolassistedcompletionbody import (
+ ToolAssistedCompletionBody,
+ ToolAssistedCompletionBodyTypedDict,
)
from friendli.types import BaseModel
from friendli.utils import FieldMetadata, HeaderMetadata, RequestMetadata
@@ -26,14 +23,17 @@
class ToolAssistedChatCompletionRequestTypedDict(TypedDict):
+ tool_assisted_completion_body: ToolAssistedCompletionBodyTypedDict
x_friendli_team: NotRequired[str]
r"""ID of team to run requests as (optional parameter)."""
- tool_assisted_completion_request_body: NotRequired[
- ToolAssistedCompletionRequestBodyTypedDict
- ]
class ToolAssistedChatCompletionRequest(BaseModel):
+ tool_assisted_completion_body: Annotated[
+ ToolAssistedCompletionBody,
+ FieldMetadata(request=RequestMetadata(media_type="application/json")),
+ ]
+
x_friendli_team: Annotated[
Optional[str],
pydantic.Field(alias="X-Friendli-Team"),
@@ -41,25 +41,20 @@ class ToolAssistedChatCompletionRequest(BaseModel):
] = None
r"""ID of team to run requests as (optional parameter)."""
- tool_assisted_completion_request_body: Annotated[
- Optional[ToolAssistedCompletionRequestBody],
- FieldMetadata(request=RequestMetadata(media_type="application/json")),
- ] = None
-
ToolAssistedChatCompletionResponseTypedDict = Union[
- ChatCompletionResponseTypedDict,
+ ChatCompletionResultTypedDict,
Union[
- Generator[StreamedToolAssistedChatCompletionResponseTypedDict, None, None],
- AsyncGenerator[StreamedToolAssistedChatCompletionResponseTypedDict, None],
+ Generator[StreamedToolAssistedChatCompletionResultTypedDict, None, None],
+ AsyncGenerator[StreamedToolAssistedChatCompletionResultTypedDict, None],
],
]
ToolAssistedChatCompletionResponse = Union[
- ChatCompletionResponse,
+ ChatCompletionResult,
Union[
- Generator[StreamedToolAssistedChatCompletionResponse, None, None],
- AsyncGenerator[StreamedToolAssistedChatCompletionResponse, None],
+ Generator[StreamedToolAssistedChatCompletionResult, None, None],
+ AsyncGenerator[StreamedToolAssistedChatCompletionResult, None],
],
]
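`ToolAssistedChatCompletionResponse` is now a union of the non-streaming `ChatCompletionResult` and a (sync or async) generator of `StreamedToolAssistedChatCompletionResult` events, so callers that enable streaming should branch on the shape of the returned value. A minimal sync-only sketch under that assumption:

    import inspect

    def handle(res):
        if inspect.isgenerator(res):
            # Streaming: each event is a StreamedToolAssistedChatCompletionResult.
            for event in res:
                print(event.data.choices)
        else:
            # Non-streaming: a single ChatCompletionResult.
            print(res)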
diff --git a/src/friendli/models/toolassistedcompletionrequestbody.py b/src/friendli/models/toolassistedcompletionbody.py
similarity index 94%
rename from src/friendli/models/toolassistedcompletionrequestbody.py
rename to src/friendli/models/toolassistedcompletionbody.py
index 065830c..bfae443 100644
--- a/src/friendli/models/toolassistedcompletionrequestbody.py
+++ b/src/friendli/models/toolassistedcompletionbody.py
@@ -7,45 +7,40 @@
ToolForToolAssistedChat,
ToolForToolAssistedChatTypedDict,
)
-from enum import Enum
from friendli.types import BaseModel, Nullable, OptionalNullable, UNSET, UNSET_SENTINEL
from pydantic import model_serializer
-from typing import List, Union
+from typing import List, Literal, Union
from typing_extensions import NotRequired, TypedDict
-class ToolAssistedCompletionRequestBodyToolChoiceType(str, Enum):
- r"""The type of the tool. Currently, only `function` is supported."""
-
- FUNCTION = "function"
+ToolAssistedCompletionBodyToolChoiceType = Literal["function"]
+r"""The type of the tool. Currently, only `function` is supported."""
-class ToolAssistedCompletionRequestBodyToolChoiceFunctionTypedDict(TypedDict):
+class ToolAssistedCompletionBodyToolChoiceFunctionTypedDict(TypedDict):
name: str
r"""The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."""
-class ToolAssistedCompletionRequestBodyToolChoiceFunction(BaseModel):
+class ToolAssistedCompletionBodyToolChoiceFunction(BaseModel):
name: str
r"""The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."""
class ToolChoiceObjectTypedDict(TypedDict):
- type: ToolAssistedCompletionRequestBodyToolChoiceType
+ type: ToolAssistedCompletionBodyToolChoiceType
r"""The type of the tool. Currently, only `function` is supported."""
- function: ToolAssistedCompletionRequestBodyToolChoiceFunctionTypedDict
+ function: ToolAssistedCompletionBodyToolChoiceFunctionTypedDict
class ToolChoiceObject(BaseModel):
- type: ToolAssistedCompletionRequestBodyToolChoiceType
+ type: ToolAssistedCompletionBodyToolChoiceType
r"""The type of the tool. Currently, only `function` is supported."""
- function: ToolAssistedCompletionRequestBodyToolChoiceFunction
+ function: ToolAssistedCompletionBodyToolChoiceFunction
-ToolAssistedCompletionRequestBodyToolChoiceTypedDict = Union[
- ToolChoiceObjectTypedDict, str
-]
+ToolAssistedCompletionBodyToolChoiceTypedDict = Union[ToolChoiceObjectTypedDict, str]
r"""Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
@@ -55,7 +50,7 @@ class ToolChoiceObject(BaseModel):
"""
-ToolAssistedCompletionRequestBodyToolChoice = Union[ToolChoiceObject, str]
+ToolAssistedCompletionBodyToolChoice = Union[ToolChoiceObject, str]
r"""Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
@@ -65,7 +60,7 @@ class ToolChoiceObject(BaseModel):
"""
-class ToolAssistedCompletionRequestBodyTypedDict(TypedDict):
+class ToolAssistedCompletionBodyTypedDict(TypedDict):
model: str
r"""Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models)."""
messages: List[MessageTypedDict]
@@ -126,9 +121,7 @@ class ToolAssistedCompletionRequestBodyTypedDict(TypedDict):
r"""Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument."""
timeout_microseconds: NotRequired[Nullable[int]]
r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout."""
- tool_choice: NotRequired[
- Nullable[ToolAssistedCompletionRequestBodyToolChoiceTypedDict]
- ]
+ tool_choice: NotRequired[Nullable[ToolAssistedCompletionBodyToolChoiceTypedDict]]
r"""Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
@@ -151,7 +144,7 @@ class ToolAssistedCompletionRequestBodyTypedDict(TypedDict):
r"""Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument."""
-class ToolAssistedCompletionRequestBody(BaseModel):
+class ToolAssistedCompletionBody(BaseModel):
model: str
r"""Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models)."""
@@ -229,7 +222,7 @@ class ToolAssistedCompletionRequestBody(BaseModel):
timeout_microseconds: OptionalNullable[int] = UNSET
r"""Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout."""
- tool_choice: OptionalNullable[ToolAssistedCompletionRequestBodyToolChoice] = UNSET
+ tool_choice: OptionalNullable[ToolAssistedCompletionBodyToolChoice] = UNSET
r"""Determines the tool calling behavior of the model.
When set to `none`, the model will bypass tool execution and generate a response directly.
In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message.
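In practice `tool_choice` still accepts either a plain string mode or a structured object pinning one function; only the generated type names change. A minimal sketch of both forms, assuming the renamed classes are re-exported from `friendli.models`:

    from friendli import models

    # String form: let the model decide (the default behaviour).
    choice = "auto"

    # Object form: force a call to one specific function.
    choice = models.ToolChoiceObject(
        type="function",
        function=models.ToolAssistedCompletionBodyToolChoiceFunction(name="my_function"),
    )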
diff --git a/src/friendli/models/toolmessage.py b/src/friendli/models/toolmessage.py
index 5f55778..636ecd1 100644
--- a/src/friendli/models/toolmessage.py
+++ b/src/friendli/models/toolmessage.py
@@ -1,16 +1,13 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
from __future__ import annotations
-from enum import Enum
from friendli.types import BaseModel
-from typing import Optional
+from typing import Literal, Optional
from typing_extensions import NotRequired, TypedDict
-class ToolMessageRole(str, Enum):
- r"""The role of the messages author."""
-
- TOOL = "tool"
+ToolMessageRole = Literal["tool"]
+r"""The role of the messages author."""
class ToolMessageTypedDict(TypedDict):
diff --git a/src/friendli/models/usermessage.py b/src/friendli/models/usermessage.py
index b0f5369..6cb991b 100644
--- a/src/friendli/models/usermessage.py
+++ b/src/friendli/models/usermessage.py
@@ -1,16 +1,13 @@
"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
from __future__ import annotations
-from enum import Enum
from friendli.types import BaseModel
-from typing import Optional
+from typing import Literal, Optional
from typing_extensions import NotRequired, TypedDict
-class UserMessageRole(str, Enum):
- r"""The role of the messages author."""
-
- USER = "user"
+UserMessageRole = Literal["user"]
+r"""The role of the messages author."""
class UserMessageTypedDict(TypedDict):
diff --git a/src/friendli/sdk.py b/src/friendli/sdk.py
index d64923b..2cdf362 100644
--- a/src/friendli/sdk.py
+++ b/src/friendli/sdk.py
@@ -22,8 +22,8 @@ class Friendli(BaseSDK):
def __init__(
self,
- bearer_auth: Optional[Union[Optional[str], Callable[[], Optional[str]]]] = None,
- server_idx: Optional[int] = None,
+ token: Optional[Union[Optional[str], Callable[[], Optional[str]]]] = None,
+ server: Optional[str] = None,
server_url: Optional[str] = None,
url_params: Optional[Dict[str, str]] = None,
client: Optional[HttpClient] = None,
@@ -34,8 +34,8 @@ def __init__(
) -> None:
r"""Instantiates the SDK configuring it with the provided parameters.
- :param bearer_auth: The bearer_auth required for authentication
- :param server_idx: The index of the server to use for all methods
+ :param token: The token required for authentication
+ :param server: The server by name to use for all methods
:param server_url: The server URL to use for all methods
:param url_params: Parameters to optionally template the server URL with
:param client: The HTTP client to use for all synchronous methods
@@ -61,10 +61,10 @@ def __init__(
), "The provided async_client must implement the AsyncHttpClient protocol."
security: Any = None
- if callable(bearer_auth):
- security = lambda: models.Security(bearer_auth=bearer_auth()) # pylint: disable=unnecessary-lambda-assignment
+ if callable(token):
+ security = lambda: models.Security(token=token()) # pylint: disable=unnecessary-lambda-assignment
else:
- security = models.Security(bearer_auth=bearer_auth)
+ security = models.Security(token=token)
if server_url is not None:
if url_params is not None:
@@ -77,7 +77,7 @@ def __init__(
async_client=async_client,
security=security,
server_url=server_url,
- server_idx=server_idx,
+ server=server,
retry_config=retry_config,
timeout_ms=timeout_ms,
debug_logger=debug_logger,
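The net effect of the constructor change is that authentication and server selection are done by token and by server name rather than by bearer_auth and a numeric index. A minimal sketch, assuming `Friendli` is exported from the package root:

    from friendli import Friendli

    # Static token plus a named server ("serverless" or "dedicated").
    client = Friendli(token="<YOUR_FRIENDLI_TOKEN>", server="serverless")

    # A callable may be passed instead, e.g. to refresh the token per request.
    client = Friendli(token=lambda: read_token_from_somewhere())  # hypothetical helper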
diff --git a/src/friendli/sdkconfiguration.py b/src/friendli/sdkconfiguration.py
index 63686ed..61d27ef 100644
--- a/src/friendli/sdkconfiguration.py
+++ b/src/friendli/sdkconfiguration.py
@@ -10,12 +10,14 @@
from typing import Callable, Dict, Optional, Tuple, Union
-SERVERS = [
- "https://inference.friendli.ai",
- # Friendli Serverless Endpoints.
- "https://inference.friendli.ai/dedicated",
- # Friendli Dedicated Endpoints.
-]
+SERVER_SERVERLESS = "serverless"
+r"""Friendli Serverless Endpoints."""
+SERVER_DEDICATED = "dedicated"
+r"""Friendli Dedicated Endpoints."""
+SERVERS = {
+ SERVER_SERVERLESS: "https://inference.friendli.ai",
+ SERVER_DEDICATED: "https://inference.friendli.ai/dedicated",
+}
"""Contains the list of servers available to the SDK"""
@@ -26,12 +28,12 @@ class SDKConfiguration:
debug_logger: Logger
security: Optional[Union[models.Security, Callable[[], models.Security]]] = None
server_url: Optional[str] = ""
- server_idx: Optional[int] = 0
+ server: Optional[str] = ""
language: str = "python"
openapi_doc_version: str = "v1"
- sdk_version: str = "0.1.4"
+ sdk_version: str = "0.6.0"
gen_version: str = "2.452.0"
- user_agent: str = "speakeasy-sdk/python 0.1.4 2.452.0 v1 friendli"
+ user_agent: str = "speakeasy-sdk/python 0.6.0 2.452.0 v1 friendli"
retry_config: OptionalNullable[RetryConfig] = Field(default_factory=lambda: UNSET)
timeout_ms: Optional[int] = None
@@ -41,10 +43,13 @@ def __post_init__(self):
def get_server_details(self) -> Tuple[str, Dict[str, str]]:
if self.server_url is not None and self.server_url:
return remove_suffix(self.server_url, "/"), {}
- if self.server_idx is None:
- self.server_idx = 0
+ if not self.server:
+ self.server = SERVER_SERVERLESS
- return SERVERS[self.server_idx], {}
+ if self.server not in SERVERS:
+ raise ValueError(f'Invalid server "{self.server}"')
+
+ return SERVERS[self.server], {}
def get_hooks(self) -> SDKHooks:
return self._hooks
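The switch to named servers also changes how misconfiguration fails: an unknown name raises immediately instead of indexing into a list. A small sketch mirroring the resolution logic in get_server_details above:

    from typing import Dict, Optional

    SERVERS: Dict[str, str] = {
        "serverless": "https://inference.friendli.ai",
        "dedicated": "https://inference.friendli.ai/dedicated",
    }

    def resolve(server: Optional[str], server_url: Optional[str]) -> str:
        if server_url:                    # an explicit URL always wins
            return server_url.rstrip("/")
        name = server or "serverless"     # empty/None falls back to the default
        if name not in SERVERS:
            raise ValueError(f'Invalid server "{name}"')
        return SERVERS[name]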
diff --git a/src/friendli/serverless.py b/src/friendli/serverless.py
index 72eba2a..c05571a 100644
--- a/src/friendli/serverless.py
+++ b/src/friendli/serverless.py
@@ -6,7 +6,7 @@
from friendli._hooks import HookContext
from friendli.types import OptionalNullable, UNSET
from friendli.utils import eventstreaming, get_security_from_env
-from typing import Optional, Union
+from typing import List, Optional, Union
class ToolAssistedChatCompletionAcceptEnum(str, Enum):
@@ -18,13 +18,40 @@ class Serverless(BaseSDK):
def tool_assisted_chat_completion(
self,
*,
+ model: str,
+ messages: Union[List[models.Message], List[models.MessageTypedDict]],
x_friendli_team: Optional[str] = None,
- tool_assisted_completion_request_body: Optional[
+ eos_token: OptionalNullable[List[int]] = UNSET,
+ frequency_penalty: OptionalNullable[float] = UNSET,
+ max_tokens: OptionalNullable[int] = UNSET,
+ min_tokens: OptionalNullable[int] = 0,
+ n: OptionalNullable[int] = 1,
+ parallel_tool_calls: OptionalNullable[bool] = UNSET,
+ presence_penalty: OptionalNullable[float] = UNSET,
+ repetition_penalty: OptionalNullable[float] = UNSET,
+ response_format: OptionalNullable[
+ Union[models.TextResponseFormat, models.TextResponseFormatTypedDict]
+ ] = UNSET,
+ resume_generation: OptionalNullable[bool] = UNSET,
+ seed: OptionalNullable[List[int]] = UNSET,
+ stop: OptionalNullable[List[str]] = UNSET,
+ stream: OptionalNullable[bool] = UNSET,
+ temperature: OptionalNullable[float] = 1,
+ timeout_microseconds: OptionalNullable[int] = UNSET,
+ tool_choice: OptionalNullable[
Union[
- models.ToolAssistedCompletionRequestBody,
- models.ToolAssistedCompletionRequestBodyTypedDict,
+ models.ToolAssistedCompletionBodyToolChoice,
+ models.ToolAssistedCompletionBodyToolChoiceTypedDict,
]
- ] = None,
+ ] = UNSET,
+ tools: OptionalNullable[
+ Union[
+ List[models.ToolForToolAssistedChat],
+ List[models.ToolForToolAssistedChatTypedDict],
+ ]
+ ] = UNSET,
+ top_k: OptionalNullable[int] = 0,
+ top_p: OptionalNullable[float] = 1,
retries: OptionalNullable[utils.RetryConfig] = UNSET,
server_url: Optional[str] = None,
timeout_ms: Optional[int] = None,
@@ -34,8 +61,28 @@ def tool_assisted_chat_completion(
Given a list of messages forming a conversation, the model generates a response. Additionally, the model can utilize built-in tools for tool calls, enhancing its capability to provide more comprehensive and actionable responses.
+ :param model: Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models).
+ :param messages: A list of messages comprising the conversation so far.
:param x_friendli_team: ID of team to run requests as (optional parameter).
- :param tool_assisted_completion_request_body:
+ :param eos_token: A list of endpoint sentence tokens.
+ :param frequency_penalty: Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.
+ :param max_tokens: The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.
+ :param min_tokens: The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument. **This field is unsupported when `tools` are specified.**
+ :param n: The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.
+ :param parallel_tool_calls: Whether to enable parallel function calling.
+ :param presence_penalty: Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text.
+ :param repetition_penalty: Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.
+ :param response_format: The enforced format of the model's output. Note that the content of the output message may be truncated if it exceeds the `max_tokens`. You can check this by verifying that the `finish_reason` of the output message is `length`. ***Important*** You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`). Otherwise, the model may result in an unending stream of whitespace or other characters.
+ :param resume_generation: Enable to continue text generation even after an error occurs during a tool call. Note that enabling this option may use more tokens, as the system generates additional content to handle errors gracefully. However, if the system fails more than 8 times, the generation will stop regardless. ***Tip*** This is useful in scenarios where you want to maintain text generation flow despite errors, such as when generating long-form content. The user will not be interrupted by tool call issues, ensuring a smoother experience.
+ :param seed: Seed to control the random procedure. If nothing is given, a random seed is used for sampling, and the seed is returned along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.
+ :param stop: When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list.
+ :param stream: Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. **Caution: `stream: false` is unsupported now.**
+ :param temperature: Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.
+ :param timeout_microseconds: Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.
+ :param tool_choice: Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. You can also specify a particular tool by `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}`.
+ :param tools: A list of tools the model may call. A maximum of 128 functions is supported. Use this to provide a list of functions the model may generate JSON inputs for. For more detailed information about each tool, please refer [here](https://docs.friendli.ai/guides/serverless_endpoints/tools/built_in_tools). **When `tools` are specified, `min_tokens` field is unsupported.**
+ :param top_k: The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.
+ :param top_p: Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.
:param retries: Override the default retry configuration for this method
:param server_url: Override the default server URL for this method
:param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -53,9 +100,35 @@ def tool_assisted_chat_completion(
request = models.ToolAssistedChatCompletionRequest(
x_friendli_team=x_friendli_team,
- tool_assisted_completion_request_body=utils.get_pydantic_model(
- tool_assisted_completion_request_body,
- Optional[models.ToolAssistedCompletionRequestBody],
+ tool_assisted_completion_body=models.ToolAssistedCompletionBody(
+ model=model,
+ messages=utils.get_pydantic_model(messages, List[models.Message]),
+ eos_token=eos_token,
+ frequency_penalty=frequency_penalty,
+ max_tokens=max_tokens,
+ min_tokens=min_tokens,
+ n=n,
+ parallel_tool_calls=parallel_tool_calls,
+ presence_penalty=presence_penalty,
+ repetition_penalty=repetition_penalty,
+ response_format=utils.get_pydantic_model(
+ response_format, OptionalNullable[models.TextResponseFormat]
+ ),
+ resume_generation=resume_generation,
+ seed=seed,
+ stop=stop,
+ stream=stream,
+ temperature=temperature,
+ timeout_microseconds=timeout_microseconds,
+ tool_choice=utils.get_pydantic_model(
+ tool_choice,
+ OptionalNullable[models.ToolAssistedCompletionBodyToolChoice],
+ ),
+ tools=utils.get_pydantic_model(
+ tools, OptionalNullable[List[models.ToolForToolAssistedChat]]
+ ),
+ top_k=top_k,
+ top_p=top_p,
),
)
@@ -65,7 +138,7 @@ def tool_assisted_chat_completion(
base_url=base_url,
url_variables=url_variables,
request=request,
- request_body_required=False,
+ request_body_required=True,
request_has_path_params=False,
request_has_query_params=True,
user_agent_header="user-agent",
@@ -74,11 +147,11 @@ def tool_assisted_chat_completion(
else "application/json;q=1, text/event-stream;q=0",
security=self.sdk_configuration.security,
get_serialized_body=lambda: utils.serialize_request_body(
- request.tool_assisted_completion_request_body,
+ request.tool_assisted_completion_body,
+ False,
False,
- True,
"json",
- Optional[models.ToolAssistedCompletionRequestBody],
+ models.ToolAssistedCompletionBody,
),
timeout_ms=timeout_ms,
)
@@ -107,14 +180,12 @@ def tool_assisted_chat_completion(
if utils.match_response(http_res, "200", "application/json"):
http_response_text = utils.stream_to_text(http_res)
- return utils.unmarshal_json(
- http_response_text, models.ChatCompletionResponse
- )
+ return utils.unmarshal_json(http_response_text, models.ChatCompletionResult)
if utils.match_response(http_res, "200", "text/event-stream"):
return eventstreaming.stream_events(
http_res,
lambda raw: utils.unmarshal_json(
- raw, models.StreamedToolAssistedChatCompletionResponse
+ raw, models.StreamedToolAssistedChatCompletionResult
),
sentinel="[DONE]",
)
@@ -136,13 +207,40 @@ def tool_assisted_chat_completion(
async def tool_assisted_chat_completion_async(
self,
*,
+ model: str,
+ messages: Union[List[models.Message], List[models.MessageTypedDict]],
x_friendli_team: Optional[str] = None,
- tool_assisted_completion_request_body: Optional[
+ eos_token: OptionalNullable[List[int]] = UNSET,
+ frequency_penalty: OptionalNullable[float] = UNSET,
+ max_tokens: OptionalNullable[int] = UNSET,
+ min_tokens: OptionalNullable[int] = 0,
+ n: OptionalNullable[int] = 1,
+ parallel_tool_calls: OptionalNullable[bool] = UNSET,
+ presence_penalty: OptionalNullable[float] = UNSET,
+ repetition_penalty: OptionalNullable[float] = UNSET,
+ response_format: OptionalNullable[
+ Union[models.TextResponseFormat, models.TextResponseFormatTypedDict]
+ ] = UNSET,
+ resume_generation: OptionalNullable[bool] = UNSET,
+ seed: OptionalNullable[List[int]] = UNSET,
+ stop: OptionalNullable[List[str]] = UNSET,
+ stream: OptionalNullable[bool] = UNSET,
+ temperature: OptionalNullable[float] = 1,
+ timeout_microseconds: OptionalNullable[int] = UNSET,
+ tool_choice: OptionalNullable[
Union[
- models.ToolAssistedCompletionRequestBody,
- models.ToolAssistedCompletionRequestBodyTypedDict,
+ models.ToolAssistedCompletionBodyToolChoice,
+ models.ToolAssistedCompletionBodyToolChoiceTypedDict,
]
- ] = None,
+ ] = UNSET,
+ tools: OptionalNullable[
+ Union[
+ List[models.ToolForToolAssistedChat],
+ List[models.ToolForToolAssistedChatTypedDict],
+ ]
+ ] = UNSET,
+ top_k: OptionalNullable[int] = 0,
+ top_p: OptionalNullable[float] = 1,
retries: OptionalNullable[utils.RetryConfig] = UNSET,
server_url: Optional[str] = None,
timeout_ms: Optional[int] = None,
@@ -152,8 +250,28 @@ async def tool_assisted_chat_completion_async(
Given a list of messages forming a conversation, the model generates a response. Additionally, the model can utilize built-in tools for tool calls, enhancing its capability to provide more comprehensive and actionable responses.
+ :param model: Code of the model to use. See [available model list](https://docs.friendli.ai/guides/serverless_endpoints/pricing#text-generation-models).
+ :param messages: A list of messages comprising the conversation so far.
:param x_friendli_team: ID of team to run requests as (optional parameter).
- :param tool_assisted_completion_request_body:
+ :param eos_token: A list of endpoint sentence tokens.
+ :param frequency_penalty: Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled, taking into account their frequency in the preceding text. This penalization diminishes the model's tendency to reproduce identical lines verbatim.
+ :param max_tokens: The maximum number of tokens to generate. For decoder-only models like GPT, the length of your input tokens plus `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI GPT-3). For encoder-decoder models like T5 or BlenderBot, `max_tokens` should not exceed the model's maximum output length. This is similar to Hugging Face's [`max_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.max_new_tokens) argument.
+ :param min_tokens: The minimum number of tokens to generate. Default value is 0. This is similar to Hugging Face's [`min_new_tokens`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.min_new_tokens) argument. **This field is unsupported when `tools` are specified.**
+ :param n: The number of independently generated results for the prompt. Not supported when using beam search. Defaults to 1. This is similar to Hugging Face's [`num_return_sequences`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.num_return_sequences) argument.
+ :param parallel_tool_calls: Whether to enable parallel function calling.
+ :param presence_penalty: Number between -2.0 and 2.0. Positive values penalize tokens that have been sampled at least once in the existing text.
+ :param repetition_penalty: Penalizes tokens that have already appeared in the generated result (plus the input tokens for decoder-only models). Should be greater than or equal to 1.0 (1.0 means no penalty). See [keskar et al., 2019](https://arxiv.org/abs/1909.05858) for more details. This is similar to Hugging Face's [`repetition_penalty`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.repetition_penalty) argument.
+ :param response_format: The enforced format of the model's output. Note that the content of the output message may be truncated if it exceeds the `max_tokens`. You can check this by verifying that the `finish_reason` of the output message is `length`. ***Important*** You must explicitly instruct the model to produce the desired output format using a system prompt or user message (e.g., `You are an API generating a valid JSON as output.`). Otherwise, the model may result in an unending stream of whitespace or other characters.
+ :param resume_generation: Enable to continue text generation even after an error occurs during a tool call. Note that enabling this option may use more tokens, as the system generates additional content to handle errors gracefully. However, if the system fails more than 8 times, the generation will stop regardless. ***Tip*** This is useful in scenarios where you want to maintain text generation flow despite errors, such as when generating long-form content. The user will not be interrupted by tool call issues, ensuring a smoother experience.
+ :param seed: Seed to control the random procedure. If nothing is given, a random seed is used for sampling, and the seed is returned along with the generated result. When using the `n` argument, you can pass a list of seed values to control all of the independent generations.
+ :param stop: When one of the stop phrases appears in the generation result, the API will stop generation. The stop phrases are excluded from the result. Defaults to empty list.
+ :param stream: Whether to stream generation result. When set true, each token will be sent as [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) once generated. **Caution: `stream: false` is unsupported now.**
+ :param temperature: Sampling temperature. Smaller temperature makes the generation result closer to greedy, argmax (i.e., `top_k = 1`) sampling. Defaults to 1.0. This is similar to Hugging Face's [`temperature`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.generationconfig.temperature) argument.
+ :param timeout_microseconds: Request timeout. Gives the `HTTP 429 Too Many Requests` response status code. Default behavior is no timeout.
+ :param tool_choice: Determines the tool calling behavior of the model. When set to `none`, the model will bypass tool execution and generate a response directly. In `auto` mode (the default), the model dynamically decides whether to call a tool or respond with a message. Alternatively, setting `required` ensures that the model invokes at least one tool before responding to the user. You can also specify a particular tool by `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}`.
+ :param tools: A list of tools the model may call. A maximum of 128 functions is supported. Use this to provide a list of functions the model may generate JSON inputs for. For more detailed information about each tool, please refer [here](https://docs.friendli.ai/guides/serverless_endpoints/tools/built_in_tools). **When `tools` are specified, `min_tokens` field is unsupported.**
+ :param top_k: The number of highest probability tokens to keep for sampling. Numbers between 0 and the vocab size of the model (both inclusive) are allowed. The default value is 0, which means that the API does not apply top-k filtering. This is similar to Hugging Face's [`top_k`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_k) argument.
+ :param top_p: Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers between 0.0 (exclusive) and 1.0 (inclusive) are allowed. Defaults to 1.0. This is similar to Hugging Face's [`top_p`](https://huggingface.co/docs/transformers/v4.26.0/en/main_classes/text_generation#transformers.GenerationConfig.top_p) argument.
:param retries: Override the default retry configuration for this method
:param server_url: Override the default server URL for this method
:param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -171,9 +289,35 @@ async def tool_assisted_chat_completion_async(
request = models.ToolAssistedChatCompletionRequest(
x_friendli_team=x_friendli_team,
- tool_assisted_completion_request_body=utils.get_pydantic_model(
- tool_assisted_completion_request_body,
- Optional[models.ToolAssistedCompletionRequestBody],
+ tool_assisted_completion_body=models.ToolAssistedCompletionBody(
+ model=model,
+ messages=utils.get_pydantic_model(messages, List[models.Message]),
+ eos_token=eos_token,
+ frequency_penalty=frequency_penalty,
+ max_tokens=max_tokens,
+ min_tokens=min_tokens,
+ n=n,
+ parallel_tool_calls=parallel_tool_calls,
+ presence_penalty=presence_penalty,
+ repetition_penalty=repetition_penalty,
+ response_format=utils.get_pydantic_model(
+ response_format, OptionalNullable[models.TextResponseFormat]
+ ),
+ resume_generation=resume_generation,
+ seed=seed,
+ stop=stop,
+ stream=stream,
+ temperature=temperature,
+ timeout_microseconds=timeout_microseconds,
+ tool_choice=utils.get_pydantic_model(
+ tool_choice,
+ OptionalNullable[models.ToolAssistedCompletionBodyToolChoice],
+ ),
+ tools=utils.get_pydantic_model(
+ tools, OptionalNullable[List[models.ToolForToolAssistedChat]]
+ ),
+ top_k=top_k,
+ top_p=top_p,
),
)
@@ -183,7 +327,7 @@ async def tool_assisted_chat_completion_async(
base_url=base_url,
url_variables=url_variables,
request=request,
- request_body_required=False,
+ request_body_required=True,
request_has_path_params=False,
request_has_query_params=True,
user_agent_header="user-agent",
@@ -192,11 +336,11 @@ async def tool_assisted_chat_completion_async(
else "application/json;q=1, text/event-stream;q=0",
security=self.sdk_configuration.security,
get_serialized_body=lambda: utils.serialize_request_body(
- request.tool_assisted_completion_request_body,
+ request.tool_assisted_completion_body,
+ False,
False,
- True,
"json",
- Optional[models.ToolAssistedCompletionRequestBody],
+ models.ToolAssistedCompletionBody,
),
timeout_ms=timeout_ms,
)
@@ -225,14 +369,12 @@ async def tool_assisted_chat_completion_async(
if utils.match_response(http_res, "200", "application/json"):
http_response_text = await utils.stream_to_text_async(http_res)
- return utils.unmarshal_json(
- http_response_text, models.ChatCompletionResponse
- )
+ return utils.unmarshal_json(http_response_text, models.ChatCompletionResult)
if utils.match_response(http_res, "200", "text/event-stream"):
return eventstreaming.stream_events_async(
http_res,
lambda raw: utils.unmarshal_json(
- raw, models.StreamedToolAssistedChatCompletionResponse
+ raw, models.StreamedToolAssistedChatCompletionResult
),
sentinel="[DONE]",
)
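After the flattening, the tool-assisted chat endpoint takes plain keyword arguments instead of a request-body object. A minimal usage sketch, assuming the Serverless sub-SDK is exposed as `client.serverless` and that messages are passed as role/content dicts (the model code is a placeholder):

    from friendli import Friendli

    client = Friendli(token="<YOUR_FRIENDLI_TOKEN>")

    res = client.serverless.tool_assisted_chat_completion(
        model="meta-llama-3.1-8b-instruct",  # placeholder model code
        messages=[{"role": "user", "content": "What is 3 + 6?"}],
        max_tokens=200,
        stream=True,  # returns a generator of server-sent events
    )

    for event in res:
        print(event.data.choices)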
diff --git a/src/friendli/utils/security.py b/src/friendli/utils/security.py
index 55bafe0..45ffe61 100644
--- a/src/friendli/utils/security.py
+++ b/src/friendli/utils/security.py
@@ -64,8 +64,8 @@ def get_security_from_env(security: Any, security_class: Any) -> Optional[BaseMo
security_dict: Any = {}
- if os.getenv("FRIENDLI_BEARER_AUTH"):
- security_dict["bearer_auth"] = os.getenv("FRIENDLI_BEARER_AUTH")
+ if os.getenv("FRIENDLI_TOKEN"):
+ security_dict["token"] = os.getenv("FRIENDLI_TOKEN")
return security_class(**security_dict) if security_dict else None
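Correspondingly, the environment fallback is renamed: a token placed in FRIENDLI_TOKEN is picked up when none is passed to the constructor. A minimal sketch, assuming the env-var fallback applies whenever `token` is omitted:

    import os
    from friendli import Friendli

    os.environ["FRIENDLI_TOKEN"] = "<YOUR_FRIENDLI_TOKEN>"

    client = Friendli()  # token resolved from FRIENDLI_TOKEN via get_security_from_env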