Skip to content

Commit

Permalink
feat: support gpt-4o-audio-preview
Browse files Browse the repository at this point in the history
  • Loading branch information
Calcium-Ion committed Nov 4, 2024
1 parent 8b8abfa commit 139a104
Show file tree
Hide file tree
Showing 5 changed files with 2,297 additions and 1,866 deletions.
6 changes: 3 additions & 3 deletions common/model-ratio.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,15 +337,15 @@ func GetCompletionRatio(name string) float64 {
name = "gpt-4o-gizmo-*"
}
if strings.HasPrefix(name, "gpt-4") && !strings.HasSuffix(name, "-all") && !strings.HasSuffix(name, "-gizmo-*") {
if strings.HasPrefix(name, "gpt-4-turbo") || strings.HasSuffix(name, "preview") {
return 3
}
if strings.HasPrefix(name, "gpt-4o") {
if name == "gpt-4o-2024-05-13" {
return 3
}
return 4
}
if strings.HasPrefix(name, "gpt-4-turbo") || strings.HasSuffix(name, "preview") {
return 3
}
return 2
}
if strings.HasPrefix(name, "o1-") {
Expand Down
31 changes: 24 additions & 7 deletions dto/text_request.go → dto/openai_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ type GeneralOpenAIRequest struct {
LogProbs bool `json:"logprobs,omitempty"`
TopLogProbs int `json:"top_logprobs,omitempty"`
Dimensions int `json:"dimensions,omitempty"`
Modalities any `json:"modalities,omitempty"`
Audio any `json:"audio,omitempty"`
}

type OpenAITools struct {
Expand Down Expand Up @@ -83,19 +85,26 @@ type Message struct {
}

// MediaMessage is a single part of a multi-part message content array.
//
// Type is the discriminator for the part; the ContentType* constants in this
// file ("text", "image_url", "input_audio") are the values it is compared
// against. The payload fields are loosely typed (any) and callers type-assert
// them to the concrete payload type they expect.
type MediaMessage struct {
	// Type identifies the kind of content part; serialized as "type".
	Type string `json:"type"`

	// Text is serialized as "text". The tag has no omitempty, so the key is
	// emitted even when the string is empty.
	Text string `json:"text"`

	// ImageUrl carries the image payload; serialized as "image_url" and
	// omitted when nil.
	ImageUrl any `json:"image_url,omitempty"`

	// InputAudio carries the audio payload; serialized as "input_audio" and
	// omitted when nil.
	InputAudio any `json:"input_audio,omitempty"`
}

// MessageImageUrl is the payload of an "image_url" content part.
type MessageImageUrl struct {
	// Url is serialized as "url".
	Url string `json:"url"`

	// Detail is serialized as "detail".
	// NOTE(review): presumably the requested image detail level — confirm
	// the accepted values against the upstream API.
	Detail string `json:"detail"`
}

// MessageInputAudio is the payload of an "input_audio" content part.
type MessageInputAudio struct {
	// Data holds the base64-encoded audio; serialized as "data".
	Data string `json:"data"`

	// Format is serialized as "format".
	// NOTE(review): presumably the audio container/codec name — confirm the
	// accepted values against the upstream API.
	Format string `json:"format"`
}

// Content part type discriminators. These are the values matched against a
// content part's "type" field when parsing message content and when counting
// tokens.
const (
	ContentTypeText       = "text"
	ContentTypeImageURL   = "image_url"
	ContentTypeInputAudio = "input_audio"
)

func (m Message) StringContent() string {
Expand Down Expand Up @@ -168,11 +177,19 @@ func (m Message) ParseContent() []MediaMessage {
},
})
}

case ContentTypeInputAudio:
if subObj, ok := contentMap["input_audio"].(map[string]any); ok {
contentList = append(contentList, MediaMessage{
Type: ContentTypeInputAudio,
InputAudio: MessageInputAudio{
Data: subObj["data"].(string),
Format: subObj["format"].(string),
},
})
}
}
}
return contentList
}

return nil
}
File renamed without changes.
5 changes: 4 additions & 1 deletion service/token_counter.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,14 +223,17 @@ func CountTokenMessages(messages []dto.Message, model string, stream bool) (int,
} else {
arrayContent := message.ParseContent()
for _, m := range arrayContent {
if m.Type == "image_url" {
if m.Type == dto.ContentTypeImageURL {
imageUrl := m.ImageUrl.(dto.MessageImageUrl)
imageTokenNum, err := getImageToken(&imageUrl, model, stream)
if err != nil {
return 0, err
}
tokenNum += imageTokenNum
log.Printf("image token num: %d", imageTokenNum)
} else if m.Type == dto.ContentTypeInputAudio {
// TODO: calculate the audio token count
tokenNum += 100
} else {
tokenNum += getTokenNum(tokenEncoder, m.Text)
}
Expand Down
Loading

0 comments on commit 139a104

Please sign in to comment.