diff --git a/common/adapters.ts b/common/adapters.ts index fdcaec9fe..080ae2a8b 100644 --- a/common/adapters.ts +++ b/common/adapters.ts @@ -248,6 +248,22 @@ export const OPENAI_CHAT_MODELS: Record = { [OPENAI_MODELS.O1_Mini_20240912]: true, } +export type GoogleModel = keyof typeof GOOGLE_MODELS + +export const GOOGLE_MODELS = { + GEMINI_15_PRO: { id: 'gemini-1.5-pro', label: 'Gemini 1.5 Pro' }, + GEMINI_10_PRO_LATEST: { id: 'gemini-1.0-pro-latest', label: 'Gemini 1.0 Pro' }, + GEMINI_15_FLASH: { id: 'gemini-1.5-flash', label: 'Gemini 1.5 Flash' }, + GEMINI_15_FLASH_8B: { id: 'gemini-1.5-flash-8b', label: 'Gemini 1.5 Flash 8B' }, +} + +export const GOOGLE_LIMITS: Record = { + 'gemini-1.5-pro': 2097152, + 'gemini-1.0-pro-latest': 32768, + 'gemini-1.5-flash': 1048576, + 'gemini-1.5-flash-8b': 1048576, +} + /** Note: claude-v1 and claude-instant-v1 not included as they may point * to different models in the future. New models may be less appropriate * for roleplaying so they should be updated to manually diff --git a/common/presets.ts b/common/presets.ts index 005556f71..933bf9bb6 100644 --- a/common/presets.ts +++ b/common/presets.ts @@ -56,6 +56,8 @@ export const presetValidator = { gaslight: 'string?', oaiModel: 'string', openRouterModel: 'any?', + featherlessModel: 'string?', + googleModel: 'string?', mirostatTau: 'number?', mirostatLR: 'number?', diff --git a/common/presets/templates.ts b/common/presets/templates.ts index ebf711d98..b9eba2ae7 100644 --- a/common/presets/templates.ts +++ b/common/presets/templates.ts @@ -24,7 +24,7 @@ export type FormatTags = { closeSystem: string } -export type ModelFormat = 'Alpaca' | 'Vicuna' | 'ChatML' | 'Mistral' | 'Llama3' +export type ModelFormat = 'Alpaca' | 'Vicuna' | 'ChatML' | 'Mistral' | 'Llama3' | 'None' export const BUILTIN_FORMATS: { [key in ModelFormat]: FormatTags } = { Alpaca: { @@ -67,6 +67,14 @@ export const BUILTIN_FORMATS: { [key in ModelFormat]: FormatTags } = { openBot: 
`<|start_header_id|>assistant<|end_header_id|>`, closeBot: `<|eot_id|>`, }, + None: { + openSystem: '', + closeSystem: '', + openUser: '', + closeUser: '', + openBot: '', + closeBot: '', + }, } export function replaceTags(prompt: string, format: FormatTags | ModelFormat) { diff --git a/common/prompt-order.ts b/common/prompt-order.ts index 9331c9c60..b1d24542e 100644 --- a/common/prompt-order.ts +++ b/common/prompt-order.ts @@ -1,6 +1,13 @@ import { AppSchema } from './types' import { neat } from './util' +export type OrderOptions = { + format?: string + order?: NonNullable + gen?: AppSchema.UserGenPreset + char?: AppSchema.Character +} + export function promptOrderToTemplate( format: string, order: NonNullable @@ -27,10 +34,57 @@ export function promptOrderToTemplate( .trim() } +export function promptOrderToSections(opts: OrderOptions) { + const order = (opts.order || SIMPLE_ORDER).filter( + (o) => + o.placeholder !== 'system_prompt' && + o.placeholder !== 'ujb' && + o.placeholder !== 'post' && + o.placeholder !== 'history' && + o.enabled + ) + const holders = opts.format + ? 
formatHolders[opts.format] || formatHolders.Universal + : formatHolders.Universal + + const system = holders.system + const defs = order.map((o) => holders[o.placeholder]).join('\n') + const history = holders.history + const post = holders.post + + return { + system, + defs: `${defs}`, + history, + post, + } +} + +// export function promptOrderToMessages(opts: OrderOptions) { +// const sections = promptOrderToSections(opts) + +// return [ +// { role: 'system', content: sections.system }, +// { role: 'user', content: sections.defs }, + +// ] +// } + function getOrderHolder(format: string, holder: string) { return formatHolders[format]?.[holder] || formatHolders.Universal[holder] || '' } +export const SIMPLE_ORDER: NonNullable = [ + 'system_prompt', + 'scenario', + 'personality', + 'impersonating', + 'chat_embed', + 'memory', + 'example_dialogue', + 'history', +].map((placeholder) => ({ placeholder, enabled: true })) + export const formatHolders: Record> = { Universal: { system_prompt: neat`{{#if system_prompt}}{{value}}{{#else}}Write "{{char}}'s" next reply in a fictional roleplay chat between "{{char}}" and "{{user}}"{{/else}}{{/if}}`, diff --git a/common/prompt.ts b/common/prompt.ts index bfc3c8c7e..7f26d402b 100644 --- a/common/prompt.ts +++ b/common/prompt.ts @@ -1,6 +1,12 @@ import type { GenerateRequestV2 } from '../srv/adapter/type' import type { AppSchema, TokenCounter } from './types' -import { AIAdapter, NOVEL_MODELS, OPENAI_CONTEXTS, THIRDPARTY_HANDLERS } from './adapters' +import { + AIAdapter, + GOOGLE_LIMITS, + NOVEL_MODELS, + OPENAI_CONTEXTS, + THIRDPARTY_HANDLERS, +} from './adapters' import { formatCharacter } from './characters' import { defaultTemplate } from './mode-templates' import { buildMemoryPrompt } from './memory' @@ -8,7 +14,7 @@ import { defaultPresets, getFallbackPreset, isDefaultPreset } from './presets' import { parseTemplate } from './template-parser' import { getMessageAuthor, getBotName, trimSentence, neat } from './util' import { 
Memory } from './types' -import { promptOrderToTemplate } from './prompt-order' +import { promptOrderToTemplate, SIMPLE_ORDER } from './prompt-order' import { ModelFormat, replaceTags } from './presets/templates' export type TickHandler = (response: string, state: InferenceState, json?: T) => void @@ -292,7 +298,7 @@ export function getTemplate(opts: Pick) const fallback = getFallbackPreset(opts.settings?.service!) if (opts.settings?.useAdvancedPrompt === 'basic' || opts.settings?.presetMode === 'simple') { if (opts.settings.presetMode === 'simple') { - const template = promptOrderToTemplate('Universal', simpleOrder) + const template = promptOrderToTemplate('Universal', SIMPLE_ORDER) return template } @@ -322,17 +328,6 @@ type InjectOpts = { encoder: TokenCounter } -const simpleOrder: NonNullable = [ - 'system_prompt', - 'scenario', - 'personality', - 'impersonating', - 'chat_embed', - 'memory', - 'example_dialogue', - 'history', -].map((placeholder) => ({ placeholder, enabled: true })) - export async function injectPlaceholders(template: string, inject: InjectOpts) { const { opts, parts, history: hist, encoder, ...rest } = inject @@ -898,10 +893,22 @@ export function getContextLimit( // Any LLM could be used here so don't max any assumptions case 'ooba': case 'petals': - case 'kobold': case 'horde': return configuredMax - genAmount + case 'kobold': { + if (!gen.useMaxContext) return configuredMax - genAmount + switch (gen.thirdPartyFormat) { + case 'gemini': { + const max = GOOGLE_LIMITS[gen.googleModel!] + return max ? 
max - genAmount : configuredMax - genAmount + } + + default: + return configuredMax - genAmount + } + } + case 'novel': { const model = gen?.novelModel || NOVEL_MODELS.kayra_v1 if (model === NOVEL_MODELS.clio_v1 || model === NOVEL_MODELS.kayra_v1) { diff --git a/common/types/presets.ts b/common/types/presets.ts index 072cc3cfb..aad33a948 100644 --- a/common/types/presets.ts +++ b/common/types/presets.ts @@ -150,6 +150,8 @@ export interface GenSettings { claudeModel?: string mistralModel?: string openRouterModel?: OpenRouterModel + googleModel?: string + featherlessModel?: string thirdPartyUrl?: string thirdPartyFormat?: ThirdPartyFormat diff --git a/package.json b/package.json index 8ec29ed32..7941b05e8 100644 --- a/package.json +++ b/package.json @@ -104,6 +104,7 @@ "multer": "1.4.5-lts.1", "nai-js-tokenizer": "1.0.1", "needle": "^3.2.0", + "partial-json": "^0.1.7", "patreon-api.ts": "^0.1.0", "peggy": "^3.0.2", "pino": "^8.10.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9046fa2a7..3dd481ce1 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -95,6 +95,9 @@ dependencies: needle: specifier: ^3.2.0 version: 3.2.0 + partial-json: + specifier: ^0.1.7 + version: 0.1.7 patreon-api.ts: specifier: ^0.1.0 version: registry.npmjs.org/patreon-api.ts@0.1.0 @@ -6871,6 +6874,10 @@ packages: engines: {node: '>= 0.8'} dev: false + /partial-json@0.1.7: + resolution: {integrity: sha512-Njv/59hHaokb/hRUjce3Hdv12wd60MtM9Z5Olmn+nehe0QDAsRtRbJPvJ0Z91TusF0SuZRIvnM+S4l6EIP8leA==, tarball: https://registry.npmjs.org/partial-json/-/partial-json-0.1.7.tgz} + dev: false + /path-browserify@1.0.0: resolution: {integrity: sha512-Hkavx/nY4/plImrZPHRk2CL9vpOymZLgEbMNX1U0bjcBL7QN9wODxyx0yaMZURSQaUtSEvDrfAvxa9oPb0at9g==} dev: true diff --git a/srv/adapter/agnaistic.ts b/srv/adapter/agnaistic.ts index 78e91815c..5bd85bb5a 100644 --- a/srv/adapter/agnaistic.ts +++ b/srv/adapter/agnaistic.ts @@ -27,6 +27,7 @@ import { handleVenus } from './venus' import { sanitise, sanitiseAndTrim, 
trimResponseV2 } from '/common/requests/util' import { obtainLock, releaseLock } from '../api/chat/lock' import { getServerConfiguration } from '../db/admin' +import { handleGemini } from './gemini' export type SubscriptionPreset = Awaited>> @@ -416,7 +417,13 @@ export function getHandlers(settings: Partial) { case 'kobold': case 'openai': return handlers[settings.thirdPartyFormat!] + + case 'featherless': + return handlers.kobold + + case 'gemini': + return handleGemini } - return handlers.ooba + return handleThirdParty } diff --git a/srv/adapter/featherless.ts b/srv/adapter/featherless.ts new file mode 100644 index 000000000..da56349ea --- /dev/null +++ b/srv/adapter/featherless.ts @@ -0,0 +1,51 @@ +import { logger } from '../middleware' + +export type FeatherlessModel = { + id: string + created_at: string + updated_at: string + name: string + owned_by: string + model_class: string + favorites: number + downloads: number + status: 'active' | 'not_deployed' | 'pending_deploy' + health?: 'OFFLINE' | 'UNHEALTHY' | 'HEALTHY' + avg_rating: number + total_reviews: number +} + +let modelCache: FeatherlessModel[] = [] + +export function getFeatherModels() { + return modelCache +} + +async function getModelList() { + const res = await fetch('https://api.featherless.ai/feather/models?page=1&perPage=5000', { + headers: { + accept: '*/*', + }, + method: 'GET', + }) + + if (res.status && res.status > 200) { + return + } + + try { + const json = (await res.json()) as { items: FeatherlessModel[] } + + if (json.items.length) { + modelCache = json.items + } + + return json + } catch (ex) { + logger.warn({ err: ex, body: res.body, status: res.status }, `Featherless model list failed`) + } +} + +getModelList() + +setInterval(getModelList, 120000) diff --git a/srv/adapter/gemini.ts b/srv/adapter/gemini.ts index b2fb058b5..e585dacdd 100644 --- a/srv/adapter/gemini.ts +++ b/srv/adapter/gemini.ts @@ -1,5 +1,226 @@ +import needle from 'needle' +import { decryptText } from '../db/util' 
+import { getEncoderByName } from '../tokenize' +import { toChatCompletionPayload } from './chat-completion' +import { getStoppingStrings } from './prompt' import { ModelAdapter } from './type' +import { AppLog } from '../middleware' +import { sanitise, sanitiseAndTrim, trimResponseV2 } from '/common/requests/util' +import { requestStream } from './stream' -export const handleGemini: ModelAdapter = async function* (opts) {} +const BASE_URL = `https://generativelanguage.googleapis.com/v1beta/models/` -// function toPayload(prompt: string) {} +const SYSTEM_INCAPABLE: Record = { + 'gemini-1.0-pro-latest': true, +} + +export const handleGemini: ModelAdapter = async function* (opts) { + const encoder = getEncoderByName('gemma') + const messages = await toChatCompletionPayload(opts, encoder.count, opts.gen.maxTokens!) + + if (!opts.gen.googleModel) { + yield { error: 'Google AI Studio Model not set: Check your preset' } + return + } + + const payload: any = { + safetySettings, + generationConfig: { + temperature: opts.gen.temp, + maxOutputTokens: opts.gen.maxTokens, + topP: opts.gen.topP, + topK: opts.gen.topK, + stopSequences: getStoppingStrings(opts), + }, + } + + const systems: string[] = [] + const contents: any[] = [] + + for (const msg of messages) { + if (msg.role === 'system') { + systems.push(msg.content) + continue + } + + contents.push({ role: msg.role === 'user' ? 'user' : 'model', parts: [{ text: msg.content }] }) + continue + } + + payload.contents = contents + if (systems.length) { + if (!SYSTEM_INCAPABLE[opts.gen.googleModel]) { + payload.system_instruction = { + parts: [ + { + text: systems.join('\n'), + }, + ], + } + } else { + contents.unshift({ role: 'user', parts: [{ text: systems.join('\n') }] }) + } + } + + const key = opts.guest ? opts.gen.thirdPartyKey : decryptText(opts.gen.thirdPartyKey!) + const url = [ + BASE_URL, + opts.gen.thirdPartyModel, + ':', + opts.gen.streamResponse ? 
'streamGenerateContent' : 'generateContent', + '?key=', + key, + ].join('') + + const stream = opts.gen.streamResponse + ? streamCompletion(url, payload, opts.log) + : fullCompletion(url, payload, opts.log) + + let accum = '' + + while (true) { + const generated = await stream.next() + + if (!generated || !generated.value) break + + if (typeof generated.value === 'string') { + accum = generated.value + break + } + + if ('error' in generated.value) { + yield { error: generated.value.error } + return + } + + if ('token' in generated.value) { + accum += generated.value.token + yield { partial: sanitiseAndTrim(accum, '', opts.replyAs, opts.characters, opts.members) } + } + + if ('tokens' in generated.value) { + accum = generated.value.tokens + break + } + } + + const parsed = sanitise(accum) + const trimmed = trimResponseV2( + parsed, + opts.replyAs, + opts.members, + opts.characters, + payload.stopSequences + ) + + yield trimmed || parsed +} + +async function* streamCompletion(url: string, body: any, log: AppLog) { + const resp = needle.post(url, body, { + parse: false, + json: true, + headers: { + Accept: 'application/json', + }, + }) + + const tokens = [] + + try { + const events = requestStream(resp, 'gemini') + + for await (const event of events) { + if (!event.data) continue + const data = JSON.parse(event.data) as { + index?: number + token: string + final: boolean + ptr: number + error: any + choices?: Array<{ index: number; finish_reason: string; logprobs: any; text: string }> + } + + if (data.error) { + yield { error: `Google AI Studio streaming request failed: ${data.error}` } + log.error({ error: data.error }, `Google AI Studio streaming request failed`) + return + } + + const res = data.choices ? data.choices[0] : data + const token = 'text' in res ? 
res.text : res.token + + tokens.push(token) + yield { token } + } + } catch (err: any) { + yield { error: `Google AI Studio streaming request failed: ${err.message || err}` } + return + } +} + +async function* fullCompletion(url: string, body: any, log: AppLog) { + const resp = await needle('post', url, body, { + headers: { 'Content-Type': 'application/json' }, + json: true, + }).catch((err) => ({ error: err })) + + if ('error' in resp) { + yield { error: `Google AI Studio request failed: ${resp.error?.message || resp.error}` } + log.error({ error: resp.error }, `Google AI Studio request failed`) + return + } + + if (resp.statusCode && resp.statusCode >= 400) { + yield { error: `Google AI Studio request failed: ${resp.statusMessage}` } + log.error({ error: resp.body }, `Google AI Studio request failed`) + return + } + + const data = resp.body + if (!data?.candidates?.length) { + const reason = data?.promptFeedback?.blockReason + if (reason) { + yield { error: `Google AI Studio blocked the request: ${reason}` } + log.warn({ error: data }, `Google AI Studio request blocked`) + return + } + + yield { error: `Google AI Studio did not return a response` } + return + } + + const content = data.candidates[0]?.content ?? data.candidates[0]?.output + + const tokens = typeof content === 'string' ? 
content : content?.parts?.[0]?.text + if (!tokens) { + yield { error: `Google AI Studio did not return a response` } + log.warn({ error: data }, `Google AI Studio returned empty response`) + return + } + + return { tokens } +} + +const safetySettings = [ + { + category: 'HARM_CATEGORY_HARASSMENT', + threshold: 'BLOCK_NONE', + }, + { + category: 'HARM_CATEGORY_HATE_SPEECH', + threshold: 'BLOCK_NONE', + }, + { + category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT', + threshold: 'BLOCK_NONE', + }, + { + category: 'HARM_CATEGORY_DANGEROUS_CONTENT', + threshold: 'BLOCK_NONE', + }, + { + category: 'HARM_CATEGORY_CIVIC_INTEGRITY', + threshold: 'BLOCK_NONE', + }, +] diff --git a/srv/adapter/generate.ts b/srv/adapter/generate.ts index 896b70f7e..17bfdb04f 100644 --- a/srv/adapter/generate.ts +++ b/srv/adapter/generate.ts @@ -23,7 +23,7 @@ import needle from 'needle' import { HORDE_GUEST_KEY } from '../api/horde' import { getTokenCounter } from '../tokenize' import { getAppConfig } from '../api/settings' -import { SubscriptionPreset, getHandlers, getSubscriptionPreset, handlers } from './agnaistic' +import { SubscriptionPreset, getHandlers, getSubscriptionPreset } from './agnaistic' import { deepClone, getSubscriptionModelLimits, parseStops, tryParse } from '/common/util' import { isDefaultTemplate, templates } from '/common/presets/templates' import { @@ -382,7 +382,7 @@ export async function createChatStream( const { adapter, isThirdParty, model } = getAdapter(opts.chat, opts.user, opts.settings) const encoder = getTokenCounter(adapter, model, subscription?.preset) - const handler = handlers[adapter] + const handler = getHandlers(opts.settings) /** * Context limits set by the subscription need to be present before the prompt is finalised. 
diff --git a/srv/adapter/kobold.ts b/srv/adapter/kobold.ts index f8166bc3b..048fc8db0 100644 --- a/srv/adapter/kobold.ts +++ b/srv/adapter/kobold.ts @@ -46,7 +46,8 @@ export const handleThirdParty: ModelAdapter = async function* (opts) { opts.gen.thirdPartyFormat === 'aphrodite' || opts.gen.thirdPartyFormat === 'llamacpp' || opts.gen.thirdPartyFormat === 'exllamav2' || - opts.gen.thirdPartyFormat === 'koboldcpp' + opts.gen.thirdPartyFormat === 'koboldcpp' || + opts.gen.thirdPartyFormat === 'featherless' ? getThirdPartyPayload(opts) : { ...base, ...mappedSettings, prompt } @@ -77,7 +78,10 @@ export const handleThirdParty: ModelAdapter = async function* (opts) { yield { prompt: body.prompt } logger.debug(`Prompt:\n${body.prompt}`) - logger.debug({ ...body, prompt: null, images: null, messages: null }, '3rd-party payload') + logger.debug( + { ...body, prompt: null, images: null, messages: null }, + `3rd-party payload ${opts.gen.thirdPartyFormat}` + ) const stream = await dispatch(opts, body) @@ -114,6 +118,10 @@ export const handleThirdParty: ModelAdapter = async function* (opts) { } } + if (opts.gen.service === 'kobold' && body.model) { + yield { meta: { model: body.model, fmt: opts.gen.thirdPartyFormat } } + } + const parsed = sanitise(accum) const trimmed = trimResponseV2(parsed, opts.replyAs, members, characters, stop_sequence) @@ -171,6 +179,13 @@ async function dispatch(opts: AdapterProps, body: any) { : fullCompletion(url, body, headers, opts.gen.thirdPartyFormat, opts.log) } + case 'featherless': { + const url = 'https://api.featherless.ai/v1/completions' + return opts.gen.streamResponse + ? 
streamCompletion(url, body, headers, opts.gen.thirdPartyFormat, opts.log) + : fullCompletion(url, body, headers, opts.gen.thirdPartyFormat, opts.log) + } + default: const isStreamSupported = await checkStreamSupported(`${baseURL}/api/extra/version`) return opts.gen.streamResponse && isStreamSupported @@ -219,6 +234,21 @@ async function getHeaders(opts: AdapterProps) { break } + case 'featherless': { + if (!opts.gen.featherlessModel) { + throw new Error(`Featherless model not set. Check your preset`) + } + + const key = opts.gen.thirdPartyKey + + const apiKey = key ? (opts.guest ? key : decryptText(key)) : '' + if (apiKey) { + headers['Authorization'] = `Bearer ${apiKey}` + } + headers['Content-Type'] = 'application/json' + break + } + case 'mistral': { const key = opts.user.mistralKey if (!key) throw new Error(`Mistral API key not set. Check your AI->3rd-party settings`) @@ -315,7 +345,7 @@ const streamCompletion = async function* ( parse: false, json: true, headers: { - Accept: `text/event-stream`, + Accept: format === 'featherless' ? 
'application/json' : `text/event-stream`, ...headers, }, }) diff --git a/srv/adapter/payloads.ts b/srv/adapter/payloads.ts index 7c78c45ac..0c45d9de1 100644 --- a/srv/adapter/payloads.ts +++ b/srv/adapter/payloads.ts @@ -158,6 +158,26 @@ function getBasePayload(opts: AdapterProps, stops: string[] = []) { return body } + if (format === 'featherless') { + const payload: any = { + model: gen.featherlessModel, + prompt, + stop: getStoppingStrings(opts, stops), + presence_penalty: gen.presencePenalty, + frequency_penalty: gen.frequencyPenalty, + repetition_penalty: gen.repetitionPenalty, + temperature: gen.temp, + top_p: gen.topP, + top_k: gen.topK, + min_p: gen.minP, + max_tokens: gen.maxTokens, + include_stop_str_in_output: false, + stream: gen.streamResponse, + } + + return payload + } + if (format === 'ollama') { const payload: any = { prompt, diff --git a/srv/adapter/stream.ts b/srv/adapter/stream.ts index 21d811d4b..99c527c00 100644 --- a/srv/adapter/stream.ts +++ b/srv/adapter/stream.ts @@ -4,6 +4,7 @@ import { ThirdPartyFormat } from '/common/adapters' import { logger } from '../middleware' import needle from 'needle' import { AsyncDelta, Completion, CompletionGenerator } from './type' +import * as partial from 'partial-json' export type ServerSentEvent = { id?: string @@ -155,7 +156,7 @@ export function requestStream( code = statusCode failed = true emitter.done() - } else if (format === 'openrouter') { + } else if (format === 'openrouter' || format === 'gemini') { if ( contentType.startsWith('application/json') || contentType.startsWith('text/event-stream') @@ -188,90 +189,137 @@ export function requestStream( }) let incomplete = '' + let lastContent = '' stream.on('data', (chunk: Buffer) => { - if (failed && !emitter.isDone()) { - const result = tryParse(chunk.toString()) - const error = result?.error?.message || result?.message - if (error) { - emitter.push({ error: `SSE request failed: ${error}` }) - emitter.done() + try { + if (failed && !emitter.isDone()) 
{ + const result = tryParse(chunk.toString()) + const error = result?.error?.message || result?.message + if (error) { + emitter.push({ error: `SSE request failed: ${error}` }) + emitter.done() + } + + return } - return - } + const data = incomplete + chunk.toString() + incomplete = '' - const data = incomplete + chunk.toString() - incomplete = '' + if (format === 'gemini') { + const index = findMalformedStart(data) + if (index === -1) { + return + } + const partialJson = data.slice(index) + const json = partial.parse(partialJson, partial.ALL) - const messages = data.split(/\r?\n\r?\n/).filter((l) => !!l && l !== ': OPENROUTER PROCESSING') + const candidates = Array.isArray(json) ? json[0]?.candidates : json?.candidates + const content = candidates ? candidates[0]?.content?.parts?.[0]?.text : null - for (const msg of messages) { - if (format === 'vllm') { - const event = parseVLLM(incomplete + msg) - if (!event) continue + if (content) { + if (content === lastContent) { + lastContent = content + return + } - const choice = event.choices?.[0] - if (!choice) { - continue + if (lastContent && content.endsWith(lastContent)) { + lastContent = content + return + } + + lastContent = content + emitter.push({ data: JSON.stringify({ token: content }) }) + return + } + // Fallback method + const start = data.indexOf('"text": "') + if (start === -1) return + const end = data.slice(start + 9).indexOf('"\n ') + + if (end === -1) return + const tokens = data.slice(start + 9, start + 9 + end) + if (tokens) { + emitter.push({ data: JSON.stringify({ token: JSON.parse('"' + tokens + '"') }) }) } + return + } - const token = choice.delta?.content || choice.text - if (!token) continue + const messages = data + .split(/\r?\n\r?\n/) + .filter((l) => !!l && l !== ': OPENROUTER PROCESSING') - const data = JSON.stringify({ token }) - emitter.push({ data }) - continue - } + for (const msg of messages) { + if (format === 'vllm') { + const event = parseVLLM(incomplete + msg) + if (!event) 
continue + + const choice = event.choices?.[0] + if (!choice) { + continue + } - if (format === 'ollama') { - const event = parseOllama(incomplete + msg, emitter) + const token = choice.delta?.content || choice.text + if (!token) continue - if (event.error) { - const data = JSON.stringify({ error: event.error }) + const data = JSON.stringify({ token }) emitter.push({ data }) continue } - const token = event?.response - if (!token) continue + if (format === 'ollama') { + const event = parseOllama(incomplete + msg, emitter) - const data = JSON.stringify({ token }) - emitter.push({ data }) - continue - } + if (event.error) { + const data = JSON.stringify({ error: event.error }) + emitter.push({ data }) + continue + } + + const token = event?.response + if (!token) continue - if (format === 'aphrodite') { - const event = parseAphrodite(incomplete + msg, emitter) - if (!event?.data) { - incomplete += msg + const data = JSON.stringify({ token }) + emitter.push({ data }) continue } - const token = getAphroditeToken(event.data) - if (!token) continue + if (format === 'aphrodite') { + const event = parseAphrodite(incomplete + msg, emitter) + if (!event?.data) { + incomplete += msg + continue + } - const data = JSON.stringify({ index: token.index, token: token.token }) - emitter.push({ data }) - continue - } + const token = getAphroditeToken(event.data) + if (!token) continue - const event: any = parseEvent(msg) + const data = JSON.stringify({ index: token.index, token: token.token }) + emitter.push({ data }) + continue + } - if (!event.data) { - continue - } + const event: any = parseEvent(msg) - const data: string = event.data - if (typeof data === 'string' && incompleteJson(data)) { - incomplete = msg - continue - } + if (!event.data) { + continue + } + + const eventData: string = event.data + if (typeof eventData === 'string' && incompleteJson(eventData)) { + incomplete = msg + continue + } - if (event.event) { - event.type = event.event + if (event.event) { + event.type 
= event.event + } + emitter.push(event) } - emitter.push(event) + } catch (ex) { + logger.error({ err: ex, format }, `Unhandled ${format} event stream parsing error`) + emitter.push({ error: `Unexpected error occurred while parsing event stream` }) } }) @@ -426,3 +474,10 @@ export async function websocketStream(opts: { url: string; body: any }, timeoutM return emitter.stream } + +function findMalformedStart(json: string) { + const brace = json.indexOf('{') + const bracket = json.indexOf('[') + + return Math.min(brace, bracket) +} diff --git a/srv/adapter/template-chat-payload.ts b/srv/adapter/template-chat-payload.ts index 8fac68414..1d10d5f1c 100644 --- a/srv/adapter/template-chat-payload.ts +++ b/srv/adapter/template-chat-payload.ts @@ -1,5 +1,9 @@ +import { AdapterProps } from './type' import { replaceTags } from '/common/presets/templates' -import { AppSchema } from '/common/types' +import { getContextLimit } from '/common/prompt' +import { promptOrderToSections } from '/common/prompt-order' +import { parseTemplate } from '/common/template-parser' +import { AppSchema, TokenCounter } from '/common/types' export function renderMessagesToPrompt( preset: AppSchema.UserGenPreset, @@ -40,3 +44,53 @@ export function renderMessagesToPrompt( const prompt = replaceTags(template, preset.modelFormat || 'ChatML') return { prompt, stop: replaceTags('', preset.modelFormat || 'ChatML') } } + +export async function toChatMessages(opts: AdapterProps, counter: TokenCounter) { + const sections = promptOrderToSections({ + format: opts.gen.modelFormat, + order: opts.gen.promptOrder, + }) + + const maxContext = getContextLimit(opts.user, opts.gen) + + const system = await parse(opts, counter, sections.system) + const defs = await parse(opts, counter, sections.defs) + const post = await parse(opts, counter, sections.post) + const note = opts.char.insert + ? 
await parse(opts, counter, opts.char.insert.prompt) + : { parsed: '', count: 0 } + + let limit = maxContext - system.count - defs.count - post.count - note.count + const history = await parse(opts, counter, sections.history, limit) + + const messages = [ + { role: 'system', content: system.parsed }, + { role: 'user', content: defs.parsed }, + ] + + const sender = (opts.impersonate?.name || opts.sender.handle) + ':' + for (const line of history.sections.sections.history) { + const role = line.startsWith(sender) ? 'user' : 'assistant' + messages.push({ role, content: line }) + } + + messages.push({ role: 'assistant', content: post.parsed }) + return messages +} + +async function parse(opts: AdapterProps, counter: TokenCounter, text: string, limit?: number) { + const template = replaceTags(text, 'None') + const { parsed, sections } = await parseTemplate(template, { + char: opts.char, + chat: opts.chat, + jsonValues: {}, + sender: opts.sender, + impersonate: opts.impersonate, + lines: opts.lines, + limit: limit ? 
{ context: limit, encoder: counter } : undefined, + }) + + const count = await counter(parsed) + + return { parsed, count, sections } +} diff --git a/srv/api/settings.ts b/srv/api/settings.ts index 1fa57e1da..4e50391e0 100644 --- a/srv/api/settings.ts +++ b/srv/api/settings.ts @@ -11,6 +11,7 @@ import { RegisteredAdapter } from '/common/adapters' import { getHordeWorkers, getHordeModels } from './horde' import { getOpenRouterModels } from '../adapter/openrouter' import { updateRegisteredSubs } from '../adapter/agnaistic' +import { getFeatherModels } from '../adapter/featherless' const router = Router() @@ -29,6 +30,10 @@ export const getPublicSubscriptions = handle(async () => { router.get('/subscriptions', getPublicSubscriptions) router.get('/', getSettings) +router.get('/featherless', (_, res) => { + const models = getFeatherModels() + res.json({ models }) +}) export default router diff --git a/web/App.tsx b/web/App.tsx index 571fcf144..0e12a9e73 100644 --- a/web/App.tsx +++ b/web/App.tsx @@ -160,7 +160,7 @@ const Layout: Component<{ children?: any }> = (props) => { id="main-content" class="w-full overflow-y-auto" classList={{ - 'sm:ml-[302px]': cfg.showMenu, + 'sm:ml-[320px]': cfg.showMenu, 'sm:ml-0': !cfg.showMenu, }} data-background diff --git a/web/Navigation.tsx b/web/Navigation.tsx index 4fc11f68c..17d029da7 100644 --- a/web/Navigation.tsx +++ b/web/Navigation.tsx @@ -136,7 +136,7 @@ const Navigation: Component = () => { <>
settingStore.menu(true)} classList={{ hidden: !isChat() }} @@ -155,11 +155,8 @@ const Navigation: Component = () => { role="navigation" aria-label="Main" > -
-
+
+
{ @@ -209,7 +206,7 @@ const Navigation: Component = () => {
{nav.title}
- {nav.body} +
{nav.body}
@@ -264,71 +261,73 @@ const UserNavigation: Component = () => { return ( <> - +
+ - - - - + + + + - + - + - - - - + + + + - - - - - - - - - - - - - + + + + + + + + + - - - - - - Configuration - - - Users - - - Subscriptions - - - Announcements - - - + + + - + + + + + + Configuration + + + Users + + + Subscriptions + + + Announcements + + + + + +
@@ -346,60 +345,67 @@ const GuestNavigation: Component = () => { return ( <> - - soundEmitter.emit('menu-item-clicked', 'login')} - class="tour-register" - > - Login - - +
+ + soundEmitter.emit('menu-item-clicked', 'login')} + class="tour-register" + > + Login + + - - + + - + - - - - + + + + - + - + - - soundEmitter.emit('menu-item-clicked', 'presets')} - > - Presets - - - - - - + + soundEmitter.emit('menu-item-clicked', 'presets')} + > + Presets + + + + + + - - + + + - - + +
diff --git a/web/app.css b/web/app.css index c7cbf4ac1..3794d5d32 100644 --- a/web/app.css +++ b/web/app.css @@ -71,7 +71,7 @@ .drawer--pane-open { animation-fill-mode: forwards; animation: hideDrawer 0s ease 800ms; - @apply left-[-302px] sm:left-0 sm:left-0; + @apply left-[-320px] sm:left-0 sm:left-0; } .drawer--hide { @@ -84,10 +84,9 @@ /* position: fixed; */ top: 0; left: 0; - min-width: 302px; - max-width: 302px; height: calc(100% - 16px); transition: 800ms ease; + @apply min-w-[320px] max-w-[320px]; } .drawer > * > a.active { diff --git a/web/shared/CustomSelect.tsx b/web/shared/CustomSelect.tsx index b7032ae36..2290bce53 100644 --- a/web/shared/CustomSelect.tsx +++ b/web/shared/CustomSelect.tsx @@ -5,6 +5,7 @@ import { RootModal } from './Modal' import { PresetAISettings } from '/common/adapters' import { ComponentSubscriber, useValidServiceSetting } from './util' import { forms } from '../emitter' +import TextInput from './TextInput' export type CustomOption = { label: string | JSX.Element @@ -12,22 +13,24 @@ export type CustomOption = { } export const CustomSelect: Component<{ + buttonLabel: string | JSX.Element | ((opt: CustomOption) => JSX.Element | string) + onSelect: (opt: CustomOption) => void + options: CustomOption[] + value: any + schema?: ButtonSchema size?: 'sm' | 'md' | 'lg' | 'pill' - buttonLabel: string | JSX.Element | ((opt: CustomOption) => JSX.Element | string) modalTitle?: string | JSX.Element label?: string | JSX.Element helperText?: string | JSX.Element fieldName?: string - options: CustomOption[] selected: any | undefined - onSelect: (opt: CustomOption) => void hide?: boolean aiSetting?: keyof PresetAISettings parentClass?: string classList?: Record - value: any emitter?: ComponentSubscriber<'close'> + search?: (value: string, search: string) => boolean }> = (props) => { let ref: HTMLInputElement const [open, setOpen] = createSignal(false) @@ -92,7 +95,12 @@ export const CustomSelect: Component<{ setOpen(false)} title={props.modalTitle}>
- +
@@ -101,30 +109,59 @@ export const CustomSelect: Component<{ } const OptionList: Component<{ + search?: (text: string, search: string) => boolean options: CustomOption[] onSelect: (opt: CustomOption) => void title?: string selected?: string -}> = (props) => ( -
- -
{props.title}
-
-
- - {(option) => ( -
props.onSelect(option)} - > -
{option.label}
-
- )} -
+}> = (props) => { + const [filter, setFilter] = createSignal('') + + const filtered = createMemo(() => { + if (!props.search) return props.options + + const input = filter().trim() + if (!input) return props.options + + return props.options.filter((opt) => + typeof opt.label === 'string' + ? props.search?.(opt.label, input) || props.search?.(opt.value, input) + : props.search?.(opt.value, input) + ) + }) + + return ( +
+ +
{props.title}
+
+ + + setFilter(ev.currentTarget.value)} + onInputText={(text) => setFilter(text)} + /> + + +
+ + {(option) => ( +
props.onSelect(option)} + > +
{option.label}
+
+ )} +
+
-
-) + ) +} diff --git a/web/shared/PresetSettings/Fields.tsx b/web/shared/PresetSettings/Fields.tsx index 099cafa99..a37ab736f 100644 --- a/web/shared/PresetSettings/Fields.tsx +++ b/web/shared/PresetSettings/Fields.tsx @@ -1,12 +1,12 @@ -import { Component, Show, createMemo } from 'solid-js' -import { PresetAISettings, ThirdPartyFormat } from '/common/adapters' +import { Component, Show, createMemo, createSignal, onMount } from 'solid-js' +import { GOOGLE_MODELS, PresetAISettings, ThirdPartyFormat } from '/common/adapters' import { PresetProps } from './types' import { AppSchema } from '/common/types/schema' import TextInput from '../TextInput' import Button, { ToggleButton } from '../Button' import { getStore } from '/web/store/create' import RangeInput from '../RangeInput' -import type { UserState } from '/web/store' +import { settingStore, type UserState } from '/web/store' import Select from '../Select' import { MODEL_FORMATS } from './General' import { defaultPresets } from '/common/default-preset' @@ -15,6 +15,7 @@ import { SubscriptionModelLevel } from '/common/types/presets' import { useValidServiceSetting } from '../util' import { Card } from '../Card' import PromptEditor from '../PromptEditor' +import { CustomSelect } from '../CustomSelect' export type Field = Component< PresetProps & { @@ -182,6 +183,9 @@ export const ThirdParty: Field = (props) => { value={props.inherit?.thirdPartyUrl || ''} disabled={props.disabled} aiSetting={'thirdPartyUrl'} + hide={ + props.format === 'featherless' || props.format === 'mistral' || props.format === 'gemini' + } /> { ) } + +export const FeatherlessModels: Field = (props) => { + const state = settingStore((s) => s.featherless) + const [selected, setSelected] = createSignal(props.inherit?.featherlessModel || '') + + const options = createMemo(() => { + return state + .filter((s) => s.status === 'active' && s.health === 'HEALTHY') + .map((s) => ({ label: s.name, value: s.id })) + }) + + onMount(() => { + if 
(!state.length) { + settingStore.getFeatherless() + } + }) + + const search = (value: string, input: string) => { + let re = new RegExp(input.replace(/\*/gi, '[a-z0-9]'), 'gi') + return !!value.match(re) + } + + return ( + setSelected(opt.value)} + buttonLabel={selected() || 'None Selected'} + selected={selected()} + hide={props.service !== 'kobold' || props.format !== 'featherless'} + /> + ) +} + +export const GoogleModels: Field = (props) => { + const [selected, setSelected] = createSignal(props.inherit?.googleModel || '') + const label = createMemo(() => { + const id = selected() + if (!id) return 'None Selected' + const match = Object.values(GOOGLE_MODELS).find((model) => model.id === id) + if (!match) return 'Invalid Model' + return match.label + }) + + const options = createMemo(() => { + const list = Object.values(GOOGLE_MODELS).map(({ label, id }) => ({ label, value: id })) + return list + }) + + return ( + value.toLowerCase().includes(search.toLowerCase())} + onSelect={(opt) => setSelected(opt.value)} + buttonLabel={label()} + selected={selected()} + hide={props.service !== 'kobold' || props.format !== 'gemini'} + /> + ) +} + diff --git a/web/shared/PresetSettings/General.tsx b/web/shared/PresetSettings/General.tsx index 1e39d1d9d..ab4df4eeb 100644 --- a/web/shared/PresetSettings/General.tsx +++ b/web/shared/PresetSettings/General.tsx @@ -20,7 +20,16 @@ import { PhraseBias, StoppingStrings } from '../PhraseBias' import { BUILTIN_FORMATS } from '/common/presets/templates' import { getSubscriptionModelLimits } from '/common/util' import { forms } from '/web/emitter' -import { Field, ContextSize, ModelFormat, ResponseLength, Temperature, ThirdParty } from './Fields' +import { + Field, + ContextSize, + ModelFormat, + ResponseLength, + Temperature, + ThirdParty, + FeatherlessModels, + GoogleModels, +} from './Fields' export const MODEL_FORMATS = Object.keys(BUILTIN_FORMATS).map((label) => ({ label, value: label })) @@ -159,6 +168,7 @@ export const 
GeneralSettings: Field = (props) => { value={props.inherit?.thirdPartyUrlNoSuffix} service={props.service} aiSetting="thirdPartyUrl" + hide={props.format === 'featherless'} />
@@ -175,10 +185,15 @@ export const GeneralSettings: Field = (props) => { 'claudeModel', 'mistralModel', 'replicateModelName', - 'thirdPartyModel' + 'thirdPartyModel', + 'thirdPartyKey' ) } > + + + +