Commit 8c2933b
1 parent: 25e5265
Showing 41 changed files with 9,887 additions and 24 deletions.
cache_util.d.ts
@@ -0,0 +1,7 @@
import { AppConfig } from "./config";
export declare function hasModelInCache(modelId: string, appConfig?: AppConfig): Promise<boolean>;
export declare function deleteModelAllInfoInCache(modelId: string, appConfig?: AppConfig): Promise<void>;
export declare function deleteModelInCache(modelId: string, appConfig?: AppConfig): Promise<void>;
export declare function deleteChatConfigInCache(modelId: string, appConfig?: AppConfig): Promise<void>;
export declare function deleteModelWasmInCache(modelId: string, appConfig?: AppConfig): Promise<void>;
//# sourceMappingURL=cache_util.d.ts.map
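The cache helpers above can be used to check for and evict a previously downloaded model. A minimal usage sketch; the model id is a placeholder, and the "@mlc-ai/web-llm" import path is an assumption about the published npm entry point:

// Illustrative only: modelId is a placeholder, "@mlc-ai/web-llm" is the assumed package entry point.
import { hasModelInCache, deleteModelAllInfoInCache } from "@mlc-ai/web-llm";

async function evictIfCached(modelId: string): Promise<void> {
  // Resolves to true if the model's artifacts are already in the browser cache.
  if (await hasModelInCache(modelId)) {
    // Removes the weights, chat config, and wasm associated with the model.
    await deleteModelAllInfoInCache(modelId);
  }
}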
config.d.ts
@@ -0,0 +1,159 @@
import { ResponseFormat } from "./openai_api_protocols";
import { LogitProcessor, InitProgressCallback } from "./types";
/**
 * Conversation template config
 */
export interface ConvTemplateConfig {
    system_template: string;
    system_message: string;
    roles: Record<Role, string>;
    role_templates?: Partial<Record<Role, string>>;
    seps: Array<string>;
    role_content_sep?: string;
    role_empty_sep?: string;
    offset: number;
    stop_str: Array<string>;
    system_prefix_token_ids?: Array<number>;
    stop_token_ids: Array<number>;
    add_role_after_system_message?: boolean;
}
export declare enum Role {
    user = "user",
    assistant = "assistant"
}
/**
 * Placeholders that can be used in role templates.
 * For example, a role template of
 * `<<question>> ${MessagePlaceholders.USER} <<function>> ${MessagePlaceholders.FUNCTION}`
 * will insert the user message at ${MessagePlaceholders.USER}
 * and the function message at ${MessagePlaceholders.FUNCTION}
 * at run time.
 */
export declare enum MessagePlaceholders {
    system = "{system_message}",
    user = "{user_message}",
    assistant = "{assistant_message}",
    tool = "{tool_message}",
    function = "{function_string}"
}
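For instance, a conversation-config override could use these placeholders inside `role_templates` (note the declared enum members are lowercase: `MessagePlaceholders.user`, `MessagePlaceholders.function`). The sketch below is illustrative only; the delimiter strings are made up:

// Hypothetical conv_config override; only meant to show where the placeholders go.
import { ConvTemplateConfig, Role, MessagePlaceholders } from "./config";

const convConfigOverride: Partial<ConvTemplateConfig> = {
  role_templates: {
    // At run time, the user's message replaces {user_message} and any
    // function/tool string replaces {function_string}.
    [Role.user]: `<<question>> ${MessagePlaceholders.user} <<function>> ${MessagePlaceholders.function}`,
  },
};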
/**
 * Config of one chat model, a data structure representing `mlc-chat-config.json`.
 * This only corresponds to the chat-related fields and `tokenizer_files` of `mlc-chat-config.json`.
 * Only these fields affect the conversation at runtime,
 * i.e. the third part in https://llm.mlc.ai/docs/get_started/mlc_chat_config.html.
 *
 * This is initialized in `ChatModule.reload()` with the model's `mlc-chat-config.json`.
 */
export interface ChatConfig {
    tokenizer_files: Array<string>;
    conv_config?: Partial<ConvTemplateConfig>;
    conv_template: string | ConvTemplateConfig;
    mean_gen_len: number;
    max_gen_len: number;
    shift_fill_factor: number;
    repetition_penalty: number;
    frequency_penalty: number;
    presence_penalty: number;
    top_p: number;
    temperature: number;
    bos_token_id?: number;
}
/**
 * Custom options that can be used to override known config values.
 */
export interface ChatOptions extends Partial<ChatConfig> {
}
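Since `ChatOptions` is just a `Partial<ChatConfig>`, any subset of the fields above can be supplied to override the model's `mlc-chat-config.json`. A short sketch; the values are illustrative, and the relative import mirrors the paths used inside the package:

import { ChatOptions } from "./config";

// Illustrative override: only the listed fields change; everything else
// falls back to the model's own mlc-chat-config.json.
const chatOpts: ChatOptions = {
  temperature: 0.7,
  top_p: 0.95,
  repetition_penalty: 1.05,
};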
/**
 * Optional configurations for `CreateEngine()` and `CreateWebWorkerEngine()`.
 *
 * chatOpts: To optionally override the `mlc-chat-config.json` of `modelId`.
 * appConfig: Configure the app, including the list of models and whether to use IndexedDB cache.
 * initProgressCallback: A callback for showing the progress of loading the model.
 * logitProcessorRegistry: A registry for stateful logit processors, see `webllm.LogitProcessor`.
 *
 * @note All fields are optional, and `logitProcessorRegistry` is only used for `CreateEngine()`,
 * not `CreateWebWorkerEngine()`.
 */
export interface EngineConfig {
    chatOpts?: ChatOptions;
    appConfig?: AppConfig;
    initProgressCallback?: InitProgressCallback;
    logitProcessorRegistry?: Map<string, LogitProcessor>;
}
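A sketch of an `EngineConfig` that could be passed to `CreateEngine()`. The progress-report field accessed in the callback (`report.text`) is an assumption about `InitProgressCallback`, whose shape is not spelled out in this file:

import { EngineConfig } from "./config";

// Illustrative EngineConfig; report.text is an assumed field of the progress report.
const engineConfig: EngineConfig = {
  chatOpts: { temperature: 0.7 },
  initProgressCallback: (report) => console.log(report.text),
};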
/**
 * Config for a single generation.
 * Essentially `ChatConfig` without `tokenizer_files`, `conv_config`, or `conv_template`.
 * We also support additional fields not present in `mlc-chat-config.json` to match OpenAI-like APIs.
 *
 * Note that all values are optional. If unspecified, we use whatever values were in the `ChatConfig`
 * initialized during `ChatModule.reload()`.
 */
export interface GenerationConfig {
    mean_gen_len?: number;
    shift_fill_factor?: number;
    repetition_penalty?: number;
    top_p?: number | null;
    temperature?: number | null;
    max_gen_len?: number | null;
    frequency_penalty?: number | null;
    presence_penalty?: number | null;
    stop?: string | null | Array<string>;
    n?: number | null;
    logit_bias?: Record<string, number> | null;
    logprobs?: boolean | null;
    top_logprobs?: number | null;
    response_format?: ResponseFormat | null;
}
export declare function postInitAndCheckGenerationConfigValues(config: GenerationConfig): void;
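A per-request `GenerationConfig` might look like the sketch below (values are illustrative); `postInitAndCheckGenerationConfigValues()` is the declared hook for validating such a config, though its exact behavior is not spelled out in this file:

import { GenerationConfig, postInitAndCheckGenerationConfigValues } from "./config";

// Illustrative per-request overrides; unspecified fields fall back to the
// ChatConfig loaded during reload().
const genConfig: GenerationConfig = {
  temperature: 0.8,
  top_p: 0.9,
  max_gen_len: 256,
  stop: ["\n\n"],
};
// Presumably raises on invalid values, per its name.
postInitAndCheckGenerationConfigValues(genConfig);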
/**
 * Information for a model.
 * @param model_url: the Hugging Face link to download the model weights from.
 * @param model_id: what we call the model.
 * @param model_lib_url: link to the model library (wasm file) the model uses.
 * @param vram_required_MB: amount of VRAM in MB required to run the model (can use
 * `utils/vram_requirements` to calculate).
 * @param low_resource_required: whether the model can run on limited devices (e.g. an Android phone).
 * @param buffer_size_required_bytes: required `maxStorageBufferBindingSize`, different for each device.
 * @param required_features: features needed to run this model (e.g. shader-f16).
 */
export interface ModelRecord {
    model_url: string;
    model_id: string;
    model_lib_url: string;
    vram_required_MB?: number;
    low_resource_required?: boolean;
    buffer_size_required_bytes?: number;
    required_features?: Array<string>;
}
/**
 * Extra configuration that can be passed when loading a model.
 *
 * @param model_list: models to be used.
 * @param useIndexedDBCache: if true, will use IndexedDBCache to cache models and other artifacts.
 * If false or unspecified, will use the Cache API. For more information on the two, see:
 * https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria#what_technologies_store_data_in_the_browser
 *
 * @note The Cache API is the better tested of the two in WebLLM as of now.
 */
export interface AppConfig {
    model_list: Array<ModelRecord>;
    useIndexedDBCache?: boolean;
}
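Putting `ModelRecord` and `AppConfig` together, a custom model list could be declared as below. The URLs and model id are placeholders, not real artifacts:

import { AppConfig } from "./config";

// Placeholder URLs and model id, purely for illustration.
const appConfig: AppConfig = {
  model_list: [
    {
      model_url: "https://huggingface.co/<org>/<model>/resolve/main/",
      model_id: "MyModel-q4f32_1",
      model_lib_url: "https://example.com/MyModel-q4f32_1-webgpu.wasm",
      vram_required_MB: 4096,
      low_resource_required: false,
    },
  ],
  // Defaults to the Cache API when unspecified, which the note above calls better tested.
  useIndexedDBCache: false,
};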
/**
 * modelVersion: the prebuilt model libraries that the current npm is compatible with; affects the
 * `model_lib_url`s in `prebuiltAppConfig`.
 *
 * @note The model version does not have to match the npm version, since not every npm update
 * requires an update of the model libraries.
 */
export declare const modelVersion = "v0_2_34";
export declare const modelLibURLPrefix = "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/web-llm-models/";
/**
 * Default models and model library mapping to be used if unspecified.
 *
 * @note This is the only source of truth for which prebuilt model libraries are compatible with the
 * current WebLLM npm version.
 */
export declare const prebuiltAppConfig: AppConfig;
//# sourceMappingURL=config.d.ts.map
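Because `prebuiltAppConfig` is the source of truth for compatible prebuilt libraries, one way to see which model ids ship with the current npm is simply to read its `model_list`:

import { prebuiltAppConfig } from "./config";

// Lists every prebuilt model id bundled with this npm version.
const availableIds = prebuiltAppConfig.model_list.map((m) => m.model_id);
console.log(availableIds);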
conversation.d.ts
@@ -0,0 +1,48 @@
import { ConvTemplateConfig, Role } from "./config";
/**
 * Helper to keep track of conversation history.
 */
export declare class Conversation {
    messages: Array<[Role, string, string | undefined]>;
    readonly config: ConvTemplateConfig;
    function_string: string;
    use_function_calling: boolean;
    override_system_message?: string;
    constructor(config: ConvTemplateConfig);
    private getPromptArrayInternal;
    /**
     * Get prompt arrays with the first one as system.
     *
     * @returns The prompt array.
     */
    getPromptArray(): Array<string>;
    /**
     * Get the last round of prompts that has not been fed as input.
     *
     * @note This function needs to be used with the assumption that
     * the caller calls appendMessage then appendReplyHeader.
     *
     * @returns The prompt array.
     */
    getPrompArrayLastRound(): string[];
    /**
     * Resets all states for this.conversation.
     */
    reset(): void;
    getStopStr(): string[];
    getStopTokens(): number[];
    appendMessage(role: Role, message: string, role_name?: string): void;
    appendReplyHeader(role: Role): void;
    finishReply(message: string): void;
}
export declare function getConversation(conv_template: string | ConvTemplateConfig, conv_config?: Partial<ConvTemplateConfig>): Conversation;
/**
 * Compare the states of two conversation instances. Equality is defined as: their getPromptArray()
 * should return exactly the same thing, which is determined by the fields messages, function_string,
 * use_function_calling, and override_system_message.
 *
 * @returns True if `convA` equals `convB`.
 * @note We assume convA and convB have the same `this.config`.
 */
export declare function compareConversationObject(convA: Conversation, convB: Conversation): boolean;
//# sourceMappingURL=conversation.d.ts.map
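These are internal helpers, but the declarations above suggest a straightforward flow: build a `Conversation` from a template, append messages, add a reply header, and render the prompt. A hedged sketch; the template name passed to `getConversation()` is a placeholder (real names come from a model's `conv_template`):

import { getConversation, Conversation } from "./conversation";
import { Role } from "./config";

// "my-template" is a placeholder; in practice the template comes from mlc-chat-config.json.
const conv: Conversation = getConversation("my-template");
conv.appendMessage(Role.user, "What is WebGPU?");
conv.appendReplyHeader(Role.assistant);
// Full prompt array, with the system prompt first, ready to be prefilled.
const prompts = conv.getPromptArray();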
engine.d.ts
@@ -0,0 +1,114 @@
import * as API from "./openai_api_protocols/apis";
import { ChatOptions, AppConfig, GenerationConfig, EngineConfig } from "./config";
import { ChatCompletionRequest, ChatCompletion, ChatCompletionChunk, ChatCompletionFinishReason, ChatCompletionRequestNonStreaming, ChatCompletionRequestStreaming, ChatCompletionRequestBase } from "./openai_api_protocols/index";
import { InitProgressCallback, EngineInterface, GenerateProgressCallback, LogitProcessor } from "./types";
/**
 * Creates `Engine`, and loads `modelId` onto WebGPU.
 *
 * Equivalent to `new webllm.Engine().reload(...)`.
 *
 * @param modelId The model to load; needs to be either in `webllm.prebuiltAppConfig` or in
 * `engineConfig.appConfig`.
 * @param engineConfig Optionally configures the engine, see `webllm.EngineConfig`.
 * @returns An initialized `WebLLM.Engine` with `modelId` loaded.
 */
export declare function CreateEngine(modelId: string, engineConfig?: EngineConfig): Promise<Engine>;
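A typical creation call per the signature above. The model id is a placeholder that must match a `ModelRecord` in `prebuiltAppConfig` or in a custom `appConfig`, and `report.text` is an assumed field of the progress report:

import { CreateEngine, Engine } from "./engine";

// Inside an async function or a module with top-level await.
// Placeholder model id; must match a ModelRecord's model_id.
const engine: Engine = await CreateEngine("MyModel-q4f32_1", {
  initProgressCallback: (report) => console.log(report.text),
});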
/**
 * The main interface of Engine, which loads a model and performs tasks.
 *
 * You can initialize one either with `webllm.CreateEngine(modelId)` or with `webllm.Engine().reload(modelId)`.
 */
export declare class Engine implements EngineInterface {
    chat: API.Chat;
    private currentModelId?;
    private logger;
    private logitProcessorRegistry?;
    private logitProcessor?;
    private pipeline?;
    private initProgressCallback?;
    private interruptSignal;
    private deviceLostIsError;
    private config?;
    constructor();
    setInitProgressCallback(initProgressCallback?: InitProgressCallback): void;
    getInitProgressCallback(): InitProgressCallback | undefined;
    setLogitProcessorRegistry(logitProcessorRegistry?: Map<string, LogitProcessor>): void;
    reload(modelId: string, chatOpts?: ChatOptions, appConfig?: AppConfig): Promise<void>;
    generate(input: string | ChatCompletionRequestNonStreaming, progressCallback?: GenerateProgressCallback, streamInterval?: number, genConfig?: GenerationConfig): Promise<string>;
    private _generate;
    /**
     * Similar to `generate()`, but instead of using a callback, we use an async iterable.
     * @param request Request for chat completion.
     * @param genConfig Generation config extracted from `request`.
     */
    chatCompletionAsyncChunkGenerator(request: ChatCompletionRequestStreaming, genConfig: GenerationConfig): AsyncGenerator<ChatCompletionChunk, void, void>;
    /**
     * Completes a single ChatCompletionRequest.
     *
     * @param request An OpenAI-style ChatCompletion request.
     *
     * @note For each choice (i.e. `n`), a request is defined by a single `prefill()` and multiple
     * `decode()` calls. This is important as it determines the behavior of various fields including `seed`.
     */
    chatCompletion(request: ChatCompletionRequestNonStreaming): Promise<ChatCompletion>;
    chatCompletion(request: ChatCompletionRequestStreaming): Promise<AsyncIterable<ChatCompletionChunk>>;
    chatCompletion(request: ChatCompletionRequestBase): Promise<AsyncIterable<ChatCompletionChunk> | ChatCompletion>;
    interruptGenerate(): Promise<void>;
    runtimeStatsText(): Promise<string>;
    resetChat(keepStats?: boolean): Promise<void>;
    unload(): Promise<void>;
    getMaxStorageBufferBindingSize(): Promise<number>;
    getGPUVendor(): Promise<string>;
    forwardTokensAndSample(inputIds: Array<number>, isPrefill: boolean): Promise<number>;
    /**
     * @returns Whether the generation stopped.
     */
    stopped(): boolean;
    /**
     * @returns Finish reason; undefined if generation has not started or stopped yet.
     */
    getFinishReason(): ChatCompletionFinishReason | undefined;
    /**
     * Get the currently generated response.
     *
     * @returns The current output message.
     */
    getMessage(): Promise<string>;
    /**
     * Get a new Conversation object based on the chat completion request.
     *
     * @param request The incoming ChatCompletionRequest.
     * @note `request.messages[-1]` is not included, as it would be treated as a normal input to
     * `prefill()`.
     */
    private getConversationFromChatCompletionRequest;
    /**
     * Returns the function string based on request.tools and request.tool_choice, and raises errors
     * if it encounters an invalid request.
     *
     * @param request The ChatCompletionRequest we are about to prefill for.
     * @returns The string used to set Conversation.function_string.
     */
    private getFunctionCallUsage;
    /**
     * Run a prefill step with a given input.
     *
     * If `input` is a ChatCompletionRequest, we treat `input.messages[-1]` as the usual user input.
     * We then convert `input.messages[:-1]` to a `Conversation` object, representing the conversation
     * history.
     *
     * If the new `Conversation` object matches the currently loaded one, it means we are
     * performing multi-round chatting, so we do not reset, hence reusing the KV cache. Otherwise, we
     * reset everything, treating the request as something completely new.
     *
     * @param input The input prompt, or `messages` in OpenAI-like APIs.
     */
    prefill(input: string | ChatCompletionRequest, genConfig?: GenerationConfig): Promise<void>;
    /**
     * Run a decode step to decode the next token.
     */
    decode(genConfig?: GenerationConfig): Promise<void>;
    private getPipeline;
    private asyncLoadTokenizer;
}
//# sourceMappingURL=engine.d.ts.map
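The OpenAI-style `chatCompletion()` overloads above support both non-streaming and streaming use. A hedged streaming sketch; it assumes `engine` was created as in the `CreateEngine` example earlier, and the `choices[0].delta.content` field names follow the OpenAI chat-completion chunk shape, which is an assumption here:

// Streaming sketch; per the overloads, a request with stream: true resolves to an AsyncIterable.
const chunks = await engine.chatCompletion({
  stream: true,
  messages: [{ role: "user", content: "Tell me a short joke." }],
  temperature: 0.7,
});
let reply = "";
for await (const chunk of chunks) {
  // Accumulate the incremental deltas into the full reply.
  reply += chunk.choices[0]?.delta?.content ?? "";
}
console.log(reply);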