Commit
print message to test
Neet-Nestor committed May 15, 2024
1 parent 25e5265 commit 8c2933b
Showing 41 changed files with 9,887 additions and 24 deletions.
10 changes: 6 additions & 4 deletions app/client/webllm.ts
@@ -70,8 +70,9 @@ export class WebLLMApi implements LLMApi {
            options.onUpdate?.(reply, chunk.choices[0].delta.content);
          }
        }
-      } catch (e) {
-        console.error("Error in streaming chatCompletion", e);
+      } catch (err) {
+        console.error("Error in streaming chatCompletion", err);
+        options.onError?.(err as Error);
      }
    } else {
      try {
@@ -80,8 +81,9 @@ export class WebLLMApi implements LLMApi {
          messages: options.messages as ChatCompletionMessageParam[],
        });
        reply = completion.choices[0].message.content;
-      } catch (e) {
-        console.error("Error in streaming chatCompletion", e);
+      } catch (err) {
+        console.error("Error in non-streaming chatCompletion", err);
+        options.onError?.(err as Error);
      }
    }

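The net effect of the added `options.onError?.(err as Error)` calls is that failures inside `chatCompletion` now reach the caller instead of only being logged to the console. A rough sketch of the caller side, where only the `onUpdate`/`onError` shape is taken from this diff and everything else (message format, constructor, extra fields) is illustrative:

// Hypothetical caller-side options; only onUpdate/onError mirror the diff above.
const chatOptions = {
  messages: [{ role: "user" as const, content: "Hello" }],
  onUpdate(reply: string, delta: string) {
    // Streamed progress: `reply` is the accumulated text, `delta` the newest chunk.
    console.log("partial reply:", reply, "delta:", delta);
  },
  onError(err: Error) {
    // Before this commit the error stopped at console.error inside WebLLMApi;
    // now it propagates here so the UI layer can surface it.
    console.error("chat failed:", err.message);
  },
};
// e.g. await new WebLLMApi().chat(chatOptions); // constructor/required fields may differ
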
2 changes: 1 addition & 1 deletion app/store/chat.ts
@@ -383,7 +383,7 @@ export const useChatStore = createPersistStore(
          ChatControllerPool.remove(session.id, botMessage.id);
        },
        onError(error) {
-          const isAborted = error.message.includes("aborted");
+          const isAborted = error.message?.includes("aborted") || false;
          botMessage.content +=
            "\n\n" +
            prettyObject({
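A note on the `?.` change: with errors now propagated up from the engine layer, `error.message` can be undefined (for example when a non-Error value is thrown), and the old `error.message.includes("aborted")` would itself throw a TypeError inside the error handler. A small standalone illustration, not taken from the repository:

// Standalone illustration of the null-safe aborted check.
function isAbortedError(error: { message?: string }): boolean {
  // Optional chaining yields undefined when message is missing;
  // `|| false` normalizes the result to a boolean.
  return error.message?.includes("aborted") || false;
}

console.log(isAbortedError({ message: "the operation was aborted" })); // true
console.log(isAbortedError({})); // false, instead of a TypeError
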
7 changes: 7 additions & 0 deletions lib/cache_util.d.ts
@@ -0,0 +1,7 @@
import { AppConfig } from "./config";
export declare function hasModelInCache(modelId: string, appConfig?: AppConfig): Promise<boolean>;
export declare function deleteModelAllInfoInCache(modelId: string, appConfig?: AppConfig): Promise<void>;
export declare function deleteModelInCache(modelId: string, appConfig?: AppConfig): Promise<void>;
export declare function deleteChatConfigInCache(modelId: string, appConfig?: AppConfig): Promise<void>;
export declare function deleteModelWasmInCache(modelId: string, appConfig?: AppConfig): Promise<void>;
//# sourceMappingURL=cache_util.d.ts.map
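These helpers ship with the bundled lib and follow WebLLM's cache utilities. A minimal sketch of checking for and evicting a cached model, based only on the signatures declared above; the model id is illustrative and the import paths assume application code sitting next to this lib/ directory:

import { hasModelInCache, deleteModelAllInfoInCache } from "./lib/cache_util";
import { prebuiltAppConfig } from "./lib/config";

// Illustrative model id; any entry of the AppConfig's model_list would do.
const modelId = "Llama-3-8B-Instruct-q4f32_1";

async function evictModel(): Promise<void> {
  // appConfig is optional in both helpers; passing prebuiltAppConfig explicitly here.
  if (await hasModelInCache(modelId, prebuiltAppConfig)) {
    // Judging by its name, this drops every cached artifact for the model
    // (weights, wasm library, chat config).
    await deleteModelAllInfoInCache(modelId, prebuiltAppConfig);
  }
}
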
1 change: 1 addition & 0 deletions lib/cache_util.d.ts.map

Some generated files are not rendered by default.

159 changes: 159 additions & 0 deletions lib/config.d.ts
@@ -0,0 +1,159 @@
import { ResponseFormat } from "./openai_api_protocols";
import { LogitProcessor, InitProgressCallback } from "./types";
/**
* Conversation template config
*/
export interface ConvTemplateConfig {
system_template: string;
system_message: string;
roles: Record<Role, string>;
role_templates?: Partial<Record<Role, string>>;
seps: Array<string>;
role_content_sep?: string;
role_empty_sep?: string;
offset: number;
stop_str: Array<string>;
system_prefix_token_ids?: Array<number>;
stop_token_ids: Array<number>;
add_role_after_system_message?: boolean;
}
export declare enum Role {
user = "user",
assistant = "assistant"
}
/**
* Placeholders that can be used in role templates.
* For example, a role template of
* `<<question>> ${MessagePlaceholders.USER} <<function>> ${MessagePlaceholders.FUNCTION}`
* will insert the user message at ${MessagePlaceholders.USER}
* and the function message at ${MessagePlaceholders.FUNCTION}
* at run time.
*/
export declare enum MessagePlaceholders {
system = "{system_message}",
user = "{user_message}",
assistant = "{assistant_message}",
tool = "{tool_message}",
function = "{function_string}"
}
/**
* Config of one chat model, a data structure representing `mlc-chat-config.json`.
* This only corresponds to the chat-related fields and `tokenizer_files` of `mlc-chat-config.json`.
* Only these fields affect the conversation in runtime.
* i.e. The third part in https://llm.mlc.ai/docs/get_started/mlc_chat_config.html.
*
* This is initialized in `ChatModule.reload()` with the model's `mlc-chat-config.json`.
*/
export interface ChatConfig {
tokenizer_files: Array<string>;
conv_config?: Partial<ConvTemplateConfig>;
conv_template: string | ConvTemplateConfig;
mean_gen_len: number;
max_gen_len: number;
shift_fill_factor: number;
repetition_penalty: number;
frequency_penalty: number;
presence_penalty: number;
top_p: number;
temperature: number;
bos_token_id?: number;
}
/**
* Custom options that can be used to override known config values.
*/
export interface ChatOptions extends Partial<ChatConfig> {
}
/**
* Optional configurations for `CreateEngine()` and `CreateWebWorkerEngine()`.
*
* chatOpts: To optionally override the `mlc-chat-config.json` of `modelId`.
* appConfig: Configure the app, including the list of models and whether to use IndexedDB cache.
* initProgressCallback: A callback for showing the progress of loading the model.
* logitProcessorRegistry: A register for stateful logit processors, see `webllm.LogitProcessor`.
*
* @note All fields are optional, and `logitProcessorRegistry` is only used for `CreateEngine()`
* not `CreateWebWorkerEngine()`.
*/
export interface EngineConfig {
chatOpts?: ChatOptions;
appConfig?: AppConfig;
initProgressCallback?: InitProgressCallback;
logitProcessorRegistry?: Map<string, LogitProcessor>;
}
/**
* Config for a single generation.
* Essentially `ChatConfig` without `tokenizer_files`, `conv_config`, or `conv_template`.
* We also support additional fields not present in `mlc-chat-config.json` due to OpenAI-like APIs.
*
* Note that all values are optional. If unspecified, we use whatever values in `ChatConfig`
* initialized during `ChatModule.reload()`.
*/
export interface GenerationConfig {
mean_gen_len?: number;
shift_fill_factor?: number;
repetition_penalty?: number;
top_p?: number | null;
temperature?: number | null;
max_gen_len?: number | null;
frequency_penalty?: number | null;
presence_penalty?: number | null;
stop?: string | null | Array<string>;
n?: number | null;
logit_bias?: Record<string, number> | null;
logprobs?: boolean | null;
top_logprobs?: number | null;
response_format?: ResponseFormat | null;
}
export declare function postInitAndCheckGenerationConfigValues(config: GenerationConfig): void;
/**
* Information for a model.
* @param model_url: the huggingface link to download the model weights.
* @param model_id: what we call the model.
* @param model_lib_url: link to the model library (wasm file) the model uses.
* @param vram_required_MB: amount of vram in MB required to run the model (can use
* `utils/vram_requirements` to calculate).
* @param low_resource_required: whether the model can run on limited devices (e.g. Android phone).
* @param buffer_size_required_bytes: required `maxStorageBufferBindingSize`, different for each device.
* @param required_features: features needed to run this model (e.g. shader-f16).
*/
export interface ModelRecord {
model_url: string;
model_id: string;
model_lib_url: string;
vram_required_MB?: number;
low_resource_required?: boolean;
buffer_size_required_bytes?: number;
required_features?: Array<string>;
}
/**
* Extra configuration that can be passed when loading the model.
*
* @param model_list: models to be used.
* @param useIndexedDBCache: if true, will use IndexedDBCache to cache models and other artifacts.
* If false or unspecified, will use the Cache API. For more information of the two, see:
* https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria#what_technologies_store_data_in_the_browser
*
* @note The Cache API is the better-tested option in WebLLM as of now.
*/
export interface AppConfig {
model_list: Array<ModelRecord>;
useIndexedDBCache?: boolean;
}
/**
* modelVersion: the prebuilt model libraries that the current npm is compatible with, affects the
* `model_lib_url`s in `prebuiltAppConfig`.
*
* @note The model version does not have to match the npm version, since not every npm update
* requires an update of the model libraries.
*/
export declare const modelVersion = "v0_2_34";
export declare const modelLibURLPrefix = "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/web-llm-models/";
/**
* Default models and model library mapping to be used if unspecified.
*
* @note This is the only source of truth of which prebuilt model libraries are compatible with the
* current WebLLM npm version.
*/
export declare const prebuiltAppConfig: AppConfig;
//# sourceMappingURL=config.d.ts.map
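To make the relationship between ModelRecord, AppConfig, and EngineConfig concrete, here is a sketch of a custom model entry wired into an engine configuration. The URLs and the model id below are placeholders, not entries from prebuiltAppConfig, and the import path assumes code next to this lib/ directory:

import { AppConfig, EngineConfig, ModelRecord } from "./lib/config";

// Placeholder record: the id and URLs are illustrative, not real artifacts.
const myModel: ModelRecord = {
  model_url: "https://huggingface.co/my-org/my-model-q4f32_1/resolve/main/",
  model_id: "my-model-q4f32_1",
  model_lib_url: "https://example.com/my-model-q4f32_1-webgpu.wasm",
  vram_required_MB: 4000,
  low_resource_required: false,
};

const appConfig: AppConfig = {
  model_list: [myModel],
  useIndexedDBCache: false, // false/unspecified -> Cache API; true -> IndexedDB
};

const engineConfig: EngineConfig = {
  appConfig,
  chatOpts: { temperature: 0.7, top_p: 0.95 }, // overrides values from mlc-chat-config.json
  initProgressCallback: (report) => console.log(report),
};
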
1 change: 1 addition & 0 deletions lib/config.d.ts.map

Some generated files are not rendered by default.

48 changes: 48 additions & 0 deletions lib/conversation.d.ts
@@ -0,0 +1,48 @@
import { ConvTemplateConfig, Role } from "./config";
/**
* Helper to keep track of the conversation history.
*/
export declare class Conversation {
messages: Array<[Role, string, string | undefined]>;
readonly config: ConvTemplateConfig;
function_string: string;
use_function_calling: boolean;
override_system_message?: string;
constructor(config: ConvTemplateConfig);
private getPromptArrayInternal;
/**
* Get prompt arrays with the first one as system.
*
* @returns The prompt array.
*/
getPromptArray(): Array<string>;
/**
* Get the last round of the prompt that has not been fed as input.
*
* @note This function assumes that the caller has called appendMessage and then
* appendReplyHeader.
*
* @returns The prompt array.
*/
getPrompArrayLastRound(): string[];
/**
* Resets all states for this.conversation.
*/
reset(): void;
getStopStr(): string[];
getStopTokens(): number[];
appendMessage(role: Role, message: string, role_name?: string): void;
appendReplyHeader(role: Role): void;
finishReply(message: string): void;
}
export declare function getConversation(conv_template: string | ConvTemplateConfig, conv_config?: Partial<ConvTemplateConfig>): Conversation;
/**
* Compare the states of two conversation instances. Equality is defined as their getPromptArray()
* returning exactly the same thing, which is determined by the fields: messages, function_string,
* use_function_calling, and override_system_message.
*
* @returns True if `convA` equals `convB`.
* @note We assume convA and convB have the same `this.config`.
*/
export declare function compareConversationObject(convA: Conversation, convB: Conversation): boolean;
//# sourceMappingURL=conversation.d.ts.map
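Conversation is driven internally by the engine, but the declared surface is enough to sketch how a prompt array gets assembled. A rough, hypothetical usage based only on the methods above; the "llama-2" template name is illustrative, and real names come from the model's mlc-chat-config.json:

import { getConversation } from "./lib/conversation";
import { Role } from "./lib/config";

// Build a conversation from a named template (illustrative name).
const conv = getConversation("llama-2");
conv.appendMessage(Role.user, "What is WebGPU?");
conv.appendReplyHeader(Role.assistant);

// The first element is the system prompt, followed by each round so far.
console.log(conv.getPromptArray());
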
1 change: 1 addition & 0 deletions lib/conversation.d.ts.map

Some generated files are not rendered by default.

114 changes: 114 additions & 0 deletions lib/engine.d.ts
@@ -0,0 +1,114 @@
import * as API from "./openai_api_protocols/apis";
import { ChatOptions, AppConfig, GenerationConfig, EngineConfig } from "./config";
import { ChatCompletionRequest, ChatCompletion, ChatCompletionChunk, ChatCompletionFinishReason, ChatCompletionRequestNonStreaming, ChatCompletionRequestStreaming, ChatCompletionRequestBase } from "./openai_api_protocols/index";
import { InitProgressCallback, EngineInterface, GenerateProgressCallback, LogitProcessor } from "./types";
/**
* Creates `Engine`, and loads `modelId` onto WebGPU.
*
* Equivalent to `new webllm.Engine().reload(...)`.
*
* @param modelId The model to load, needs to either be in `webllm.prebuiltAppConfig`, or in
* `engineConfig.appConfig`.
* @param engineConfig Optionally configures the engine, see `webllm.EngineConfig`.
* @returns An initialized `WebLLM.Engine` with `modelId` loaded.
*/
export declare function CreateEngine(modelId: string, engineConfig?: EngineConfig): Promise<Engine>;
/**
* The main interface of Engine, which loads a model and performs tasks.
*
* You can initialize one either with `webllm.CreateEngine(modelId)` or with `new webllm.Engine().reload(modelId)`.
*/
export declare class Engine implements EngineInterface {
chat: API.Chat;
private currentModelId?;
private logger;
private logitProcessorRegistry?;
private logitProcessor?;
private pipeline?;
private initProgressCallback?;
private interruptSignal;
private deviceLostIsError;
private config?;
constructor();
setInitProgressCallback(initProgressCallback?: InitProgressCallback): void;
getInitProgressCallback(): InitProgressCallback | undefined;
setLogitProcessorRegistry(logitProcessorRegistry?: Map<string, LogitProcessor>): void;
reload(modelId: string, chatOpts?: ChatOptions, appConfig?: AppConfig): Promise<void>;
generate(input: string | ChatCompletionRequestNonStreaming, progressCallback?: GenerateProgressCallback, streamInterval?: number, genConfig?: GenerationConfig): Promise<string>;
private _generate;
/**
* Similar to `generate()`, but instead of using a callback, we use an async iterable.
* @param request Request for chat completion.
* @param genConfig Generation config extracted from `request`.
*/
chatCompletionAsyncChunkGenerator(request: ChatCompletionRequestStreaming, genConfig: GenerationConfig): AsyncGenerator<ChatCompletionChunk, void, void>;
/**
* Completes a single ChatCompletionRequest.
*
* @param request An OpenAI-style ChatCompletion request.
*
* @note For each choice (i.e. `n`), a request is defined by a single `prefill()` and multiple
* `decode()` calls. This is important as it determines the behavior of various fields including `seed`.
*/
chatCompletion(request: ChatCompletionRequestNonStreaming): Promise<ChatCompletion>;
chatCompletion(request: ChatCompletionRequestStreaming): Promise<AsyncIterable<ChatCompletionChunk>>;
chatCompletion(request: ChatCompletionRequestBase): Promise<AsyncIterable<ChatCompletionChunk> | ChatCompletion>;
interruptGenerate(): Promise<void>;
runtimeStatsText(): Promise<string>;
resetChat(keepStats?: boolean): Promise<void>;
unload(): Promise<void>;
getMaxStorageBufferBindingSize(): Promise<number>;
getGPUVendor(): Promise<string>;
forwardTokensAndSample(inputIds: Array<number>, isPrefill: boolean): Promise<number>;
/**
* @returns Whether the generation stopped.
*/
stopped(): boolean;
/**
* @returns Finish reason; undefined if generation not started/stopped yet.
*/
getFinishReason(): ChatCompletionFinishReason | undefined;
/**
* Get the current generated response.
*
* @returns The current output message.
*/
getMessage(): Promise<string>;
/**
* Get a new Conversation object based on the chat completion request.
*
* @param request The incoming ChatCompletionRequest
* @note `request.messages[-1]` is not included as it would be treated as a normal input to
* `prefill()`.
*/
private getConversationFromChatCompletionRequest;
/**
* Returns the function string based on request.tools and request.tool_choice; raises errors if
* it encounters an invalid request.
*
* @param request The chatCompletionRequest we are about to prefill for.
* @returns The string used to set Conversation.function_string
*/
private getFunctionCallUsage;
/**
* Run a prefill step with a given input.
*
* If `input` is a chatCompletionRequest, we treat `input.messages[-1]` as the usual user input.
* We then convert `input.messages[:-1]` to a `Conversation` object, representing a conversation
* history.
*
* If the new `Conversation` object matches the currently loaded one, it means we are
* performing multi-round chatting, so we do not reset, hence reusing the KV cache. Otherwise, we
* reset everything, treating the request as completely new.
*
* @param input The input prompt, or `messages` in OpenAI-like APIs.
*/
prefill(input: string | ChatCompletionRequest, genConfig?: GenerationConfig): Promise<void>;
/**
* Run a decode step to decode the next token.
*/
decode(genConfig?: GenerationConfig): Promise<void>;
private getPipeline;
private asyncLoadTokenizer;
}
//# sourceMappingURL=engine.d.ts.map
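Putting the engine surface together, here is a sketch of loading a prebuilt model and streaming a chat completion through the OpenAI-style API declared above. The model id is illustrative (any id from prebuiltAppConfig.model_list would work), and the import path assumes code next to this lib/ directory:

import { CreateEngine } from "./lib/engine";

async function main(): Promise<void> {
  // Loads the model onto WebGPU; equivalent to `new Engine()` followed by `reload()`.
  const engine = await CreateEngine("Llama-3-8B-Instruct-q4f32_1", {
    initProgressCallback: (report) => console.log(report),
  });

  // stream: true selects the AsyncIterable<ChatCompletionChunk> overload.
  const chunks = await engine.chatCompletion({
    stream: true,
    messages: [{ role: "user", content: "Explain WebGPU in one sentence." }],
  });

  let reply = "";
  for await (const chunk of chunks) {
    reply += chunk.choices[0].delta.content ?? "";
  }
  console.log(reply);
  console.log(await engine.runtimeStatsText());
}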