From 0c28da97da97db8c11c350de166f627a9a55b278 Mon Sep 17 00:00:00 2001
From: Nestor Qin
Date: Thu, 16 May 2024 05:04:59 -0400
Subject: [PATCH] fix: Restart service worker after it is killed

---
 .env.template               |  63 -------------
 app/client/api.ts           |   2 -
 app/client/webllm.ts        | 181 ++++++++++++++++++++++--------------
 app/components/chat.tsx     |   9 +-
 app/components/home.tsx     |  53 +++++++----
 app/components/settings.tsx |   1 -
 app/service-worker.ts       |  53 +++++++++--
 package.json                |   4 +-
 public/ping.txt             |   1 +
 yarn.lock                   |  18 +---
 10 files changed, 199 insertions(+), 186 deletions(-)
 delete mode 100644 .env.template
 create mode 100644 public/ping.txt

diff --git a/.env.template b/.env.template
deleted file mode 100644
index b2a0438d..00000000
--- a/.env.template
+++ /dev/null
@@ -1,63 +0,0 @@
-
-# Your openai api key. (required)
-OPENAI_API_KEY=sk-xxxx
-
-# Access password, separated by comma. (optional)
-CODE=your-password
-
-# You can start service behind a proxy
-PROXY_URL=http://localhost:7890
-
-# (optional)
-# Default: Empty
-# Googel Gemini Pro API key, set if you want to use Google Gemini Pro API.
-GOOGLE_API_KEY=
-
-# (optional)
-# Default: https://generativelanguage.googleapis.com/
-# Googel Gemini Pro API url without pathname, set if you want to customize Google Gemini Pro API url.
-GOOGLE_URL=
-
-# Override openai api request base url. (optional)
-# Default: https://api.openai.com
-# Examples: http://your-openai-proxy.com
-BASE_URL=
-
-# Specify OpenAI organization ID.(optional)
-# Default: Empty
-OPENAI_ORG_ID=
-
-# (optional)
-# Default: Empty
-# If you do not want users to use GPT-4, set this value to 1.
-DISABLE_GPT4=
-
-# (optional)
-# Default: Empty
-# If you do not want users to input their own API key, set this value to 1.
-HIDE_USER_API_KEY=
-
-# (optional)
-# Default: Empty
-# If you do want users to query balance, set this value to 1.
-ENABLE_BALANCE_QUERY=
-
-# (optional)
-# Default: Empty
-# If you want to disable parse settings from url, set this value to 1.
-DISABLE_FAST_LINK=
-
-
-# anthropic claude Api Key.(optional)
-ANTHROPIC_API_KEY=
-
-### anthropic claude Api version. (optional)
-ANTHROPIC_API_VERSION=
-
-
-
-### anthropic claude Api url (optional)
-ANTHROPIC_URL=
-
-### (optional)
-WHITE_WEBDEV_ENDPOINTS=
\ No newline at end of file
diff --git a/app/client/api.ts b/app/client/api.ts
index e7ffe0aa..3b1ef64f 100644
--- a/app/client/api.ts
+++ b/app/client/api.ts
@@ -58,7 +58,5 @@ export interface LLMModelProvider {
 export abstract class LLMApi {
   abstract chat(options: ChatOptions): Promise;
   abstract usage(): Promise;
-  abstract models(): Promise;
   abstract abort(): Promise;
-  abstract clear(): void;
 }
diff --git a/app/client/webllm.ts b/app/client/webllm.ts
index cc96a625..34c03303 100644
--- a/app/client/webllm.ts
+++ b/app/client/webllm.ts
@@ -1,92 +1,143 @@
 import { createContext } from "react";
 import {
-  CreateWebServiceWorkerEngine,
   InitProgressReport,
   prebuiltAppConfig,
   ChatCompletionMessageParam,
-  WebServiceWorkerEngine,
+  ServiceWorkerEngine,
+  ServiceWorker,
+  ChatCompletionChunk,
+  ChatCompletion,
 } from "@neet-nestor/web-llm";
-import { ChatOptions, LLMApi, LLMConfig } from "./api";
+import { ChatOptions, LLMApi, LLMConfig, RequestMessage } from "./api";
+
+const KEEP_ALIVE_INTERVAL = 10000;
 
 export class WebLLMApi implements LLMApi {
-  private currentModel?: string;
-  private engine?: WebServiceWorkerEngine;
+  private llmConfig?: LLMConfig;
+  engine?: ServiceWorkerEngine;
 
-  constructor(onEngineCrash: () => void) {
-    setInterval(() => {
-      if ((this.engine?.missedHeatbeat || 0) > 2) {
-        onEngineCrash?.();
-      }
-    }, 10000);
+  constructor() {
+    this.engine = new ServiceWorkerEngine(new ServiceWorker());
+    this.engine.keepAlive(
+      window.location.href + "ping.txt",
+      KEEP_ALIVE_INTERVAL,
+    );
+  }
+
+  async initModel(onUpdate?: (message: string, chunk: string) => void) {
+    if (!this.llmConfig) {
+      throw Error("llmConfig is undefined");
+    }
+    if (!this.engine) {
+      this.engine = new ServiceWorkerEngine(new ServiceWorker());
+    }
+    let hasResponse = false;
+    this.engine.setInitProgressCallback((report: InitProgressReport) => {
+      onUpdate?.(report.text, report.text);
+      hasResponse = true;
+    });
+    let initRequest = this.engine.init(this.llmConfig.model, this.llmConfig, {
+      ...prebuiltAppConfig,
+      useIndexedDBCache: this.llmConfig.cache === "index_db",
+    });
+    // In case the service worker is dead, init will halt indefinitely
+    // so we manually retry if timeout
+    let retry = 0;
+    let engine = this.engine;
+    let llmConfig = this.llmConfig;
+    let retryInterval: NodeJS.Timeout;
+
+    await new Promise((resolve, reject) => {
+      retryInterval = setInterval(() => {
+        if (hasResponse) {
+          clearInterval(retryInterval);
+          initRequest.then(resolve);
+          return;
+        }
+        if (retry >= 5) {
+          clearInterval(retryInterval);
+          reject("Model initialization timed out for too many times");
+          return;
+        }
+        retry += 1;
+        initRequest = engine.init(llmConfig.model, llmConfig, {
+          ...prebuiltAppConfig,
+          useIndexedDBCache: llmConfig.cache === "index_db",
+        });
+      }, 5000);
+    });
   }
 
-  clear() {
-    this.engine = undefined;
+  isConfigChanged(config: LLMConfig) {
+    return (
+      this.llmConfig?.model !== config.model ||
+      this.llmConfig?.cache !== config.cache ||
+      this.llmConfig?.temperature !== config.temperature ||
+      this.llmConfig?.top_p !== config.top_p ||
+      this.llmConfig?.presence_penalty !== config.presence_penalty ||
+      this.llmConfig?.frequency_penalty !== config.frequency_penalty
+    );
   }
 
-  async initModel(
-    config: LLMConfig,
+  async chatCompletion(
+    stream: boolean,
+    messages: RequestMessage[],
     onUpdate?: (message: string, chunk: string) => void,
   ) {
-    this.currentModel = config.model;
-    this.engine = await CreateWebServiceWorkerEngine(config.model, {
-      chatOpts: {
-        temperature: config.temperature,
-        top_p: config.top_p,
-        presence_penalty: config.presence_penalty,
-        frequency_penalty: config.frequency_penalty,
-      },
-      appConfig: {
-        ...prebuiltAppConfig,
-        useIndexedDBCache: config.cache === "index_db",
-      },
-      initProgressCallback: (report: InitProgressReport) => {
-        onUpdate?.(report.text, report.text);
-      },
+    let reply: string | null = "";
+
+    const completion = await this.engine!.chatCompletion({
+      stream: stream,
+      messages: messages as ChatCompletionMessageParam[],
     });
+
+    if (stream) {
+      const asyncGenerator = completion as AsyncIterable;
+      for await (const chunk of asyncGenerator) {
+        if (chunk.choices[0].delta.content) {
+          reply += chunk.choices[0].delta.content;
+          onUpdate?.(reply, chunk.choices[0].delta.content);
+        }
+      }
+      return reply;
+    }
+    return (completion as ChatCompletion).choices[0].message.content;
   }
 
   async chat(options: ChatOptions): Promise {
-    if (options.config.model !== this.currentModel) {
+    // in case the service worker is dead, revive it by firing a fetch event
+    fetch("/ping.txt");
+
+    if (this.isConfigChanged(options.config)) {
+      this.llmConfig = options.config;
       try {
-        await this.initModel(options.config, options.onUpdate);
+        await this.initModel(options.onUpdate);
       } catch (e) {
         console.error("Error in initModel", e);
       }
     }
 
     let reply: string | null = "";
-    if (options.config.stream) {
-      try {
-        const asyncChunkGenerator = await this.engine!.chatCompletion({
-          stream: options.config.stream,
-          messages: options.messages as ChatCompletionMessageParam[],
-        });
-
-        for await (const chunk of asyncChunkGenerator) {
-          if (chunk.choices[0].delta.content) {
-            reply += chunk.choices[0].delta.content;
-            options.onUpdate?.(reply, chunk.choices[0].delta.content);
-          }
-        }
-      } catch (err) {
-        console.error("Error in streaming chatCompletion", err);
-        options.onError?.(err as Error);
-        return;
-      }
-    } else {
-      try {
-        const completion = await this.engine!.chatCompletion({
-          stream: options.config.stream,
-          messages: options.messages as ChatCompletionMessageParam[],
-        });
-        reply = completion.choices[0].message.content;
-      } catch (err) {
-        console.error("Error in non-streaming chatCompletion", err);
+    try {
+      reply = await this.chatCompletion(
+        !!options.config.stream,
+        options.messages,
+        options.onUpdate,
+      );
+    } catch (err: any) {
+      if (err.toString().includes("Please call `Engine.reload(model)` first")) {
+        console.error("Error in chatCompletion", err);
         options.onError?.(err as Error);
         return;
       }
+      // Service worker has been stopped. Restart it
+      await this.initModel(options.onUpdate);
+      reply = await this.chatCompletion(
+        !!options.config.stream,
+        options.messages,
+        options.onUpdate,
+      );
     }
 
     if (reply) {
@@ -106,18 +157,6 @@ export class WebLLMApi implements LLMApi {
       total: 0,
     };
   }
-
-  async models() {
-    return prebuiltAppConfig.model_list.map((record) => ({
-      name: record.model_id,
-      available: true,
-      provider: {
-        id: "huggingface",
-        providerName: "huggingface",
-        providerType: "huggingface",
-      },
-    }));
-  }
 }
 
 export const WebLLMContext = createContext(null);
diff --git a/app/components/chat.tsx b/app/components/chat.tsx
index 4004999a..b831ceae 100644
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
@@ -97,7 +97,7 @@ import { ExportMessageModal } from "./exporter";
 import { getClientConfig } from "../config/client";
 import { useAllModels } from "../utils/hooks";
 import { MultimodalContent } from "../client/api";
-import { WebLLMApi, WebLLMContext } from "../client/webllm";
+import { WebLLMContext } from "../client/webllm";
 
 const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
   loading: () => ,
@@ -682,8 +682,7 @@ function _Chat() {
   const navigate = useNavigate();
   const [attachImages, setAttachImages] = useState([]);
   const [uploading, setUploading] = useState(false);
-
-  const webllm = useContext(WebLLMContext);
+  const webllm = useContext(WebLLMContext)!;
 
   // prompt hints
   const promptStore = usePromptStore();
@@ -764,7 +763,7 @@ function _Chat() {
     if (isStreaming) return;
     setIsLoading(true);
     chatStore
-      .onUserInput(userInput, webllm!, attachImages)
+      .onUserInput(userInput, webllm, attachImages)
       .then(() => setIsLoading(false));
     setAttachImages([]);
     localStorage.setItem(LAST_INPUT_KEY, userInput);
@@ -922,7 +921,7 @@ function _Chat() {
     const textContent = getMessageTextContent(userMessage);
     const images = getMessageImages(userMessage);
     chatStore
-      .onUserInput(textContent, webllm!, images)
+      .onUserInput(textContent, webllm, images)
       .then(() => setIsLoading(false));
     inputRef.current?.focus();
   };
diff --git a/app/components/home.tsx b/app/components/home.tsx
index cc35d106..41e9e662 100644
--- a/app/components/home.tsx
+++ b/app/components/home.tsx
@@ -28,6 +28,7 @@ import { useAppConfig } from "../store/config";
 import { getClientConfig } from "../config/client";
 import { WebLLMApi, WebLLMContext } from "../client/webllm";
 import Locale from "../locales";
+import { prebuiltAppConfig } from "@neet-nestor/web-llm";
 
 export function Loading(props: { noLogo?: boolean }) {
   return (
@@ -177,32 +178,48 @@ function Screen() {
   );
 }
 
-export function useLoadData(webllm: WebLLMApi) {
+export function useLoadData() {
   const config = useAppConfig();
 
   useEffect(() => {
-    (async () => {
-      if (webllm) {
-        const models = await webllm.models();
-        config.mergeModels(models);
-      }
-    })();
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [webllm]);
+    config.mergeModels(
+      prebuiltAppConfig.model_list.map((record) => ({
+        name: record.model_id,
+        available: true,
+        provider: {
+          id: "huggingface",
+          providerName: "huggingface",
+          providerType: "huggingface",
+        },
+      })),
+    );
+  }, []);
 }
 
+const useWebLLM = () => {
+  const [webllm, setWebLLM] = useState(null);
+  const [isSWAlive, setSWAlive] = useState(true);
+
+  useEffect(() => {
+    setWebLLM(new WebLLMApi());
+  }, []);
+
+  setInterval(() => {
+    if (webllm) {
+      // 10s per heartbeat, dead after 1 min of inactivity
+      setSWAlive(!!webllm.engine && webllm.engine.missedHeatbeat < 6);
+    }
+  });
+
+  return { webllm, isWebllmAlive: isSWAlive };
+};
+
 export function Home() {
   const hasHydrated = useHasHydrated();
   const isServiceWorkerReady = useServiceWorkerReady();
-  const [isEngineCrash, setEngineCrash] = useState(false);
-
-  const webllm = useMemo(() => {
-    return new WebLLMApi(() => {
-      setEngineCrash(true);
-    });
-  }, []);
+  const { webllm, isWebllmAlive } = useWebLLM();
 
-  useLoadData(webllm);
+  useLoadData();
   useSwitchTheme();
   useHtmlLang();
 
@@ -210,7 +227,7 @@ export function Home() {
     return ;
   }
 
-  if (isEngineCrash) {
+  if (!isWebllmAlive) {
     return ;
   }
 
diff --git a/app/components/settings.tsx b/app/components/settings.tsx
index f44a9256..fff92caf 100644
--- a/app/components/settings.tsx
+++ b/app/components/settings.tsx
@@ -471,7 +471,6 @@ export function Settings() {
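
Note on the keep-alive mechanism: the page pings a static asset (public/ping.txt) every KEEP_ALIVE_INTERVAL (10s) via engine.keepAlive(), chat() fires an extra fetch("/ping.txt") to revive a stopped service worker before sending a request, and the useWebLLM hook treats six or more missed heartbeats (about a minute) as a dead worker. The matching app/service-worker.ts hunk is not shown in this excerpt; the sketch below is one way such a handler could answer the pings, with the handler body and the "pong" payload assumed for illustration rather than taken from the patch:

    /// <reference lib="webworker" />
    // Illustrative sketch: answer keep-alive pings inside the worker so every
    // ping is a handled fetch event that wakes the worker and resets its idle timer.
    const worker = self as unknown as ServiceWorkerGlobalScope;

    worker.addEventListener("fetch", (event: FetchEvent) => {
      const url = new URL(event.request.url);
      if (url.pathname.endsWith("/ping.txt")) {
        // Respond locally; no network round trip is needed for a heartbeat.
        event.respondWith(new Response("pong", { status: 200 }));
      }
    });

Serving the ping from inside the worker keeps each heartbeat cheap, and the real public/ping.txt added by this patch still satisfies the request when no service worker is in control yet.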
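
In useWebLLM above, the setInterval call has no delay argument and is re-issued on every render without being cleared. A sketch of the same liveness check pinned to a fixed period inside an effect; HEARTBEAT_CHECK_INTERVAL is a local name chosen here (mirroring KEEP_ALIVE_INTERVAL), not part of the patch:

    import { useEffect, useState } from "react";
    import { WebLLMApi } from "../client/webllm";

    const HEARTBEAT_CHECK_INTERVAL = 10_000; // same 10s cadence as KEEP_ALIVE_INTERVAL

    const useWebLLM = () => {
      const [webllm, setWebLLM] = useState<WebLLMApi | null>(null);
      const [isSWAlive, setSWAlive] = useState(true);

      useEffect(() => {
        setWebLLM(new WebLLMApi());
      }, []);

      useEffect(() => {
        // 10s per heartbeat, dead after 1 min of inactivity
        const id = setInterval(() => {
          if (webllm) {
            setSWAlive(!!webllm.engine && webllm.engine.missedHeatbeat < 6);
          }
        }, HEARTBEAT_CHECK_INTERVAL);
        return () => clearInterval(id);
      }, [webllm]);

      return { webllm, isWebllmAlive: isSWAlive };
    };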