From 3b63767b0b8c01edf0a6e122341e28bbf42f035c Mon Sep 17 00:00:00 2001 From: JonLuca DeCaro Date: Wed, 19 Apr 2023 01:01:59 -0700 Subject: [PATCH] clean up logging for workers, improve query to only include semantic results --- electron-src/data/database.ts | 81 ++++++++++++------- electron-src/index.ts | 10 +-- electron-src/logs.ts | 15 ---- .../semantic-search/semantic-search.ts | 13 ++- electron-src/utils/flags.ts | 6 +- electron-src/utils/logger.ts | 21 ++++- electron-src/window/main-window.ts | 3 +- electron-src/window/menu.ts | 3 +- next.config.mjs | 2 + src/components/global-search/GlobalSearch.tsx | 4 +- 10 files changed, 91 insertions(+), 67 deletions(-) delete mode 100644 electron-src/logs.ts diff --git a/electron-src/data/database.ts b/electron-src/data/database.ts index a109967..e05fe6a 100644 --- a/electron-src/data/database.ts +++ b/electron-src/data/database.ts @@ -2,7 +2,7 @@ import type { SelectQueryBuilder, Kysely } from "kysely"; import { sql } from "kysely"; import type { DB as MesssagesDatabase } from "../../_generated/types"; import logger from "../utils/logger"; -import { countBy, groupBy, partition } from "lodash-es"; +import { countBy, groupBy, uniq } from "lodash-es"; import type { Contact } from "electron-mac-contacts"; import { decodeMessageBuffer, getTextFromBuffer } from "../utils/buffer"; import { appMessagesDbCopy } from "../utils/constants"; @@ -107,7 +107,7 @@ export class SQLDatabase extends BaseDatabase { getMessageGuidsFromText = async (texts: string[]) => { const db = this.db; - const query = db.selectFrom("message").select(["guid", "text"]).where("text", "in", texts); + const query = db.selectFrom("message").select(["guid", "text"]).where("text", "in", texts).limit(10000); const results = await query.execute(); const indexMap = new Map(); for (let i = 0; i < texts.length; i++) { @@ -132,8 +132,6 @@ export class SQLDatabase extends BaseDatabase { endDate?: Date | null, ) => { const db = this.db; - // SELECT * FROM message_fts WHERE text MATCH 'jonluca' ORDER BY rank; - const cleanedQuery = searchTerm.replace(/[^a-zA-Z0-9 ]/g, ""); const textMatch = await db .selectFrom("message_fts") @@ -143,27 +141,29 @@ export class SQLDatabase extends BaseDatabase { .limit(1000) .execute(); const messageGuids = textMatch.map((m) => m.message_id as string); - return this.fullTextMessageSearchWithGuids(messageGuids, searchTerm, chatIds, handleIds, startDate, endDate); + const query = this.getFilteredSearchQuery({ chatIds, handleIds, startDate, endDate }) + .where(({ or, cmpr }) => { + return or([cmpr("filename", "like", "%" + searchTerm + "%"), cmpr("text", "like", "%" + searchTerm + "%")]); + }) + .limit(1000); + const alternateResults = await query.execute(); + const alternateMessageGuids = alternateResults.map((m) => m.guid as string); + const allMessageGuids = uniq([...messageGuids, ...alternateMessageGuids]); + return this.fullTextMessageSearchWithGuids(allMessageGuids, searchTerm, chatIds, handleIds, startDate, endDate); }; - fullTextMessageSearchWithGuids = async ( - messageGuids: string[], - searchTerm: string, - chatIds?: number[], - handleIds?: number[], - startDate?: Date | null, - endDate?: Date | null, - ) => { - const messageGuidsSet = new Set(messageGuids); - + private getFilteredSearchQuery = ({ + chatIds, + handleIds, + startDate, + endDate, + }: { + chatIds?: number[]; + handleIds?: number[]; + startDate?: Date | null; + endDate?: Date | null; + }) => { let query = this.getJoinedMessageQuery() .select("message.guid as guid") - .where(({ or, cmpr }) => { - return or([ - cmpr("message.guid", "in", messageGuids), - cmpr("filename", "like", "%" + searchTerm + "%"), - cmpr("text", "like", "%" + searchTerm + "%"), - ]); - }) .where("item_type", "not in", [1, 3, 4, 5, 6]) .where("associated_message_type", "=", 0); @@ -193,17 +193,36 @@ export class SQLDatabase extends BaseDatabase { query = query.where("date", "<", offset); } + return query; + }; + fullTextMessageSearchWithGuids = async ( + messageGuids: string[], + searchTerm: string, + chatIds?: number[], + handleIds?: number[], + startDate?: Date | null, + endDate?: Date | null, + ) => { + const query = this.getFilteredSearchQuery({ chatIds, handleIds, startDate, endDate }).where( + "message.guid", + "in", + messageGuids, + ); + const messages = await query.limit(10000).execute(); - const [matchedMessages, unmatchedMessages] = partition(messages, (m) => messageGuidsSet.has(m.guid)); - logger.info(`Matched ${matchedMessages.length} messages, unmatched ${unmatchedMessages.length} messages`); - matchedMessages.sort((a, b) => { - return messageGuids.indexOf(a.guid) - messageGuids.indexOf(b.guid); - }); - unmatchedMessages.sort((a, b) => { - return (b.date || 0) - (a.date || 0); + const indexMap = new Map(); + for (let i = 0; i < messageGuids.length; i++) { + indexMap.set(messageGuids[i], i); + } + messages.sort((a, b) => { + const aIndex = indexMap.get(a.guid!); + const bIndex = indexMap.get(b.guid!); + if (aIndex === undefined || bIndex === undefined) { + return 0; + } + return aIndex - bIndex; }); - const allMessages = [...matchedMessages, ...unmatchedMessages]; - return this.enhanceMessageResponses<(typeof messages)[number]>(allMessages); + return this.enhanceMessageResponses<(typeof messages)[number]>(messages); }; private convertDate = (date: number) => { return new Date(date / 1000000 + 978307200000); diff --git a/electron-src/index.ts b/electron-src/index.ts index 84ce8f9..165a20e 100644 --- a/electron-src/index.ts +++ b/electron-src/index.ts @@ -17,13 +17,11 @@ import { getMenu } from "./window/menu"; import "better-sqlite3"; import { mainAppIconDevPng } from "./constants"; -import logger, { fileLogFormat } from "./utils/logger"; +import logger, { logPath, logStream } from "./utils/logger"; import { setupRouteHandlers } from "./utils/routes"; import { DESKTOP_VERSION } from "./versions"; import { autoUpdater } from "electron-updater"; import dbWorker from "./workers/database-worker"; -import winston from "winston"; -import { logPath, logStream } from "./logs"; registerContextMenu({ showSaveImageAs: true, @@ -42,12 +40,6 @@ let errorTries = 0; const MAX_ERROR_TRIES = 5; const amMainInstance = app.requestSingleInstanceLock(); -logger.add( - new winston.transports.Stream({ - stream: logStream, - format: fileLogFormat, - }), -); logger.info(`Starting logging to ${logPath}`); if (!amMainInstance) { logStream.write("Not the main instance - quitting"); diff --git a/electron-src/logs.ts b/electron-src/logs.ts deleted file mode 100644 index c0c2069..0000000 --- a/electron-src/logs.ts +++ /dev/null @@ -1,15 +0,0 @@ -import fs, { createWriteStream } from "fs"; -import path from "path"; -import { threadId } from "node:worker_threads"; -import os from "os"; -import { appPath } from "./versions"; - -const isDev = process.env.NODE_ENV !== "production"; -const Logs = path.join(os.homedir(), "Library", "Logs", appPath); -// ensure directory exists recursively -if (!fs.existsSync(Logs)) { - fs.mkdirSync(Logs, { recursive: true }); -} -const workerPrefix = threadId ? `worker-${threadId}-` : ""; -export const logPath = path.join(Logs, `${isDev ? "dev-" : ""}${workerPrefix}run-${new Date().toISOString()}.log`); -export const logStream = createWriteStream(logPath); diff --git a/electron-src/semantic-search/semantic-search.ts b/electron-src/semantic-search/semantic-search.ts index ba9da85..69d6c65 100644 --- a/electron-src/semantic-search/semantic-search.ts +++ b/electron-src/semantic-search/semantic-search.ts @@ -136,6 +136,7 @@ export async function semanticQuery({ queryText, openAiKey }: SemanticQueryOpts) let floatEmbedding = existingEmbedding?.embedding; if (!existingEmbedding) { + const now = performance.now(); const configuration = new Configuration({ apiKey: openAiKey, }); @@ -145,6 +146,7 @@ export async function semanticQuery({ queryText, openAiKey }: SemanticQueryOpts) input: queryText, model: OPENAI_EMBEDDING_MODEL, }); + logger.info(`Got embedding from OpenAI in ${performance.now() - now}ms`); const embed = openAiResponse.data; const embedding = embed.data?.[0]?.embedding; if (!embedding) { @@ -155,7 +157,10 @@ export async function semanticQuery({ queryText, openAiKey }: SemanticQueryOpts) floatEmbedding = new Float32Array(embedding); } - return dbWorker.embeddingsWorker.calculateSimilarity(floatEmbedding!); + const now = performance.now(); + const calculateSimilarity = await dbWorker.embeddingsWorker.calculateSimilarity(floatEmbedding!); + logger.info(`Calculated similarity in ${performance.now() - now}ms`); + return calculateSimilarity; } handleIpc("createEmbeddings", async ({ openAiKey: openAiKey }) => { @@ -196,14 +201,14 @@ handleIpc( } if (useSemanticSearch) { logger.info("Using semantic search"); + const now = performance.now(); const messageTexts = await semanticQuery({ openAiKey, queryText: searchTerm, }); - logger.info(`Got ${messageTexts.length} results`); - + logger.info(`Got ${messageTexts.length} results in ${performance.now() - now}ms`); const guids = await dbWorker.worker.getMessageGuidsFromText(messageTexts); - + logger.info(`Got ${guids.length} guids from text`); return await dbWorker.worker.fullTextMessageSearchWithGuids( guids, searchTerm, diff --git a/electron-src/utils/flags.ts b/electron-src/utils/flags.ts index 61e73ff..bf4f165 100644 --- a/electron-src/utils/flags.ts +++ b/electron-src/utils/flags.ts @@ -1,10 +1,14 @@ import type { App } from "electron"; export const addFlags = (app: App) => { + process.env.UV_THREADPOOL_SIZE = "128"; app.commandLine.appendSwitch( "enable-features", "HardwareMediaKeyHandling,MediaSessionService,WebGPU,WebGPUDeveloperFeatures,WebGPUImportTexture,CSSVideoDynamicRangeMediaQueries,ExtraWebGLVideoTextureMetadata", ); app.commandLine.appendSwitch("ignore-connections-limit", "localhost"); app.commandLine.appendArgument("--enable-experimental-web-platform-features"); - app.commandLine.appendSwitch('--js-flags="--max-old-space-size=32678"'); + app.commandLine.appendSwitch( + '--js-flags="--max-old-space-size=32678 --max-semi-space-size=32678 --use-largepages=silent"', + ); + app.commandLine.appendSwitch("--remote-allow-origins=*"); }; diff --git a/electron-src/utils/logger.ts b/electron-src/utils/logger.ts index 584abf2..082d388 100644 --- a/electron-src/utils/logger.ts +++ b/electron-src/utils/logger.ts @@ -1,5 +1,11 @@ import type { LeveledLogMethod } from "winston"; +import fs from "fs"; import winston from "winston"; +import { threadId } from "node:worker_threads"; +import path from "path"; +import os from "os"; +import { appPath } from "../versions"; +import { createWriteStream } from "fs"; const { combine, timestamp, printf, colorize, errors, json, splat } = winston.format; const ts = timestamp({ @@ -19,12 +25,25 @@ export const print = printf((info) => { const localFormat = combine(ts, colorize(), splat(), errors({ stack: true }), print); export const fileLogFormat = combine(ts, splat(), errors({ stack: true }), print); +const isDev = process.env.NODE_ENV !== "production"; +const logDir = path.join(os.homedir(), "Library", "Logs", appPath); +// ensure directory exists recursively +if (!fs.existsSync(logDir)) { + fs.mkdirSync(logDir, { recursive: true }); +} + +const workerPrefix = threadId ? `worker-${threadId}-` : ""; +export const logPath = path.join(logDir, `${isDev ? "dev-" : ""}${workerPrefix}run-${new Date().toISOString()}.log`); +export const logStream = createWriteStream(logPath); export const logger = winston.createLogger({ level: "debug", transports: [ new winston.transports.Console({ format: localFormat, - level: "debug", + }), + new winston.transports.Stream({ + stream: logStream, + format: fileLogFormat, }), ], }); diff --git a/electron-src/window/main-window.ts b/electron-src/window/main-window.ts index 65386d2..a356a8c 100644 --- a/electron-src/window/main-window.ts +++ b/electron-src/window/main-window.ts @@ -8,10 +8,9 @@ import isDev from "electron-is-dev"; import { format } from "url"; import { showErrorAlert, withRetries } from "../utils/util"; import prepareNext from "../utils/next-helper"; -import logger from "../utils/logger"; +import logger, { logStream } from "../utils/logger"; import { windows } from "../index"; import { addWebRequestToSession } from "../utils/routes"; -import { logStream } from "../logs"; const setupNext = async () => { try { diff --git a/electron-src/window/menu.ts b/electron-src/window/menu.ts index 84d69a8..daf4f13 100644 --- a/electron-src/window/menu.ts +++ b/electron-src/window/menu.ts @@ -6,8 +6,7 @@ import { showApp } from "../utils/util"; import { requestContactsPerms, requestFullDiskAccess } from "../ipc/ipc-onboarding"; import { clearSkipContactsPermsCheck } from "../options"; import { copyDbAtPath, copyLatestDb } from "../data/db-file-utils"; -import logger from "../utils/logger"; -import { logPath } from "../logs"; +import logger, { logPath } from "../utils/logger"; export const getMenu = () => { const menuTemplate: MenuItemConstructorOptions[] = [ diff --git a/next.config.mjs b/next.config.mjs index ef74c99..a37f957 100644 --- a/next.config.mjs +++ b/next.config.mjs @@ -17,6 +17,7 @@ const nextConfig = { }, compiler: { styledComponents: true, + emotion: true, }, transpilePackages: ["@mui/material", "lodash-es"], modularizeImports: { @@ -34,6 +35,7 @@ const nextConfig = { }, }, output: "export", + productionBrowserSourceMaps: true, }; export default nextConfig; diff --git a/src/components/global-search/GlobalSearch.tsx b/src/components/global-search/GlobalSearch.tsx index b65d5db..391b7fb 100644 --- a/src/components/global-search/GlobalSearch.tsx +++ b/src/components/global-search/GlobalSearch.tsx @@ -224,7 +224,7 @@ const ToggleSemanticSearch = () => { Loading Vectors into Memory - This takes ~2s per 100k messages + This takes ~3s per 100k messages @@ -268,10 +268,10 @@ const ToggleSemanticSearch = () => { checked={useSemanticSearch} onChange={async () => { const newUseSemanticSearch = !useSemanticSearch; - setUseSemanticSearch(newUseSemanticSearch); if (newUseSemanticSearch) { await mutateAsync(); } + setUseSemanticSearch(newUseSemanticSearch); setIsOpen(false); }} disabled={disabled}