Skip to content

Commit

Permalink
clean up logging for workers, improve query to only include semantic …
Browse files Browse the repository at this point in the history
…results
  • Loading branch information
jonluca committed Apr 19, 2023
1 parent c835b7a commit 3b63767
Show file tree
Hide file tree
Showing 10 changed files with 91 additions and 67 deletions.
81 changes: 50 additions & 31 deletions electron-src/data/database.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import type { SelectQueryBuilder, Kysely } from "kysely";
import { sql } from "kysely";
import type { DB as MesssagesDatabase } from "../../_generated/types";
import logger from "../utils/logger";
import { countBy, groupBy, partition } from "lodash-es";
import { countBy, groupBy, uniq } from "lodash-es";
import type { Contact } from "electron-mac-contacts";
import { decodeMessageBuffer, getTextFromBuffer } from "../utils/buffer";
import { appMessagesDbCopy } from "../utils/constants";
Expand Down Expand Up @@ -107,7 +107,7 @@ export class SQLDatabase extends BaseDatabase<MesssagesDatabase> {

getMessageGuidsFromText = async (texts: string[]) => {
const db = this.db;
const query = db.selectFrom("message").select(["guid", "text"]).where("text", "in", texts);
const query = db.selectFrom("message").select(["guid", "text"]).where("text", "in", texts).limit(10000);
const results = await query.execute();
const indexMap = new Map<string, number>();
for (let i = 0; i < texts.length; i++) {
Expand All @@ -132,8 +132,6 @@ export class SQLDatabase extends BaseDatabase<MesssagesDatabase> {
endDate?: Date | null,
) => {
const db = this.db;
// SELECT * FROM message_fts WHERE text MATCH 'jonluca' ORDER BY rank;

const cleanedQuery = searchTerm.replace(/[^a-zA-Z0-9 ]/g, "");
const textMatch = await db
.selectFrom("message_fts")
Expand All @@ -143,27 +141,29 @@ export class SQLDatabase extends BaseDatabase<MesssagesDatabase> {
.limit(1000)
.execute();
const messageGuids = textMatch.map((m) => m.message_id as string);
return this.fullTextMessageSearchWithGuids(messageGuids, searchTerm, chatIds, handleIds, startDate, endDate);
const query = this.getFilteredSearchQuery({ chatIds, handleIds, startDate, endDate })
.where(({ or, cmpr }) => {
return or([cmpr("filename", "like", "%" + searchTerm + "%"), cmpr("text", "like", "%" + searchTerm + "%")]);
})
.limit(1000);
const alternateResults = await query.execute();
const alternateMessageGuids = alternateResults.map((m) => m.guid as string);
const allMessageGuids = uniq([...messageGuids, ...alternateMessageGuids]);
return this.fullTextMessageSearchWithGuids(allMessageGuids, searchTerm, chatIds, handleIds, startDate, endDate);
};
fullTextMessageSearchWithGuids = async (
messageGuids: string[],
searchTerm: string,
chatIds?: number[],
handleIds?: number[],
startDate?: Date | null,
endDate?: Date | null,
) => {
const messageGuidsSet = new Set(messageGuids);

private getFilteredSearchQuery = ({
chatIds,
handleIds,
startDate,
endDate,
}: {
chatIds?: number[];
handleIds?: number[];
startDate?: Date | null;
endDate?: Date | null;
}) => {
let query = this.getJoinedMessageQuery()
.select("message.guid as guid")
.where(({ or, cmpr }) => {
return or([
cmpr("message.guid", "in", messageGuids),
cmpr("filename", "like", "%" + searchTerm + "%"),
cmpr("text", "like", "%" + searchTerm + "%"),
]);
})
.where("item_type", "not in", [1, 3, 4, 5, 6])
.where("associated_message_type", "=", 0);

Expand Down Expand Up @@ -193,17 +193,36 @@ export class SQLDatabase extends BaseDatabase<MesssagesDatabase> {
query = query.where("date", "<", offset);
}

return query;
};
fullTextMessageSearchWithGuids = async (
messageGuids: string[],
searchTerm: string,
chatIds?: number[],
handleIds?: number[],
startDate?: Date | null,
endDate?: Date | null,
) => {
const query = this.getFilteredSearchQuery({ chatIds, handleIds, startDate, endDate }).where(
"message.guid",
"in",
messageGuids,
);

const messages = await query.limit(10000).execute();
const [matchedMessages, unmatchedMessages] = partition(messages, (m) => messageGuidsSet.has(m.guid));
logger.info(`Matched ${matchedMessages.length} messages, unmatched ${unmatchedMessages.length} messages`);
matchedMessages.sort((a, b) => {
return messageGuids.indexOf(a.guid) - messageGuids.indexOf(b.guid);
});
unmatchedMessages.sort((a, b) => {
return (b.date || 0) - (a.date || 0);
const indexMap = new Map<string, number>();
for (let i = 0; i < messageGuids.length; i++) {
indexMap.set(messageGuids[i], i);
}
messages.sort((a, b) => {
const aIndex = indexMap.get(a.guid!);
const bIndex = indexMap.get(b.guid!);
if (aIndex === undefined || bIndex === undefined) {
return 0;
}
return aIndex - bIndex;
});
const allMessages = [...matchedMessages, ...unmatchedMessages];
return this.enhanceMessageResponses<(typeof messages)[number]>(allMessages);
return this.enhanceMessageResponses<(typeof messages)[number]>(messages);
};
private convertDate = (date: number) => {
return new Date(date / 1000000 + 978307200000);
Expand Down
10 changes: 1 addition & 9 deletions electron-src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,11 @@ import { getMenu } from "./window/menu";

import "better-sqlite3";
import { mainAppIconDevPng } from "./constants";
import logger, { fileLogFormat } from "./utils/logger";
import logger, { logPath, logStream } from "./utils/logger";
import { setupRouteHandlers } from "./utils/routes";
import { DESKTOP_VERSION } from "./versions";
import { autoUpdater } from "electron-updater";
import dbWorker from "./workers/database-worker";
import winston from "winston";
import { logPath, logStream } from "./logs";

registerContextMenu({
showSaveImageAs: true,
Expand All @@ -42,12 +40,6 @@ let errorTries = 0;
const MAX_ERROR_TRIES = 5;

const amMainInstance = app.requestSingleInstanceLock();
logger.add(
new winston.transports.Stream({
stream: logStream,
format: fileLogFormat,
}),
);
logger.info(`Starting logging to ${logPath}`);
if (!amMainInstance) {
logStream.write("Not the main instance - quitting");
Expand Down
15 changes: 0 additions & 15 deletions electron-src/logs.ts

This file was deleted.

13 changes: 9 additions & 4 deletions electron-src/semantic-search/semantic-search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ export async function semanticQuery({ queryText, openAiKey }: SemanticQueryOpts)
let floatEmbedding = existingEmbedding?.embedding;

if (!existingEmbedding) {
const now = performance.now();
const configuration = new Configuration({
apiKey: openAiKey,
});
Expand All @@ -145,6 +146,7 @@ export async function semanticQuery({ queryText, openAiKey }: SemanticQueryOpts)
input: queryText,
model: OPENAI_EMBEDDING_MODEL,
});
logger.info(`Got embedding from OpenAI in ${performance.now() - now}ms`);
const embed = openAiResponse.data;
const embedding = embed.data?.[0]?.embedding;
if (!embedding) {
Expand All @@ -155,7 +157,10 @@ export async function semanticQuery({ queryText, openAiKey }: SemanticQueryOpts)
floatEmbedding = new Float32Array(embedding);
}

return dbWorker.embeddingsWorker.calculateSimilarity(floatEmbedding!);
const now = performance.now();
const calculateSimilarity = await dbWorker.embeddingsWorker.calculateSimilarity(floatEmbedding!);
logger.info(`Calculated similarity in ${performance.now() - now}ms`);
return calculateSimilarity;
}

handleIpc("createEmbeddings", async ({ openAiKey: openAiKey }) => {
Expand Down Expand Up @@ -196,14 +201,14 @@ handleIpc(
}
if (useSemanticSearch) {
logger.info("Using semantic search");
const now = performance.now();
const messageTexts = await semanticQuery({
openAiKey,
queryText: searchTerm,
});
logger.info(`Got ${messageTexts.length} results`);

logger.info(`Got ${messageTexts.length} results in ${performance.now() - now}ms`);
const guids = await dbWorker.worker.getMessageGuidsFromText(messageTexts);

logger.info(`Got ${guids.length} guids from text`);
return await dbWorker.worker.fullTextMessageSearchWithGuids(
guids,
searchTerm,
Expand Down
6 changes: 5 additions & 1 deletion electron-src/utils/flags.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import type { App } from "electron";
/**
 * Applies process-level tuning and Chromium/V8 command-line switches.
 *
 * Call this before the app is ready; switches appended afterwards are not
 * picked up by processes that have already been launched.
 *
 * @param app - The Electron `App` instance whose command line is extended.
 */
export const addFlags = (app: App) => {
  // libuv reads this when its thread pool is first used, so set it up front.
  process.env.UV_THREADPOOL_SIZE = "128";

  app.commandLine.appendSwitch(
    "enable-features",
    "HardwareMediaKeyHandling,MediaSessionService,WebGPU,WebGPUDeveloperFeatures,WebGPUImportTexture,CSSVideoDynamicRangeMediaQueries,ExtraWebGLVideoTextureMetadata",
  );
  app.commandLine.appendSwitch("ignore-connections-limit", "localhost");
  app.commandLine.appendArgument("--enable-experimental-web-platform-features");

  // `appendSwitch` takes the switch NAME (no leading dashes) and its VALUE as
  // separate arguments. Passing `'--js-flags="..."'` as a single string makes
  // the whole text the switch name, so nothing is registered under `js-flags`
  // and the V8 flags never take effect.
  // NOTE(review): 32678 looks like a typo for 32768, and `--use-largepages` is
  // documented as a Node.js CLI option rather than a V8 flag — confirm. The
  // values are kept exactly as they were.
  app.commandLine.appendSwitch(
    "js-flags",
    "--max-old-space-size=32678 --max-semi-space-size=32678 --use-largepages=silent",
  );

  // Same name/value split as above: the switch is `remote-allow-origins`, the value `*`.
  app.commandLine.appendSwitch("remote-allow-origins", "*");
};
21 changes: 20 additions & 1 deletion electron-src/utils/logger.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import type { LeveledLogMethod } from "winston";
import fs from "fs";
import winston from "winston";
import { threadId } from "node:worker_threads";
import path from "path";
import os from "os";
import { appPath } from "../versions";
import { createWriteStream } from "fs";

const { combine, timestamp, printf, colorize, errors, json, splat } = winston.format;
const ts = timestamp({
Expand All @@ -19,12 +25,25 @@ export const print = printf((info) => {
const localFormat = combine(ts, colorize(), splat(), errors({ stack: true }), print);
export const fileLogFormat = combine(ts, splat(), errors({ stack: true }), print);

// Module-level log setup: runs once per import of this module. Each thread that
// imports it opens its own log file, because the file name includes the thread id.

// Anything other than NODE_ENV === "production" is treated as a dev run.
const isDev = process.env.NODE_ENV !== "production";
// Log directory: <home>/Library/Logs/<appPath>.
const logDir = path.join(os.homedir(), "Library", "Logs", appPath);
// ensure directory exists recursively
if (!fs.existsSync(logDir)) {
fs.mkdirSync(logDir, { recursive: true });
}

// A falsy threadId (0) yields no prefix; any other thread tags its file with
// `worker-<id>-` so threads do not write to the same file.
const workerPrefix = threadId ? `worker-${threadId}-` : "";
// File name: [dev-][worker-<id>-]run-<ISO timestamp at module load>.log
export const logPath = path.join(logDir, `${isDev ? "dev-" : ""}${workerPrefix}run-${new Date().toISOString()}.log`);
// Write stream for the log file; exported so other modules can write to it directly.
export const logStream = createWriteStream(logPath);
/**
 * Shared winston logger at "debug" level with two transports: a console
 * transport using `localFormat` and a stream transport that writes to
 * `logStream` using `fileLogFormat`.
 */
export const logger = winston.createLogger({
level: "debug",
transports: [
new winston.transports.Console({
format: localFormat,
level: "debug",
}),
// File output goes through the stream opened above.
new winston.transports.Stream({
stream: logStream,
format: fileLogFormat,
}),
],
});
Expand Down
3 changes: 1 addition & 2 deletions electron-src/window/main-window.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@ import isDev from "electron-is-dev";
import { format } from "url";
import { showErrorAlert, withRetries } from "../utils/util";
import prepareNext from "../utils/next-helper";
import logger from "../utils/logger";
import logger, { logStream } from "../utils/logger";
import { windows } from "../index";
import { addWebRequestToSession } from "../utils/routes";
import { logStream } from "../logs";

const setupNext = async () => {
try {
Expand Down
3 changes: 1 addition & 2 deletions electron-src/window/menu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ import { showApp } from "../utils/util";
import { requestContactsPerms, requestFullDiskAccess } from "../ipc/ipc-onboarding";
import { clearSkipContactsPermsCheck } from "../options";
import { copyDbAtPath, copyLatestDb } from "../data/db-file-utils";
import logger from "../utils/logger";
import { logPath } from "../logs";
import logger, { logPath } from "../utils/logger";

export const getMenu = () => {
const menuTemplate: MenuItemConstructorOptions[] = [
Expand Down
2 changes: 2 additions & 0 deletions next.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ const nextConfig = {
},
compiler: {
styledComponents: true,
emotion: true,
},
transpilePackages: ["@mui/material", "lodash-es"],
modularizeImports: {
Expand All @@ -34,6 +35,7 @@ const nextConfig = {
},
},
output: "export",
productionBrowserSourceMaps: true,
};

export default nextConfig;
4 changes: 2 additions & 2 deletions src/components/global-search/GlobalSearch.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ const ToggleSemanticSearch = () => {
Loading Vectors into Memory
</Typography>
<Typography variant="h6" sx={{ color: "white" }}>
This takes ~2s per 100k messages
This takes ~3s per 100k messages
</Typography>
<CircularProgress sx={{ my: 2 }} />
</>
Expand Down Expand Up @@ -268,10 +268,10 @@ const ToggleSemanticSearch = () => {
checked={useSemanticSearch}
onChange={async () => {
const newUseSemanticSearch = !useSemanticSearch;
setUseSemanticSearch(newUseSemanticSearch);
if (newUseSemanticSearch) {
await mutateAsync();
}
setUseSemanticSearch(newUseSemanticSearch);
setIsOpen(false);
}}
disabled={disabled}
Expand Down

0 comments on commit 3b63767

Please sign in to comment.