diff --git a/package.json b/package.json index 9dda948..8998462 100644 --- a/package.json +++ b/package.json @@ -91,4 +91,4 @@ ] }, "packageManager": "yarn@1.22.22" -} +} \ No newline at end of file diff --git a/src/adapters/supabase/helpers/embeddings.ts b/src/adapters/supabase/helpers/embeddings.ts index 8cc2e0e..f17d8b1 100644 --- a/src/adapters/supabase/helpers/embeddings.ts +++ b/src/adapters/supabase/helpers/embeddings.ts @@ -88,7 +88,7 @@ export class Embeddings extends Super { source_id: sourceId, type, plaintext: htmlToPlainText(markdownToPlainText(markdown)).trim(), - embedding: await this._embedWithVoyage(markdown), + embedding: await this._embedWithVoyage(markdown, "document"), metadata, created_at: new Date().toISOString(), modified_at: new Date().toISOString(), @@ -111,8 +111,13 @@ export class Embeddings extends Super { if (!body) { throw new Error(this.context.logger.error("Markdown not found", { sourceId })?.logMessage.raw); } + const embeddingData = await this.getEmbedding(sourceId); + + if (!embeddingData) { + return await this.createEmbedding(sourceId, type, body, metadata); + } - const embedding = await this._embedWithVoyage(body); + const embedding = await this._embedWithVoyage(body, "document"); const toStore: Omit = { source_id: sourceId, @@ -123,12 +128,6 @@ export class Embeddings extends Super { modified_at: new Date().toISOString(), }; - const embeddingData = await this.getEmbedding(sourceId); - - if (!embeddingData) { - return await this.createEmbedding(sourceId, type, body, metadata); - } - const { error } = await this.supabase.from("content").update(toStore).eq("source_id", sourceId); if (error) { @@ -158,7 +157,7 @@ export class Embeddings extends Super { // Working with embeddings async findSimilarIssues(markdown: string, threshold: number, currentId: string): Promise { - const embedding = await this._embedWithVoyage(markdown); + const embedding = await this._embedWithVoyage(markdown, "query"); const { data, error } = await this.supabase.rpc("find_similar_issues", { current_id: currentId, query_embedding: embedding, @@ -173,7 +172,7 @@ export class Embeddings extends Super { // Helpers - private async _embedWithVoyage(text: string | null): Promise { + async _embedWithVoyage(text: string | null, inputType: "document" | "query"): Promise { try { if (text === null) { return new Array(VECTOR_SIZE).fill(0); @@ -181,6 +180,7 @@ export class Embeddings extends Super { const response = await this._voyageClient.embed({ input: text, model: "voyage-large-2-instruct", + inputType: inputType }); return (response.data && response.data[0]?.embedding) || []; } @@ -189,7 +189,7 @@ export class Embeddings extends Super { } } - private _getMetadata(payload: Context["payload"]) { + private _getMetadata(payload: Context<"issue_comment.edited" | "issue_comment.deleted" | "issues.edited" | "issues.deleted" | "issue_comment.created" | "issues.opened">["payload"]) { const { repository: { private: isPrivate, node_id: repoNodeId }, issue: { node_id: issueNodeId }, diff --git a/src/handlers/create-comment-embedding.ts b/src/handlers/comments/create-comment-embedding.ts similarity index 82% rename from src/handlers/create-comment-embedding.ts rename to src/handlers/comments/create-comment-embedding.ts index a199c8f..ad0f99b 100644 --- a/src/handlers/create-comment-embedding.ts +++ b/src/handlers/comments/create-comment-embedding.ts @@ -1,5 +1,5 @@ -import { CallbackResult } from "../proxy-callbacks"; -import { Context } from "../types"; +import { CallbackResult } from "../../proxy-callbacks"; +import { Context } from "../../types"; export async function createCommentEmbedding(context: Context<"issue_comment.created">): Promise { const { diff --git a/src/handlers/delete-comment-embedding.ts b/src/handlers/comments/delete-comment-embedding.ts similarity index 80% rename from src/handlers/delete-comment-embedding.ts rename to src/handlers/comments/delete-comment-embedding.ts index 4deba78..7ae8a8e 100644 --- a/src/handlers/delete-comment-embedding.ts +++ b/src/handlers/comments/delete-comment-embedding.ts @@ -1,5 +1,5 @@ -import { CallbackResult } from "../proxy-callbacks"; -import { Context } from "../types"; +import { CallbackResult } from "../../proxy-callbacks"; +import { Context } from "../../types"; export async function deleteCommentEmbedding(context: Context<"issue_comment.deleted">): Promise { const { diff --git a/src/handlers/update-comment-embedding.ts b/src/handlers/comments/update-comment-embedding.ts similarity index 84% rename from src/handlers/update-comment-embedding.ts rename to src/handlers/comments/update-comment-embedding.ts index 8b1bed5..34e639b 100644 --- a/src/handlers/update-comment-embedding.ts +++ b/src/handlers/comments/update-comment-embedding.ts @@ -1,5 +1,5 @@ -import { CallbackResult } from "../proxy-callbacks"; -import { Context } from "../types"; +import { CallbackResult } from "../../proxy-callbacks"; +import { Context } from "../../types"; /** * Updates embeddings for comments. diff --git a/src/handlers/onboarding/create-setup-instructions.ts b/src/handlers/onboarding/create-setup-instructions.ts new file mode 100644 index 0000000..ca3be19 --- /dev/null +++ b/src/handlers/onboarding/create-setup-instructions.ts @@ -0,0 +1,77 @@ +import { CallbackResult } from "../../proxy-callbacks"; +import { Context } from "../../types"; + +export async function createSetupInstructions(context: Context<"push">): Promise { + const { + logger, + octokit, + adapters: { supabase }, + payload: { repository, commits, sender, pusher } + } = context; + + const docs = [] + + for (const commit of commits) { + const { added, modified } = commit; + const files = [] + + if (added && added.length > 0) { + files.push(...added) + } + if (modified && modified.length > 0) { + files.push(...modified) + } + + for (const file of files) { + if (file.endsWith(".md")) { + docs.push(file) + } + } + } + + if (docs.length === 0) { + return { status: 200, reason: "no markdown files found" }; + } + + logger.info(`Found ${docs.length} markdown files`); + if (!repository.owner || !repository.name) { + return { status: 200, reason: "no repository owner or name found" }; + } + + /** + * voyageai use a special encoding schema and we cannot easily + * use their encoder so we will just have to play it by ear for now. + */ + for (const doc of docs) { + const sourceId = repository.full_name + "/" + doc; + const docContent = await octokit.repos.getContent({ + owner: repository.owner.login, + repo: repository.name, + path: doc, + mediaType: { + format: "raw", + } + }); + + if (!docContent.data) { + return { status: 200, reason: "no content found" }; + } + + const text = docContent.data as unknown as string; + + const uploaded = await supabase.embeddings.createEmbedding(sourceId, "setup_instructions", text, { + author_association: "OWNER", + author_id: sender?.id, + isPrivate: repository.private, + repo_node_id: repository.node_id, + repo_full_name: repository.full_name, + fileChunkIndex: 0, + }); + + logger.info("Uploaded markdown file", { ...uploaded, embedding: "removed for brevity" }); + } + + logger.ok("Successfully uploaded setup instructions", { repository: repository.full_name }); + + return { status: 200, reason: "success" }; +} diff --git a/src/handlers/create-task-embedding.ts b/src/handlers/tasks/create-task-embedding.ts similarity index 82% rename from src/handlers/create-task-embedding.ts rename to src/handlers/tasks/create-task-embedding.ts index a3653ed..75e4758 100644 --- a/src/handlers/create-task-embedding.ts +++ b/src/handlers/tasks/create-task-embedding.ts @@ -1,5 +1,5 @@ -import { CallbackResult } from "../proxy-callbacks"; -import { Context } from "../types"; +import { CallbackResult } from "../../proxy-callbacks"; +import { Context } from "../../types"; export async function addTaskEmbedding(context: Context<"issues.opened">): Promise { const { diff --git a/src/handlers/delete-task-embedding.ts b/src/handlers/tasks/delete-task-embedding.ts similarity index 80% rename from src/handlers/delete-task-embedding.ts rename to src/handlers/tasks/delete-task-embedding.ts index 1cc12b3..6f52190 100644 --- a/src/handlers/delete-task-embedding.ts +++ b/src/handlers/tasks/delete-task-embedding.ts @@ -1,5 +1,5 @@ -import { CallbackResult } from "../proxy-callbacks"; -import { Context } from "../types"; +import { CallbackResult } from "../../proxy-callbacks"; +import { Context } from "../../types"; export async function deleteTaskEmbedding(context: Context<"issues.deleted">): Promise { const { diff --git a/src/handlers/task-deduplication.ts b/src/handlers/tasks/task-deduplication.ts similarity index 93% rename from src/handlers/task-deduplication.ts rename to src/handlers/tasks/task-deduplication.ts index 894b932..fe7dcc4 100644 --- a/src/handlers/task-deduplication.ts +++ b/src/handlers/tasks/task-deduplication.ts @@ -1,5 +1,5 @@ -import { Context } from "../types"; -import { IssueSimilaritySearchResult } from "../types/embeddings"; +import { Context } from "../../types"; +import { IssueSimilaritySearchResult } from "../../types/embeddings"; export interface IssueGraphqlResponse { node: { @@ -88,6 +88,10 @@ async function handleSimilarIssuesComment(context: Context, issueNumber: number, const commentBody = issueList.map((issue) => `- [${issue.node.title}](${issue.node.url}) Similarity: ${issue.similarity}`).join("\n"); const body = `This issue seems to be similar to the following issue(s):\n\n${commentBody}`; + if (!payload.repository.owner || !payload.repository.name) { + return; + } + const existingComments = await context.octokit.issues.listComments({ owner: payload.repository.owner.login, repo: payload.repository.name, @@ -98,6 +102,10 @@ async function handleSimilarIssuesComment(context: Context, issueNumber: number, (comment) => comment.body && comment.body.includes("This issue seems to be similar to the following issue(s)") ); + if (!payload.repository.owner || !payload.repository.name) { + return; + } + if (existingComment) { await context.octokit.issues.updateComment({ owner: payload.repository.owner.login, diff --git a/src/handlers/update-task-embedding.ts b/src/handlers/tasks/update-task-embedding.ts similarity index 82% rename from src/handlers/update-task-embedding.ts rename to src/handlers/tasks/update-task-embedding.ts index c7a5ff2..1728978 100644 --- a/src/handlers/update-task-embedding.ts +++ b/src/handlers/tasks/update-task-embedding.ts @@ -1,5 +1,5 @@ -import { CallbackResult } from "../proxy-callbacks"; -import { Context } from "../types"; +import { CallbackResult } from "../../proxy-callbacks"; +import { Context } from "../../types"; export async function updateTaskEmbedding(context: Context<"issues.edited">): Promise { const { diff --git a/src/proxy-callbacks.ts b/src/proxy-callbacks.ts index f7b8736..2911477 100644 --- a/src/proxy-callbacks.ts +++ b/src/proxy-callbacks.ts @@ -1,11 +1,12 @@ -import { createCommentEmbedding } from "./handlers/create-comment-embedding"; -import { addTaskEmbedding } from "./handlers/create-task-embedding"; -import { deleteCommentEmbedding } from "./handlers/delete-comment-embedding"; -import { deleteTaskEmbedding } from "./handlers/delete-task-embedding"; -import { taskSimilaritySearch } from "./handlers/task-deduplication"; -import { updateCommentEmbedding } from "./handlers/update-comment-embedding"; -import { updateTaskEmbedding } from "./handlers/update-task-embedding"; +import { createCommentEmbedding } from "./handlers/comments/create-comment-embedding"; +import { addTaskEmbedding } from "./handlers/tasks/create-task-embedding"; +import { deleteCommentEmbedding } from "./handlers/comments/delete-comment-embedding"; +import { deleteTaskEmbedding } from "./handlers/tasks/delete-task-embedding"; +import { taskSimilaritySearch } from "./handlers/tasks/task-deduplication"; +import { updateCommentEmbedding } from "./handlers/comments/update-comment-embedding"; +import { updateTaskEmbedding } from "./handlers/tasks/update-task-embedding"; import { Context, SupportedEvents, SupportedEventsU } from "./types"; +import { createSetupInstructions } from "./handlers/onboarding/create-setup-instructions"; export type CallbackResult = { status: 200 | 201 | 204 | 404 | 500; reason: string; content?: string | Record }; @@ -45,6 +46,8 @@ const callbacks = { "issues.opened": [addTaskEmbedding, taskSimilaritySearch], "issues.edited": [updateTaskEmbedding], "issues.deleted": [deleteTaskEmbedding], + + "push": [createSetupInstructions] } as ProxyCallbacks; /** diff --git a/src/types/context.ts b/src/types/context.ts index 1227abf..e5a7709 100644 --- a/src/types/context.ts +++ b/src/types/context.ts @@ -16,7 +16,8 @@ export type SupportedEventsU = | "issue_comment.edited" | "issues.opened" | "issues.edited" - | "issues.deleted"; + | "issues.deleted" + | "push" export type SupportedEvents = { [K in SupportedEventsU]: K extends WebhookEventName ? WebhookEvent : never; diff --git a/src/types/embeddings.ts b/src/types/embeddings.ts index cc786ca..918d7e8 100644 --- a/src/types/embeddings.ts +++ b/src/types/embeddings.ts @@ -14,6 +14,7 @@ export interface CommentMetadata { issue_node_id: string; repo_node_id: string; isPrivate: boolean; + [key: string]: any; } export interface IssueSimilaritySearchResult { diff --git a/tests/main.test.ts b/tests/main.test.ts index e601af9..a7c2ab0 100644 --- a/tests/main.test.ts +++ b/tests/main.test.ts @@ -199,11 +199,11 @@ function createContext( okSpy: jest.SpyInstance; verboseSpy: jest.SpyInstance; repo: Context["payload"]["repository"]; - issue1: Context["payload"]["issue"]; + issue1: Context<"issue_comment.created">["payload"]["issue"]; } { const repo = db.repo.findFirst({ where: { id: { equals: repoId } } }) as unknown as Context["payload"]["repository"]; const sender = db.users.findFirst({ where: { id: { equals: payloadSenderId } } }) as unknown as Context["payload"]["sender"]; - const issue1 = db.issue.findFirst({ where: { id: { equals: issueOne } } }) as unknown as Context["payload"]["issue"]; + const issue1 = db.issue.findFirst({ where: { id: { equals: issueOne } } }) as unknown as Context<"issue_comment.created">["payload"]["issue"]; createComment(commentBody, commentId, nodeId); // create it first then pull it from the DB and feed it to _createContext const comment = db.issueComments.findFirst({ @@ -237,7 +237,7 @@ function createContext( function createContextInner( repo: Context["payload"]["repository"], sender: Context["payload"]["sender"], - issue: Context["payload"]["issue"], + issue: Context<"issue_comment.created">["payload"]["issue"], comment: SupportedEvents["issue_comment.created"]["payload"]["comment"], eventName: SupportedEventsU ): Context<"issue_comment.created"> {