Skip to content

Commit

Permalink
feat: repo readme embeddings (setup_instructions)
Browse files Browse the repository at this point in the history
  • Loading branch information
Keyrxng committed Sep 17, 2024
1 parent 44ea012 commit b3443ec
Show file tree
Hide file tree
Showing 14 changed files with 127 additions and 37 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,4 @@
]
},
"packageManager": "[email protected]"
}
}
22 changes: 11 additions & 11 deletions src/adapters/supabase/helpers/embeddings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ export class Embeddings extends Super {
source_id: sourceId,
type,
plaintext: htmlToPlainText(markdownToPlainText(markdown)).trim(),
embedding: await this._embedWithVoyage(markdown),
embedding: await this._embedWithVoyage(markdown, "document"),
metadata,
created_at: new Date().toISOString(),
modified_at: new Date().toISOString(),
Expand All @@ -111,8 +111,13 @@ export class Embeddings extends Super {
if (!body) {
throw new Error(this.context.logger.error("Markdown not found", { sourceId })?.logMessage.raw);
}
const embeddingData = await this.getEmbedding(sourceId);

if (!embeddingData) {
return await this.createEmbedding(sourceId, type, body, metadata);
}

const embedding = await this._embedWithVoyage(body);
const embedding = await this._embedWithVoyage(body, "document");

const toStore: Omit<CommentType, "created_at"> = {
source_id: sourceId,
Expand All @@ -123,12 +128,6 @@ export class Embeddings extends Super {
modified_at: new Date().toISOString(),
};

const embeddingData = await this.getEmbedding(sourceId);

if (!embeddingData) {
return await this.createEmbedding(sourceId, type, body, metadata);
}

const { error } = await this.supabase.from("content").update(toStore).eq("source_id", sourceId);

if (error) {
Expand Down Expand Up @@ -158,7 +157,7 @@ export class Embeddings extends Super {
// Working with embeddings

async findSimilarIssues(markdown: string, threshold: number, currentId: string): Promise<IssueSimilaritySearchResult[]> {
const embedding = await this._embedWithVoyage(markdown);
const embedding = await this._embedWithVoyage(markdown, "query");
const { data, error } = await this.supabase.rpc("find_similar_issues", {
current_id: currentId,
query_embedding: embedding,
Expand All @@ -173,14 +172,15 @@ export class Embeddings extends Super {

// Helpers

private async _embedWithVoyage(text: string | null): Promise<number[]> {
async _embedWithVoyage(text: string | null, inputType: "document" | "query"): Promise<number[]> {
try {
if (text === null) {
return new Array(VECTOR_SIZE).fill(0);
} else {
const response = await this._voyageClient.embed({
input: text,
model: "voyage-large-2-instruct",
inputType: inputType
});
return (response.data && response.data[0]?.embedding) || [];
}
Expand All @@ -189,7 +189,7 @@ export class Embeddings extends Super {
}
}

private _getMetadata(payload: Context["payload"]) {
private _getMetadata(payload: Context<"issue_comment.edited" | "issue_comment.deleted" | "issues.edited" | "issues.deleted" | "issue_comment.created" | "issues.opened">["payload"]) {
const {
repository: { private: isPrivate, node_id: repoNodeId },
issue: { node_id: issueNodeId },
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { CallbackResult } from "../proxy-callbacks";
import { Context } from "../types";
import { CallbackResult } from "../../proxy-callbacks";
import { Context } from "../../types";

export async function createCommentEmbedding(context: Context<"issue_comment.created">): Promise<CallbackResult> {
const {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { CallbackResult } from "../proxy-callbacks";
import { Context } from "../types";
import { CallbackResult } from "../../proxy-callbacks";
import { Context } from "../../types";

export async function deleteCommentEmbedding(context: Context<"issue_comment.deleted">): Promise<CallbackResult> {
const {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { CallbackResult } from "../proxy-callbacks";
import { Context } from "../types";
import { CallbackResult } from "../../proxy-callbacks";
import { Context } from "../../types";

/**
* Updates embeddings for comments.
Expand Down
77 changes: 77 additions & 0 deletions src/handlers/onboarding/create-setup-instructions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import { CallbackResult } from "../../proxy-callbacks";
import { Context } from "../../types";

/**
 * Handles a `push` event by embedding every markdown file that the pushed
 * commits added or modified, storing each one as a `setup_instructions` entry.
 *
 * Each path is processed once per push, even when several commits touch it.
 * A file whose content is empty or not returned as text is skipped so that
 * the remaining files are still embedded.
 *
 * @param context - Push event context supplying the logger, octokit client,
 *   supabase adapter and webhook payload.
 * @returns A 200 result whose `reason` says whether anything was embedded.
 */
export async function createSetupInstructions(context: Context<"push">): Promise<CallbackResult> {
  const {
    logger,
    octokit,
    adapters: { supabase },
    payload: { repository, commits, sender },
  } = context;

  // A Set keeps first-seen order while dropping paths repeated across commits.
  const docs = new Set<string>();

  for (const { added, modified } of commits) {
    for (const file of [...(added ?? []), ...(modified ?? [])]) {
      if (file.endsWith(".md")) {
        docs.add(file);
      }
    }
  }

  if (docs.size === 0) {
    return { status: 200, reason: "no markdown files found" };
  }

  logger.info(`Found ${docs.size} markdown files`);
  if (!repository.owner || !repository.name) {
    return { status: 200, reason: "no repository owner or name found" };
  }

  /**
   * voyageai use a special encoding schema and we cannot easily
   * use their encoder so we will just have to play it by ear for now.
   * NOTE(review): presumably this is why each file is stored whole as
   * chunk 0 rather than being split by token count — confirm.
   */
  for (const doc of docs) {
    const sourceId = `${repository.full_name}/${doc}`;
    const docContent = await octokit.repos.getContent({
      owner: repository.owner.login,
      repo: repository.name,
      path: doc,
      mediaType: {
        format: "raw",
      },
    });

    // The "raw" media type is requested, so the body is expected to be the
    // file text; narrow at runtime instead of asserting the type.
    const text: unknown = docContent.data;
    if (typeof text !== "string" || text.length === 0) {
      logger.info("Skipping markdown file with no content", { sourceId });
      continue;
    }

    const uploaded = await supabase.embeddings.createEmbedding(sourceId, "setup_instructions", text, {
      author_association: "OWNER",
      author_id: sender?.id,
      isPrivate: repository.private,
      repo_node_id: repository.node_id,
      repo_full_name: repository.full_name,
      fileChunkIndex: 0,
    });

    logger.info("Uploaded markdown file", { ...uploaded, embedding: "removed for brevity" });
  }

  logger.ok("Successfully uploaded setup instructions", { repository: repository.full_name });

  return { status: 200, reason: "success" };
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { CallbackResult } from "../proxy-callbacks";
import { Context } from "../types";
import { CallbackResult } from "../../proxy-callbacks";
import { Context } from "../../types";

export async function addTaskEmbedding(context: Context<"issues.opened">): Promise<CallbackResult> {
const {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { CallbackResult } from "../proxy-callbacks";
import { Context } from "../types";
import { CallbackResult } from "../../proxy-callbacks";
import { Context } from "../../types";

export async function deleteTaskEmbedding(context: Context<"issues.deleted">): Promise<CallbackResult> {
const {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Context } from "../types";
import { IssueSimilaritySearchResult } from "../types/embeddings";
import { Context } from "../../types";
import { IssueSimilaritySearchResult } from "../../types/embeddings";

export interface IssueGraphqlResponse {
node: {
Expand Down Expand Up @@ -88,6 +88,10 @@ async function handleSimilarIssuesComment(context: Context, issueNumber: number,
const commentBody = issueList.map((issue) => `- [${issue.node.title}](${issue.node.url}) Similarity: ${issue.similarity}`).join("\n");
const body = `This issue seems to be similar to the following issue(s):\n\n${commentBody}`;

if (!payload.repository.owner || !payload.repository.name) {
return;
}

const existingComments = await context.octokit.issues.listComments({
owner: payload.repository.owner.login,
repo: payload.repository.name,
Expand All @@ -98,6 +102,10 @@ async function handleSimilarIssuesComment(context: Context, issueNumber: number,
(comment) => comment.body && comment.body.includes("This issue seems to be similar to the following issue(s)")
);

if (!payload.repository.owner || !payload.repository.name) {
return;
}

if (existingComment) {
await context.octokit.issues.updateComment({
owner: payload.repository.owner.login,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { CallbackResult } from "../proxy-callbacks";
import { Context } from "../types";
import { CallbackResult } from "../../proxy-callbacks";
import { Context } from "../../types";

export async function updateTaskEmbedding(context: Context<"issues.edited">): Promise<CallbackResult> {
const {
Expand Down
17 changes: 10 additions & 7 deletions src/proxy-callbacks.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import { createCommentEmbedding } from "./handlers/create-comment-embedding";
import { addTaskEmbedding } from "./handlers/create-task-embedding";
import { deleteCommentEmbedding } from "./handlers/delete-comment-embedding";
import { deleteTaskEmbedding } from "./handlers/delete-task-embedding";
import { taskSimilaritySearch } from "./handlers/task-deduplication";
import { updateCommentEmbedding } from "./handlers/update-comment-embedding";
import { updateTaskEmbedding } from "./handlers/update-task-embedding";
import { createCommentEmbedding } from "./handlers/comments/create-comment-embedding";
import { addTaskEmbedding } from "./handlers/tasks/create-task-embedding";
import { deleteCommentEmbedding } from "./handlers/comments/delete-comment-embedding";
import { deleteTaskEmbedding } from "./handlers/tasks/delete-task-embedding";
import { taskSimilaritySearch } from "./handlers/tasks/task-deduplication";
import { updateCommentEmbedding } from "./handlers/comments/update-comment-embedding";
import { updateTaskEmbedding } from "./handlers/tasks/update-task-embedding";
import { Context, SupportedEvents, SupportedEventsU } from "./types";
import { createSetupInstructions } from "./handlers/onboarding/create-setup-instructions";

export type CallbackResult = { status: 200 | 201 | 204 | 404 | 500; reason: string; content?: string | Record<string, unknown> };

Expand Down Expand Up @@ -45,6 +46,8 @@ const callbacks = {
"issues.opened": [addTaskEmbedding, taskSimilaritySearch],
"issues.edited": [updateTaskEmbedding],
"issues.deleted": [deleteTaskEmbedding],

"push": [createSetupInstructions]
} as ProxyCallbacks;

/**
Expand Down
3 changes: 2 additions & 1 deletion src/types/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ export type SupportedEventsU =
| "issue_comment.edited"
| "issues.opened"
| "issues.edited"
| "issues.deleted";
| "issues.deleted"
| "push"

export type SupportedEvents = {
[K in SupportedEventsU]: K extends WebhookEventName ? WebhookEvent<K> : never;
Expand Down
1 change: 1 addition & 0 deletions src/types/embeddings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export interface CommentMetadata {
issue_node_id: string;
repo_node_id: string;
isPrivate: boolean;
[key: string]: any;
}

export interface IssueSimilaritySearchResult {
Expand Down
6 changes: 3 additions & 3 deletions tests/main.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,11 @@ function createContext(
okSpy: jest.SpyInstance;
verboseSpy: jest.SpyInstance;
repo: Context["payload"]["repository"];
issue1: Context["payload"]["issue"];
issue1: Context<"issue_comment.created">["payload"]["issue"];
} {
const repo = db.repo.findFirst({ where: { id: { equals: repoId } } }) as unknown as Context["payload"]["repository"];
const sender = db.users.findFirst({ where: { id: { equals: payloadSenderId } } }) as unknown as Context["payload"]["sender"];
const issue1 = db.issue.findFirst({ where: { id: { equals: issueOne } } }) as unknown as Context["payload"]["issue"];
const issue1 = db.issue.findFirst({ where: { id: { equals: issueOne } } }) as unknown as Context<"issue_comment.created">["payload"]["issue"];

createComment(commentBody, commentId, nodeId); // create it first then pull it from the DB and feed it to _createContext
const comment = db.issueComments.findFirst({
Expand Down Expand Up @@ -237,7 +237,7 @@ function createContext(
function createContextInner(
repo: Context["payload"]["repository"],
sender: Context["payload"]["sender"],
issue: Context["payload"]["issue"],
issue: Context<"issue_comment.created">["payload"]["issue"],
comment: SupportedEvents["issue_comment.created"]["payload"]["comment"],
eventName: SupportedEventsU
): Context<"issue_comment.created"> {
Expand Down

0 comments on commit b3443ec

Please sign in to comment.