diff --git a/.github/knip.ts b/.github/knip.ts index 5be9b7a..984d370 100644 --- a/.github/knip.ts +++ b/.github/knip.ts @@ -3,7 +3,14 @@ import type { KnipConfig } from "knip"; const config: KnipConfig = { entry: ["src/main.ts"], project: ["src/**/*.ts"], - ignore: ["src/types/config.ts", "**/__mocks__/**", "**/__fixtures__/**", "src/types/database.ts"], + ignore: [ + "src/types/config.ts", + "**/__mocks__/**", + "**/__fixtures__/**", + "src/types/database.ts", + "src/handlers/user-issue-scraper.ts", + "src/handlers/issue-scraper.ts", + ], ignoreExportsUsedInFile: true, // eslint can also be safely ignored as per the docs: https://knip.dev/guides/handling-issues#eslint--jest ignoreDependencies: ["eslint-config-prettier", "eslint-plugin-prettier", "ts-node"], diff --git a/.gitignore b/.gitignore index 40d3396..1fecd91 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,4 @@ cypress/screenshots script.ts .wrangler test-dashboard.md +auth.users.json diff --git a/.husky/commit-msg b/.husky/commit-msg index b78bacb..7179f96 100644 --- a/.husky/commit-msg +++ b/.husky/commit-msg @@ -1,4 +1,4 @@ #!/usr/bin/env sh . "$(dirname -- "$0")/_/husky.sh" -yarn commitlint --edit "$1" \ No newline at end of file +bun commitlint --edit "$1" \ No newline at end of file diff --git a/.husky/pre-commit b/.husky/pre-commit index 5a182ef..1d67559 100644 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -1,4 +1,4 @@ #!/usr/bin/env sh . "$(dirname -- "$0")/_/husky.sh" -yarn lint-staged +bun lint-staged diff --git a/README.md b/README.md index bf698a8..8491bed 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,8 @@ This is a plugin for [Ubiquibot](https://github.com/ubiquity-os/ubiquity-os-kern ## Testing Locally -- Run `yarn install` to install the dependencies. -- Run `yarn worker` to start the server. +- Run `bun install` to install the dependencies. +- Run `bun worker` to start the server. 
- Make HTTP requests to the server to test the plugin with content type `Application/JSON` ``` @@ -62,4 +62,4 @@ This is a plugin for [Ubiquibot](https://github.com/ubiquity-os/ubiquity-os-kern ## Testing -- Run `yarn test` to run the tests. +- Run `bun test` to run the tests. diff --git a/package.json b/package.json index fd4e44d..1159001 100644 --- a/package.json +++ b/package.json @@ -76,7 +76,7 @@ }, "lint-staged": { "*.ts": [ - "yarn prettier --write", + "bun prettier --write", "eslint --fix" ], "src/**.{ts,json}": [ diff --git a/src/handlers/issue-scraper.ts b/src/handlers/issue-scraper.ts new file mode 100644 index 0000000..da0ac3c --- /dev/null +++ b/src/handlers/issue-scraper.ts @@ -0,0 +1,304 @@ +import { createClient } from "@supabase/supabase-js"; +import { VoyageAIClient } from "voyageai"; +import { customOctokit as Octokit } from "@ubiquity-os/plugin-sdk/octokit"; +import markdownit from "markdown-it"; +import plainTextPlugin from "markdown-it-plain-text"; +import "dotenv/config"; +import { createAdapters } from "../adapters"; +import { Context } from "../types/context"; + +interface MarkdownItWithPlainText extends markdownit { + plainText: string; +} + +function markdownToPlainText(markdown: string | null): string | null { + if (!markdown) return markdown; + const md = markdownit() as MarkdownItWithPlainText; + md.use(plainTextPlugin); + md.render(markdown); + return md.plainText; +} + +interface IssueMetadata { + nodeId: string; + number: number; + title: string; + body: string; + state: string; + repositoryName: string; + repositoryId: number; + assignees: string[]; + authorId: number; + createdAt: string; + closedAt: string | null; + stateReason: string | null; + updatedAt: string; +} + +interface IssueNode { + id: string; + number: number; + title: string; + body: string; + state: string; + stateReason: string | null; + createdAt: string; + updatedAt: string; + closedAt: string | null; + author: { + login: string; + } | null; + assignees: { + 
nodes: Array<{ + login: string; + }>; + }; + repository: { + id: string; + name: string; + owner: { + login: string; + }; + }; +} + +interface GraphQlSearchResponse { + search: { + pageInfo: { + hasNextPage: boolean; + endCursor: string | null; + }; + nodes: Array<IssueNode>; + }; +} + +const SEARCH_ISSUES_QUERY = ` + query SearchIssues($searchText: String!, $after: String) { + search( + query: $searchText, + type: ISSUE, + first: 100, + after: $after + ) { + pageInfo { + hasNextPage + endCursor + } + nodes { + ... on Issue { + id + number + title + body + state + stateReason + createdAt + updatedAt + closedAt + author { + login + } + assignees(first: 10) { + nodes { + login + } + } + repository { + id + name + owner { + login + } + } + } + } + } + } +`; + +async function fetchAuthorId(octokit: InstanceType<typeof Octokit>, login: string): Promise<number> { + try { + const response = await octokit.rest.users.getByUsername({ username: login }); + return response.data.id; + } catch (error) { + console.error(`Error fetching author ID for ${login}:`, error); + return -1; + } +} + +async function fetchUserIssues(octokit: InstanceType<typeof Octokit>, username: string): Promise<IssueNode[]> { + const allIssues: IssueNode[] = []; + let hasNextPage = true; + let cursor: string | null = null; + + const searchText = `assignee:${username} is:issue is:closed`; + + while (hasNextPage) { + const variables: { searchText: string; after?: string } = { + searchText, + }; + if (cursor) { + variables.after = cursor; + } + + const response: GraphQlSearchResponse = await octokit.graphql(SEARCH_ISSUES_QUERY, variables); + + const completedIssues = response.search.nodes.filter((issue) => issue.stateReason === "COMPLETED"); + allIssues.push(...completedIssues); + + hasNextPage = response.search.pageInfo.hasNextPage; + cursor = response.search.pageInfo.endCursor; + + if (!cursor) break; + } + + return allIssues; +} + +// Pulls issues from GitHub and stores them in Supabase +export async function issueScraper(username: string, token?: string): Promise<string> { 
+ try { + if (!username) { + throw new Error("Username is required"); + } + + const required = ["GITHUB_TOKEN", "SUPABASE_URL", "SUPABASE_KEY", "VOYAGEAI_API_KEY"]; + const missing = required.filter((key) => !process.env[key]); + if (missing.length > 0) { + throw new Error(`Missing required environment variables: ${missing.join(", ")}`); + } + + const context = { + adapters: {}, + logger: { + info: (message: string, data: Record<string, unknown>) => console.log("INFO:", message + ":", data), + error: (message: string, data: Record<string, unknown>) => console.error("ERROR:", message + ":", data), + }, + octokit: new Octokit({ auth: token || process.env.GITHUB_TOKEN }), + } as unknown as Context; + + const supabaseUrl = process.env.SUPABASE_URL; + const supabaseKey = process.env.SUPABASE_KEY; + const voyageApiKey = process.env.VOYAGEAI_API_KEY; + + if (!supabaseUrl || !supabaseKey || !voyageApiKey) { + throw new Error("Required environment variables are missing"); + } + + const supabase = createClient(supabaseUrl, supabaseKey); + const voyageClient = new VoyageAIClient({ apiKey: voyageApiKey }); + const adapters = createAdapters(supabase, voyageClient, context); + + const issues = await fetchUserIssues(context.octokit, username); + const processedIssues: Array<{ issue: IssueMetadata; error?: string }> = []; + + for (const issue of issues) { + try { + const authorId = issue.author?.login ? 
await fetchAuthorId(context.octokit, issue.author.login) : -1; + const repoOwner = issue.repository.owner.login; + + const metadata: IssueMetadata = { + nodeId: issue.id, + number: issue.number, + title: issue.title || "", + body: issue.body || "", + state: issue.state, + stateReason: issue.stateReason, + repositoryName: issue.repository.name, + repositoryId: parseInt(issue.repository.id), + assignees: (issue.assignees?.nodes || []).map((assignee) => assignee.login), + authorId, + createdAt: issue.createdAt, + closedAt: issue.closedAt, + updatedAt: issue.updatedAt, + }; + + const markdown = metadata.body + " " + metadata.title; + const plaintext = markdownToPlainText(markdown); + const embedding = await adapters.voyage.embedding.createEmbedding(plaintext); + + const payload = { + issue: metadata, + action: "created", + sender: { + login: username, + }, + repository: { + id: parseInt(issue.repository.id), + node_id: issue.repository.id, + name: issue.repository.name, + full_name: `${repoOwner}/${issue.repository.name}`, + owner: { + login: repoOwner, + id: authorId, + type: "User", + site_admin: false, + }, + }, + }; + + const { error } = await supabase.from("issues").upsert({ + id: metadata.nodeId, + markdown, + plaintext, + embedding: JSON.stringify(embedding), + author_id: metadata.authorId, + modified_at: metadata.updatedAt, + payload: payload, + }); + + processedIssues.push({ + issue: metadata, + error: error ? `Error storing issue: ${error.message}` : undefined, + }); + } catch (error) { + processedIssues.push({ + issue: { + nodeId: issue.id, + number: issue.number, + title: issue.title || "", + body: issue.body || "", + state: issue.state, + stateReason: issue.stateReason, + repositoryName: issue.repository.name, + repositoryId: parseInt(issue.repository.id), + assignees: [], + authorId: -1, + createdAt: issue.createdAt, + closedAt: issue.closedAt, + updatedAt: issue.updatedAt, + }, + error: `Error processing issue: ${error instanceof Error ? 
error.message : "Unknown error"}`, + }); + } + } + + return JSON.stringify( + { + success: true, + stats: { + storageSuccessful: processedIssues.filter((p) => !p.error).length, + storageFailed: processedIssues.filter((p) => p.error).length, + }, + errors: processedIssues + .filter((p) => p.error) + .map((p) => ({ + type: "storage", + name: `${p.issue.repositoryName}#${p.issue.number}`, + error: p.error, + })), + issues: processedIssues.map((p) => ({ + number: p.issue.number, + title: p.issue.title, + repo: p.issue.repositoryName, + error: p.error, + })), + }, + null, + 2 + ); + } catch (error) { + console.error("Error in issueScraper:", error); + throw error; + } +} diff --git a/src/handlers/user-issue-scraper.ts b/src/handlers/user-issue-scraper.ts new file mode 100644 index 0000000..99131ae --- /dev/null +++ b/src/handlers/user-issue-scraper.ts @@ -0,0 +1,46 @@ +import { issueScraper } from "./issue-scraper"; +import fs from "fs"; +import path from "path"; + +interface UserMetadata { + raw_user_meta_data: { + user_name: string; + }; +} + +export async function userIssueScraper(fileDir: string): Promise<void> { + try { + // Read auth.users.json + const authUsersPath = path.join(fileDir, "auth.users.json"); + const authUsersData = fs.readFileSync(authUsersPath, "utf-8"); + const users = JSON.parse(authUsersData) as UserMetadata[]; + + console.log("Processing issues for all users"); + for (const user of users) { + const username = user.raw_user_meta_data.user_name; + if (!username) { + console.error("Username not found in user metadata"); + continue; + } + + console.log(`Processing issues for user: ${username}`); + try { + const result = await issueScraper(username); + console.log(result); + } catch (error) { + console.error(`Error processing user ${username}:`, error); + } + } + } catch (error) { + console.error("Error in userIssueScraper:", error); + throw error; + } +} + +// Run the scraper +userIssueScraper(process.cwd()) + .then(() => console.log("Completed 
processing all users")) + .catch((error) => { + console.error("Error running user issue scraper:", error); + process.exit(1); + });