From 0ddf27c6b0d63ecbabcc12426d5b69dae75b8f06 Mon Sep 17 00:00:00 2001 From: Eddie Date: Wed, 15 Jan 2025 13:41:13 -0300 Subject: [PATCH] incognito tools --- tools/download-page/metadata.json | 7 +++- tools/download-page/tool.ts | 65 +++++++++++++++++++++++++------ tools/perplexity/metadata.json | 2 +- tools/perplexity/tool.ts | 47 +++++++++++----------- 4 files changed, 84 insertions(+), 37 deletions(-) diff --git a/tools/download-page/metadata.json b/tools/download-page/metadata.json index 6a3d0c0..19c80fb 100644 --- a/tools/download-page/metadata.json +++ b/tools/download-page/metadata.json @@ -12,7 +12,12 @@ ], "configurations": { "type": "object", - "properties": {}, + "properties": { + "chromePath": { + "type": "string", + "description": "The path to the Chrome executable" + } + }, "required": [] }, "parameters": { diff --git a/tools/download-page/tool.ts b/tools/download-page/tool.ts index 8e035d6..80e36b6 100644 --- a/tools/download-page/tool.ts +++ b/tools/download-page/tool.ts @@ -1,25 +1,68 @@ + +import chromePaths from 'npm:chrome-paths@1.0.1'; import TurndownService from 'npm:turndown@7.2.0'; -import axios from 'npm:axios@1.7.7'; +import { addExtra } from 'npm:puppeteer-extra@3.3.6'; +import rebrowserPuppeteer from 'npm:rebrowser-puppeteer@23.10.1'; +import StealthPlugin from 'npm:puppeteer-extra-plugin-stealth@2.11.2'; + +import { getHomePath } from './shinkai-local-support.ts'; + +type Configurations = { + chromePath?: string; +}; -type Configurations = {}; type Parameters = { url: string; + incognito?: boolean; }; type Result = { markdown: string }; + export type Run, I extends Record, R extends Record> = (config: C, inputs: I) => Promise; +const puppeteer = addExtra(rebrowserPuppeteer as any); +const pluginStealth = StealthPlugin(); +pluginStealth.enabledEvasions.delete('chrome.loadTimes'); +pluginStealth.enabledEvasions.delete('chrome.runtime'); +puppeteer.use(pluginStealth); + export const run: Run = async ( - _configurations: Configurations, + configurations: Configurations, parameters: Parameters, ): Promise => { - try { - const response = await axios.get(parameters.url); - const turndownService = new TurndownService(); - const markdown = turndownService.turndown(response.data); - return Promise.resolve({ markdown }); - } catch (error) { - console.log('error', error); - return Promise.resolve({ markdown: '' }); + const chromePath = + configurations?.chromePath || + Deno.env.get('CHROME_PATH') || + chromePaths.chrome || + chromePaths.chromium; + if (!chromePath) { + throw new Error('Chrome path not found'); } + console.log({ chromePath }) + const browser = await puppeteer.launch({ + executablePath: chromePath, + args: ['--disable-blink-features=AutomationControlled'], + }); + + const page = await browser.newPage(); + + console.log("Navigating to website..."); + await page.goto(parameters.url); + + console.log('Waiting for the page to load...'); + await page.waitForNetworkIdle(); + + console.log('Extracting HTML content...'); + const html = await page.content(); + + console.log('Closing browser...'); + await browser.close(); + + console.log('Saving HTML to file...'); + Deno.writeTextFileSync(await getHomePath() + '/download-page.html', html); + + console.log('Converting HTML to Markdown...'); + const turndownService = new TurndownService(); + const markdown = turndownService.turndown(html); + return Promise.resolve({ markdown }); }; diff --git a/tools/perplexity/metadata.json b/tools/perplexity/metadata.json index cc49133..074532f 100644 --- a/tools/perplexity/metadata.json +++ b/tools/perplexity/metadata.json @@ -6,7 +6,7 @@ "version": "1.0.0", "keywords": [ "perplexity", - "shinkai" + "search" ], "configurations": { "type": "object", diff --git a/tools/perplexity/tool.ts b/tools/perplexity/tool.ts index 2927a67..82bbf9c 100644 --- a/tools/perplexity/tool.ts +++ b/tools/perplexity/tool.ts @@ -1,7 +1,9 @@ -import * as playwright from 'npm:playwright@1.48.2'; + import chromePaths from 'npm:chrome-paths@1.0.1'; import TurndownService from 'npm:turndown@7.2.0'; -import { defineConfig } from 'npm:playwright@1.48.2/test'; +import { addExtra } from 'npm:puppeteer-extra@3.3.6'; +import rebrowserPuppeteer from 'npm:rebrowser-puppeteer@23.10.1'; +import StealthPlugin from 'npm:puppeteer-extra-plugin-stealth@2.11.2'; type Configurations = { chromePath?: string; @@ -10,52 +12,49 @@ type Parameters = { query: string; }; type Result = { response: string }; + export type Run, I extends Record, R extends Record> = (config: C, inputs: I) => Promise; +const puppeteer = addExtra(rebrowserPuppeteer as any); +const pluginStealth = StealthPlugin(); + +pluginStealth.enabledEvasions.delete('chrome.loadTimes'); +pluginStealth.enabledEvasions.delete('chrome.runtime'); + +puppeteer.use(pluginStealth); + export const run: Run = async ( configurations, params, ): Promise => { - defineConfig({ - use: { - actionTimeout: 60 * 1000, - navigationTimeout: 60 * 1000, - }, - }); const chromePath = configurations?.chromePath || Deno.env.get('CHROME_PATH') || chromePaths.chrome || chromePaths.chromium; - const browser = await playwright['chromium'].launch({ + if (!chromePath) { + throw new Error('Chrome path not found'); + } + const browser = await puppeteer.launch({ executablePath: chromePath, + args: ['--disable-blink-features=AutomationControlled'], }); - const context = await browser.newContext({ - viewport: { width: 1280, height: 800 }, // Set viewport size - userAgent: - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36', // Set Mac user agent - }); - const page = await context.newPage(); + const page = await browser.newPage(); console.log("Navigating to Perplexity's website..."); await page.goto('https://www.perplexity.ai/'); console.log('Waiting for the page to load...'); - await page.waitForTimeout(2500); + await page.waitForNetworkIdle({ timeout: 2500 }); console.log('Filling textarea with query:', params.query); - await page.fill('textarea', params.query); + await page.type('textarea', params.query); try { console.log('trying to click app popup'); - await page.click('button:has(svg[data-icon="xmark"])', { timeout: 2000 }); + await page.click('button:has(svg[data-icon="xmark"])'); } catch (_) { console.log('unable to find the x button to close the popup'); - /* - We do nothing, so we have two cases: - - the code continue and fails later because we are not able to click the "submit" button - - the code continue and it just works because the app was changed and the popup doesn't exists - */ } console.log('Clicking the button with the specified SVG...'); @@ -65,7 +64,7 @@ export const run: Run = async ( await page.waitForSelector('button:has(svg[data-icon="arrow-right"])'); console.log('Waiting for results to load...'); - await page.waitForSelector('div:has-text("Related")'); + await page.waitForSelector('text=Related'); console.log('Extracting HTML content...'); const htmlContent = await page.evaluate(() => {