From e846225ef66265e0003b27f39087e3cfa3854559 Mon Sep 17 00:00:00 2001 From: Eugene Shvets <3442792+chaosrealm@users.noreply.github.com> Date: Mon, 5 Aug 2024 17:07:06 -0700 Subject: [PATCH] Update scraper tests (#44) * Update scraper tests * Improve test catalog naming --- package-lock.json | 4 ++-- src/scraping.test.ts | 53 ++++++++++++++++++-------------------- 2 files changed, 24 insertions(+), 33 deletions(-) diff --git a/package-lock.json b/package-lock.json index e4ac8cb..18ebe81 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@cortexclick/cortex", - "version": "0.0.8", + "version": "0.0.10", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@cortexclick/cortex", - "version": "0.0.8", + "version": "0.0.10", "license": "Apache-2.0", "dependencies": { "form-data": "^4.0.0" diff --git a/src/scraping.test.ts b/src/scraping.test.ts index 6260a7c..c7d8170 100644 --- a/src/scraping.test.ts +++ b/src/scraping.test.ts @@ -1,17 +1,23 @@ import { expect, test } from "vitest"; import { CatalogConfig } from "./catalog"; import { SitemapDocument, UrlDocument } from "./document"; -import { testClient } from "./src/vitest-test-client"; +import { testClient } from "./vitest-test-client"; const runScraperTests = process.env.RUN_SCRAPER_TESTS === "true"; -const expectedSitemapUrls = 4; +const expectedSitemapUrls = 28; + +function getRandomCatalogName(): string { + // return a random name with a recognizable prefix and timestamp (so it's easy to clean up leaks and identify problematic tests) + // Still use a random part because toISOString() only has millisecond resolution + return `sdk-scraper-test-${new Date().toISOString().replace(/[.:]/g, "-")}-${Math.floor(Math.random() * 1000)}`; +} test.skipIf(!runScraperTests)( "Test scraping single URL", { timeout: 60000 }, async () => { - const catalogName = `catalog-${Math.floor(Math.random() * 10000)}`; + const catalogName = getRandomCatalogName(); const config: 
CatalogConfig = { description: "foo bar", @@ -50,9 +56,9 @@ test.skipIf(!runScraperTests)( test.skipIf(!runScraperTests)( "Test scraping sitemap", - { timeout: 60000 }, + { timeout: 120000 }, async () => { - const catalogName = `catalog-${Math.floor(Math.random() * 10000)}`; + const catalogName = getRandomCatalogName(); const config: CatalogConfig = { description: "foo bar", @@ -88,13 +94,11 @@ test.skipIf(!runScraperTests)( test.skipIf(!runScraperTests)( "Test isolation of scraping multiple catalogs at once", - { timeout: 60000 }, + { timeout: 120000 }, async () => { - const catalogName1 = `catalog-${Math.floor(Math.random() * 10000)}`; - const catalogName2 = `catalog-${Math.floor(Math.random() * 10000)}`; - const catalogName3 = `catalog-${Math.floor(Math.random() * 10000)}`; - const catalogName4 = `catalog-${Math.floor(Math.random() * 10000)}`; - const catalogName5 = `catalog-${Math.floor(Math.random() * 10000)}`; + const catalogName1 = getRandomCatalogName(); + const catalogName2 = getRandomCatalogName(); + const catalogName3 = getRandomCatalogName(); const config: CatalogConfig = { description: "foo bar", @@ -104,8 +108,6 @@ test.skipIf(!runScraperTests)( const catalog1 = await testClient.configureCatalog(catalogName1, config); const catalog2 = await testClient.configureCatalog(catalogName2, config); const catalog3 = await testClient.configureCatalog(catalogName3, config); - const catalog4 = await testClient.configureCatalog(catalogName4, config); - const catalog5 = await testClient.configureCatalog(catalogName5, config); const docs: SitemapDocument[] = [ { @@ -117,28 +119,19 @@ test.skipIf(!runScraperTests)( catalog1.upsertDocuments(docs); catalog2.upsertDocuments(docs); catalog3.upsertDocuments(docs); - catalog4.upsertDocuments(docs); - catalog5.upsertDocuments(docs); let docsFound = false; while (!docsFound) { - const catalog1Count = await catalog1.documentCount(); - const catalog2Count = await catalog2.documentCount(); - const catalog3Count = await 
catalog3.documentCount(); - const catalog4Count = await catalog4.documentCount(); - const catalog5Count = await catalog5.documentCount(); - if ( - [ - catalog1Count, - catalog2Count, - catalog3Count, - catalog4Count, - catalog5Count, - ].every((e) => e === 4) - ) { + const count1 = await catalog1.documentCount(); + const count2 = await catalog2.documentCount(); + const count3 = await catalog3.documentCount(); + if ([count1, count2, count3].every((e) => e === expectedSitemapUrls)) { docsFound = true; } else { + console.log( + `Waiting for all 3 catalogs to be populated. C1: ${count1}, C2: ${count2}, C3: ${count3} docs found. sleeping...`, + ); await sleep(5000); } } @@ -146,8 +139,6 @@ test.skipIf(!runScraperTests)( await catalog1.delete(); await catalog2.delete(); await catalog3.delete(); - await catalog4.delete(); - await catalog5.delete(); }, );