Skip to content

Commit

Permalink
Update scraper tests (#44)
Browse files Browse the repository at this point in the history
* Update scraper tests

* Improve test catalog naming
  • Loading branch information
chaosrealm authored Aug 6, 2024
1 parent 8c0c030 commit e846225
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 33 deletions.
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

53 changes: 22 additions & 31 deletions src/scraping.test.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
import { expect, test } from "vitest";
import { CatalogConfig } from "./catalog";
import { SitemapDocument, UrlDocument } from "./document";
import { testClient } from "./src/vitest-test-client";
import { testClient } from "./vitest-test-client";

const runScraperTests = process.env.RUN_SCRAPER_TESTS === "true";

const expectedSitemapUrls = 4;
const expectedSitemapUrls = 28;

/**
 * Builds a unique, recognizable catalog name for a scraper test.
 *
 * The name has the form `sdk-scraper-test-<timestamp>-<random>`, where
 * `<timestamp>` is the current ISO-8601 time with every `.` and `:` replaced
 * by `-`. The fixed prefix and the timestamp make it easy to clean up leaked
 * catalogs and to identify problematic tests.
 *
 * @returns A freshly generated catalog name.
 */
function getRandomCatalogName(): string {
  const timestamp = new Date().toISOString().replace(/[.:]/g, "-");
  // toISOString() only has millisecond resolution, so names generated back to
  // back usually share a timestamp. A six-digit random suffix keeps
  // same-millisecond collisions unlikely.
  const suffix = Math.floor(Math.random() * 1000000);
  return `sdk-scraper-test-${timestamp}-${suffix}`;
}

test.skipIf(!runScraperTests)(
"Test scraping single URL",
{ timeout: 60000 },
async () => {
const catalogName = `catalog-${Math.floor(Math.random() * 10000)}`;
const catalogName = getRandomCatalogName();

const config: CatalogConfig = {
description: "foo bar",
Expand Down Expand Up @@ -50,9 +56,9 @@ test.skipIf(!runScraperTests)(

test.skipIf(!runScraperTests)(
"Test scraping sitemap",
{ timeout: 60000 },
{ timeout: 120000 },
async () => {
const catalogName = `catalog-${Math.floor(Math.random() * 10000)}`;
const catalogName = getRandomCatalogName();

const config: CatalogConfig = {
description: "foo bar",
Expand Down Expand Up @@ -88,13 +94,11 @@ test.skipIf(!runScraperTests)(

test.skipIf(!runScraperTests)(
"Test isolation of scraping multiple catalogs at once",
{ timeout: 60000 },
{ timeout: 120000 },
async () => {
const catalogName1 = `catalog-${Math.floor(Math.random() * 10000)}`;
const catalogName2 = `catalog-${Math.floor(Math.random() * 10000)}`;
const catalogName3 = `catalog-${Math.floor(Math.random() * 10000)}`;
const catalogName4 = `catalog-${Math.floor(Math.random() * 10000)}`;
const catalogName5 = `catalog-${Math.floor(Math.random() * 10000)}`;
const catalogName1 = getRandomCatalogName();
const catalogName2 = getRandomCatalogName();
const catalogName3 = getRandomCatalogName();

const config: CatalogConfig = {
description: "foo bar",
Expand All @@ -104,8 +108,6 @@ test.skipIf(!runScraperTests)(
const catalog1 = await testClient.configureCatalog(catalogName1, config);
const catalog2 = await testClient.configureCatalog(catalogName2, config);
const catalog3 = await testClient.configureCatalog(catalogName3, config);
const catalog4 = await testClient.configureCatalog(catalogName4, config);
const catalog5 = await testClient.configureCatalog(catalogName5, config);

const docs: SitemapDocument[] = [
{
Expand All @@ -117,37 +119,26 @@ test.skipIf(!runScraperTests)(
catalog1.upsertDocuments(docs);
catalog2.upsertDocuments(docs);
catalog3.upsertDocuments(docs);
catalog4.upsertDocuments(docs);
catalog5.upsertDocuments(docs);

let docsFound = false;

while (!docsFound) {
const catalog1Count = await catalog1.documentCount();
const catalog2Count = await catalog2.documentCount();
const catalog3Count = await catalog3.documentCount();
const catalog4Count = await catalog4.documentCount();
const catalog5Count = await catalog5.documentCount();
if (
[
catalog1Count,
catalog2Count,
catalog3Count,
catalog4Count,
catalog5Count,
].every((e) => e === 4)
) {
const count1 = await catalog1.documentCount();
const count2 = await catalog2.documentCount();
const count3 = await catalog3.documentCount();
if ([count1, count2, count3].every((e) => e === expectedSitemapUrls)) {
docsFound = true;
} else {
console.log(
`Waiting for all 3 catalogs to be populated. C1: ${count1}, C2: ${count2}, C3: ${count3} docs found. sleeping...`,
);
await sleep(5000);
}
}

await catalog1.delete();
await catalog2.delete();
await catalog3.delete();
await catalog4.delete();
await catalog5.delete();
},
);

Expand Down

0 comments on commit e846225

Please sign in to comment.