From 8528286416ed39630dcbf10c50ca77503d25c84a Mon Sep 17 00:00:00 2001 From: Deep Mistry Date: Thu, 17 Oct 2024 11:56:22 -0400 Subject: [PATCH 1/6] CMR-10118: Updates default paging --- serverless.dev.yml | 1 + serverless.yml | 1 + src/__tests__/providerCatalog.spec.ts | 52 +++++++++++++++++++++++++++ src/domains/collections.ts | 4 +-- src/domains/stac.ts | 18 ++-------- src/routes/catalog.ts | 36 ++++++++++++++----- 6 files changed, 86 insertions(+), 26 deletions(-) diff --git a/serverless.dev.yml b/serverless.dev.yml index 7dcb13ae..3a1b2dd5 100644 --- a/serverless.dev.yml +++ b/serverless.dev.yml @@ -9,6 +9,7 @@ provider: CMR_LB_URL: "https://cmr.sit.earthdata.nasa.gov" GRAPHQL_URL: "https://graphql.sit.earthdata.nasa.gov/api" STAC_VERSION: "1.0.0" + PAGE_SIZE: 100 functions: stac: diff --git a/serverless.yml b/serverless.yml index 6d1a293c..9b0f35ee 100644 --- a/serverless.yml +++ b/serverless.yml @@ -15,6 +15,7 @@ provider: CMR_LB_URL: ${param:cmr-lb-url, "http://${cf:${opt:stage}.servicesDnsName}"} GRAPHQL_URL: ${param:graphql-url, "https://graphql${self:custom.stagePrefix.${opt:stage}}.earthdata.nasa.gov/api"} STAC_VERSION: "1.0.0" + PAGE_SIZE: "100" LOG_LEVEL: INFO custom: diff --git a/src/__tests__/providerCatalog.spec.ts b/src/__tests__/providerCatalog.spec.ts index 6c8666a8..da1c106e 100644 --- a/src/__tests__/providerCatalog.spec.ts +++ b/src/__tests__/providerCatalog.spec.ts @@ -165,6 +165,58 @@ describe("GET /:provider", () => { }); }); + describe("when there are more results available", () => { + it("includes a 'next' link with the correct query parameters", async () => { + sandbox + .stub(Provider, "getProviders") + .resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]); + const mockCollections = generateSTACCollections(100); + sandbox.stub(Collections, "getCollectionIds").resolves({ + count: 100, + cursor: "nextPageCursor", + items: mockCollections.map((coll) => ({ + id: `${coll.id}`, + title: coll.title ?? faker.random.words(4), + })), + }); + + const { body: catalog } = await request(stacApp).get(`/stac/TEST`); + + console.log("🚀 ~ it.only ~ catalog:", catalog) + const nextLink = catalog.links.find((l: Link) => l.rel === "next"); + expect(nextLink).to.exist; + expect(nextLink.rel).to.equal("next"); + expect(nextLink.type).to.equal("application/json"); + expect(nextLink.title).to.equal("Next page of results"); + + const nextUrl = new URL(nextLink.href); + expect(nextUrl.pathname).to.equal("/stac/TEST"); + }); + }); + + describe("when there are no more results available", () => { + it("does not include a 'next' link", async () => { + sandbox + .stub(Provider, "getProviders") + .resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]); + + const mockCollections = generateSTACCollections(10); + sandbox.stub(Collections, "getCollectionIds").resolves({ + count: 10, + cursor: null, + items: mockCollections.map((coll) => ({ + id: `${coll.id}`, + title: coll.title ?? faker.random.words(4), + })), + }); + + const { body: catalog } = await request(stacApp).get("/stac/TEST"); + + const nextLink = catalog.links.find((l: Link) => l.rel === "next"); + expect(nextLink).to.not.exist; + }); + }); + describe(`given the provider has a collection`, () => { it("has a child link for that collection without query parameters", async function () { sandbox diff --git a/src/domains/collections.ts b/src/domains/collections.ts index ffc67d87..3b54828d 100644 --- a/src/domains/collections.ts +++ b/src/domains/collections.ts @@ -16,7 +16,7 @@ import { import { cmrSpatialToExtent } from "./bounding-box"; -import { extractAssets, MAX_SIGNED_INTEGER, paginateQuery } from "./stac"; +import { CMR_QUERY_MAX, extractAssets, paginateQuery } from "./stac"; const CMR_ROOT = process.env.CMR_URL; const STAC_VERSION = process.env.STAC_VERSION ?? "1.0.0"; @@ -356,7 +356,7 @@ export const getAllCollectionIds = async ( cursor: string | null; items: { id: string; title: string }[]; }> => { - params.limit = MAX_SIGNED_INTEGER; + params.limit = CMR_QUERY_MAX return await getCollectionIds(params, opts); }; diff --git a/src/domains/stac.ts b/src/domains/stac.ts index 860f7611..08974d72 100644 --- a/src/domains/stac.ts +++ b/src/domains/stac.ts @@ -205,8 +205,8 @@ export const extractAssets = ( {} as AssetLinks ); const GRAPHQL_URL = process.env.GRAPHQL_URL ?? "http://localhost:3013"; -export const CMR_QUERY_MAX = 2000; -export const MAX_SIGNED_INTEGER = 2 ** 31 - 1; + +export const CMR_QUERY_MAX = Number(process.env.PAGE_SIZE); const pointToQuery = (point: GeoJSONPoint) => point.coordinates.join(","); @@ -554,7 +554,6 @@ export const paginateQuery = async ( headers?: IncomingHttpHeaders; }, handler: GraphQLHandler, - prevResults: unknown[] = [] ): Promise => { const paginatedParams = { ...params }; @@ -589,19 +588,8 @@ export const paginateQuery = async ( if (!data) throw new Error("No data returned from GraphQL during paginated query"); const { count, cursor, items } = data; - const totalResults = [...prevResults, ...items]; - const moreResultsAvailable = totalResults.length !== count && cursor != null; - const foundEnough = totalResults.length >= (params.limit ?? -1); - - if (moreResultsAvailable && !foundEnough) { - console.debug( - `Retrieved ${totalResults.length} of ${params.limit} for ${JSON.stringify(params, null, 2)}` - ); - const nextParams = mergeMaybe({ ...params }, { cursor }); - return await paginateQuery(gqlQuery, nextParams, opts, handler, totalResults); - } + return { items: items, count, cursor }; - return { items: totalResults, count, cursor }; } catch (err: unknown) { if ( !(err instanceof Error) && diff --git a/src/routes/catalog.ts b/src/routes/catalog.ts index 7f516948..ff886f7a 100644 --- a/src/routes/catalog.ts +++ b/src/routes/catalog.ts @@ -6,13 +6,14 @@ import { getAllCollectionIds } from "../domains/collections"; import { conformance } from "../domains/providers"; import { ServiceUnavailableError } from "../models/errors"; import { getBaseUrl, mergeMaybe, stacContext } from "../utils"; +import { stringifyQuery } from "../domains/stac"; const STAC_VERSION = process.env.STAC_VERSION ?? "1.0.0"; -const generateSelfLinks = (req: Request): Links => { +const generateSelfLinks = (req: Request, nextCursor?: string | null): Links => { const { stacRoot, path, self } = stacContext(req); - return [ + const links = [ { rel: "self", href: self, @@ -72,20 +73,35 @@ const generateSelfLinks = (req: Request): Links => { title: "HTML documentation", }, ]; + + const originalQuery = mergeMaybe(req.query, req.body); + + if (nextCursor) { + const nextResultsQuery = { ...originalQuery, cursor: nextCursor }; + + links.push({ + rel: "next", + href: `${stacRoot}${req.path}?${stringifyQuery(nextResultsQuery)}`, + type: "application/json", + title: "Next page of results" + }); + } + + return links }; const providerCollections = async ( req: Request -): Promise<[null, { id: string; title: string }[]] | [string, null]> => { - const { headers, provider } = req; +): Promise<[null, { id: string; title: string }[], string | null] | [string, null]> => { + const { headers, provider, query } = req; const cloudOnly = headers["cloud-stac"] === "true" ? { cloudHosted: true } : {}; - const query = mergeMaybe({ provider: provider?.["provider-id"] }, { ...cloudOnly }); + const query2 = mergeMaybe({ provider: provider?.["provider-id"], cursor: query?.cursor }, { ...cloudOnly }); try { - const { items } = await getAllCollectionIds(query, { headers }); - return [null, items]; + const { items, cursor } = await getAllCollectionIds(query2, { headers }); + return [null, items, cursor]; } catch (err) { console.error("A problem occurred querying for collections.", err); return [(err as Error).message, null]; @@ -94,15 +110,17 @@ const providerCollections = async ( export const providerCatalogHandler = async (req: Request, res: Response) => { const { provider } = req; + if (!provider) throw new ServiceUnavailableError("Could not retrieve provider information"); - const [err, collections] = await providerCollections(req); + const [err, collections, cursor] = await providerCollections(req); if (err) throw new ServiceUnavailableError(err as string); const { self } = stacContext(req); - const selfLinks = generateSelfLinks(req); + const selfLinks = generateSelfLinks(req, cursor); + const childLinks = (collections ?? []).map(({ id, title }) => ({ rel: "child", href: `${getBaseUrl(self)}/collections/${encodeURIComponent(id)}`, From 1432e10bfb5032ac06c5e764b22b3dd137144de0 Mon Sep 17 00:00:00 2001 From: Deep Mistry Date: Thu, 17 Oct 2024 12:03:50 -0400 Subject: [PATCH 2/6] CMR-10118: Fixes lint --- serverless.dev.yml | 2 +- src/__tests__/providerCatalog.spec.ts | 14 +++++++------- src/domains/collections.ts | 2 +- src/domains/stac.ts | 3 +-- src/routes/catalog.ts | 13 ++++++++----- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/serverless.dev.yml b/serverless.dev.yml index 3a1b2dd5..fc7bbc2c 100644 --- a/serverless.dev.yml +++ b/serverless.dev.yml @@ -9,7 +9,7 @@ provider: CMR_LB_URL: "https://cmr.sit.earthdata.nasa.gov" GRAPHQL_URL: "https://graphql.sit.earthdata.nasa.gov/api" STAC_VERSION: "1.0.0" - PAGE_SIZE: 100 + PAGE_SIZE: "100" functions: stac: diff --git a/src/__tests__/providerCatalog.spec.ts b/src/__tests__/providerCatalog.spec.ts index da1c106e..0a6d4fed 100644 --- a/src/__tests__/providerCatalog.spec.ts +++ b/src/__tests__/providerCatalog.spec.ts @@ -168,8 +168,8 @@ describe("GET /:provider", () => { describe("when there are more results available", () => { it("includes a 'next' link with the correct query parameters", async () => { sandbox - .stub(Provider, "getProviders") - .resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]); + .stub(Provider, "getProviders") + .resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]); const mockCollections = generateSTACCollections(100); sandbox.stub(Collections, "getCollectionIds").resolves({ count: 100, @@ -181,14 +181,14 @@ describe("GET /:provider", () => { }); const { body: catalog } = await request(stacApp).get(`/stac/TEST`); - - console.log("🚀 ~ it.only ~ catalog:", catalog) + + console.log("🚀 ~ it.only ~ catalog:", catalog); const nextLink = catalog.links.find((l: Link) => l.rel === "next"); expect(nextLink).to.exist; expect(nextLink.rel).to.equal("next"); expect(nextLink.type).to.equal("application/json"); expect(nextLink.title).to.equal("Next page of results"); - + const nextUrl = new URL(nextLink.href); expect(nextUrl.pathname).to.equal("/stac/TEST"); }); @@ -199,7 +199,7 @@ describe("GET /:provider", () => { sandbox .stub(Provider, "getProviders") .resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]); - + const mockCollections = generateSTACCollections(10); sandbox.stub(Collections, "getCollectionIds").resolves({ count: 10, @@ -211,7 +211,7 @@ describe("GET /:provider", () => { }); const { body: catalog } = await request(stacApp).get("/stac/TEST"); - + const nextLink = catalog.links.find((l: Link) => l.rel === "next"); expect(nextLink).to.not.exist; }); diff --git a/src/domains/collections.ts b/src/domains/collections.ts index 3b54828d..3d7f2f8f 100644 --- a/src/domains/collections.ts +++ b/src/domains/collections.ts @@ -356,7 +356,7 @@ export const getAllCollectionIds = async ( cursor: string | null; items: { id: string; title: string }[]; }> => { - params.limit = CMR_QUERY_MAX + params.limit = CMR_QUERY_MAX; return await getCollectionIds(params, opts); }; diff --git a/src/domains/stac.ts b/src/domains/stac.ts index 08974d72..e2358cb1 100644 --- a/src/domains/stac.ts +++ b/src/domains/stac.ts @@ -553,7 +553,7 @@ export const paginateQuery = async ( opts: { headers?: IncomingHttpHeaders; }, - handler: GraphQLHandler, + handler: GraphQLHandler ): Promise => { const paginatedParams = { ...params }; @@ -589,7 +589,6 @@ export const paginateQuery = async ( const { count, cursor, items } = data; return { items: items, count, cursor }; - } catch (err: unknown) { if ( !(err instanceof Error) && diff --git a/src/routes/catalog.ts b/src/routes/catalog.ts index ff886f7a..97b14702 100644 --- a/src/routes/catalog.ts +++ b/src/routes/catalog.ts @@ -81,13 +81,13 @@ const generateSelfLinks = (req: Request, nextCursor?: string | null): Links => { links.push({ rel: "next", - href: `${stacRoot}${req.path}?${stringifyQuery(nextResultsQuery)}`, + href: `${stacRoot}${req.path}?${stringifyQuery(nextResultsQuery)}`, type: "application/json", - title: "Next page of results" + title: "Next page of results", }); } - return links + return links; }; const providerCollections = async ( @@ -97,7 +97,10 @@ const providerCollections = async ( const cloudOnly = headers["cloud-stac"] === "true" ? { cloudHosted: true } : {}; - const query2 = mergeMaybe({ provider: provider?.["provider-id"], cursor: query?.cursor }, { ...cloudOnly }); + const query2 = mergeMaybe( + { provider: provider?.["provider-id"], cursor: query?.cursor }, + { ...cloudOnly } + ); try { const { items, cursor } = await getAllCollectionIds(query2, { headers }); @@ -120,7 +123,7 @@ export const providerCatalogHandler = async (req: Request, res: Response) => { const { self } = stacContext(req); const selfLinks = generateSelfLinks(req, cursor); - + const childLinks = (collections ?? []).map(({ id, title }) => ({ rel: "child", href: `${getBaseUrl(self)}/collections/${encodeURIComponent(id)}`, From 4694cbbc701cfc4f321d175f3bdca0d687b07641 Mon Sep 17 00:00:00 2001 From: Deep Mistry Date: Thu, 17 Oct 2024 15:13:09 -0400 Subject: [PATCH 3/6] CMR-10118: Address PR comments --- src/__tests__/providerCatalog.spec.ts | 6 ++++-- src/routes/catalog.ts | 17 +++++++++++------ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/__tests__/providerCatalog.spec.ts b/src/__tests__/providerCatalog.spec.ts index 0a6d4fed..3df5c97d 100644 --- a/src/__tests__/providerCatalog.spec.ts +++ b/src/__tests__/providerCatalog.spec.ts @@ -13,8 +13,9 @@ import ItemSpec from "../../resources/item-spec/json-schema/item.json"; import { Link } from "../@types/StacCatalog"; import { createApp } from "../app"; -import * as Provider from "../domains/providers"; import * as Collections from "../domains/collections"; +import * as Provider from "../domains/providers"; +import * as stac from "../domains/stac"; import { generateSTACCollections } from "../utils/testUtils"; const stacApp = createApp(); @@ -167,6 +168,7 @@ describe("GET /:provider", () => { describe("when there are more results available", () => { it("includes a 'next' link with the correct query parameters", async () => { + sandbox.stub(stac, 'CMR_QUERY_MAX').value(100); sandbox .stub(Provider, "getProviders") .resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]); @@ -182,7 +184,6 @@ describe("GET /:provider", () => { const { body: catalog } = await request(stacApp).get(`/stac/TEST`); - console.log("🚀 ~ it.only ~ catalog:", catalog); const nextLink = catalog.links.find((l: Link) => l.rel === "next"); expect(nextLink).to.exist; expect(nextLink.rel).to.equal("next"); @@ -196,6 +197,7 @@ describe("GET /:provider", () => { describe("when there are no more results available", () => { it("does not include a 'next' link", async () => { + sandbox.stub(stac, 'CMR_QUERY_MAX').value(100); sandbox .stub(Provider, "getProviders") .resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]); diff --git a/src/routes/catalog.ts b/src/routes/catalog.ts index 97b14702..d57bfd67 100644 --- a/src/routes/catalog.ts +++ b/src/routes/catalog.ts @@ -6,11 +6,11 @@ import { getAllCollectionIds } from "../domains/collections"; import { conformance } from "../domains/providers"; import { ServiceUnavailableError } from "../models/errors"; import { getBaseUrl, mergeMaybe, stacContext } from "../utils"; -import { stringifyQuery } from "../domains/stac"; +import { CMR_QUERY_MAX, stringifyQuery } from "../domains/stac"; const STAC_VERSION = process.env.STAC_VERSION ?? "1.0.0"; -const generateSelfLinks = (req: Request, nextCursor?: string | null): Links => { +const generateSelfLinks = (req: Request, nextCursor?: string | null, count?: number): Links => { const { stacRoot, path, self } = stacContext(req); const links = [ @@ -76,7 +76,12 @@ const generateSelfLinks = (req: Request, nextCursor?: string | null): Links => { const originalQuery = mergeMaybe(req.query, req.body); - if (nextCursor) { + // Add a 'next' link if there are more results available + // This is determined by: + // 1. The presence of a nextCursor (indicating more results) + // 2. The number of collection equaling CMR_QUERY_MAX (100) + // The 'next' link includes the original query parameters plus the new cursor + if (nextCursor && count === CMR_QUERY_MAX) { const nextResultsQuery = { ...originalQuery, cursor: nextCursor }; links.push({ @@ -97,13 +102,13 @@ const providerCollections = async ( const cloudOnly = headers["cloud-stac"] === "true" ? { cloudHosted: true } : {}; - const query2 = mergeMaybe( + const mergedQuery = mergeMaybe( { provider: provider?.["provider-id"], cursor: query?.cursor }, { ...cloudOnly } ); try { - const { items, cursor } = await getAllCollectionIds(query2, { headers }); + const { items, cursor } = await getAllCollectionIds(mergedQuery, { headers }); return [null, items, cursor]; } catch (err) { console.error("A problem occurred querying for collections.", err); @@ -122,7 +127,7 @@ export const providerCatalogHandler = async (req: Request, res: Response) => { const { self } = stacContext(req); - const selfLinks = generateSelfLinks(req, cursor); + const selfLinks = generateSelfLinks(req, cursor, collections?.length); const childLinks = (collections ?? []).map(({ id, title }) => ({ rel: "child", From d8461ff17b7924a6b27681e8b40e11b6db742229 Mon Sep 17 00:00:00 2001 From: Deep Mistry Date: Thu, 17 Oct 2024 15:21:03 -0400 Subject: [PATCH 4/6] CMR-10118: Fixes lint issues --- src/__tests__/providerCatalog.spec.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/__tests__/providerCatalog.spec.ts b/src/__tests__/providerCatalog.spec.ts index 3df5c97d..bd1ecf2f 100644 --- a/src/__tests__/providerCatalog.spec.ts +++ b/src/__tests__/providerCatalog.spec.ts @@ -168,7 +168,7 @@ describe("GET /:provider", () => { describe("when there are more results available", () => { it("includes a 'next' link with the correct query parameters", async () => { - sandbox.stub(stac, 'CMR_QUERY_MAX').value(100); + sandbox.stub(stac, "CMR_QUERY_MAX").value(100); sandbox .stub(Provider, "getProviders") .resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]); @@ -197,7 +197,7 @@ describe("GET /:provider", () => { describe("when there are no more results available", () => { it("does not include a 'next' link", async () => { - sandbox.stub(stac, 'CMR_QUERY_MAX').value(100); + sandbox.stub(stac, "CMR_QUERY_MAX").value(100); sandbox .stub(Provider, "getProviders") .resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]); From 18fde7efa754abcbccefca9fe5b6e9478298e532 Mon Sep 17 00:00:00 2001 From: Deep Mistry Date: Fri, 18 Oct 2024 10:20:14 -0400 Subject: [PATCH 5/6] CMR-10118: Addressing PR comments --- src/routes/catalog.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/routes/catalog.ts b/src/routes/catalog.ts index d57bfd67..d378abc3 100644 --- a/src/routes/catalog.ts +++ b/src/routes/catalog.ts @@ -103,7 +103,10 @@ const providerCollections = async ( const cloudOnly = headers["cloud-stac"] === "true" ? { cloudHosted: true } : {}; const mergedQuery = mergeMaybe( - { provider: provider?.["provider-id"], cursor: query?.cursor }, + { + provider: provider?.["provider-id"], + cursor: query?.cursor, + }, { ...cloudOnly } ); From 9a3c0ee5b2fae18f81dfabb79c2f191eb3e84ee1 Mon Sep 17 00:00:00 2001 From: Deep Mistry Date: Fri, 18 Oct 2024 10:39:30 -0400 Subject: [PATCH 6/6] CMR-10118: Updates github action checkout@v3 and setup-node@v3 to v4 --- .github/workflows/lint.yaml | 8 ++++---- .github/workflows/run_tests.yaml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 65b77da4..578f03c2 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -12,9 +12,9 @@ jobs: node_version: [18] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Node - uses: actions/setup-node@v3 + uses: actions/setup-node@v4 with: node-version: ${{ matrix.node_version }} @@ -31,9 +31,9 @@ jobs: node_version: [18] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Node - uses: actions/setup-node@v3 + uses: actions/setup-node@v4 with: node-version: ${{ matrix.node_version }} diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index 62ee2607..450a6972 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -18,9 +18,9 @@ jobs: node_version: [18] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Node - uses: actions/setup-node@v3 + uses: actions/setup-node@v4 with: node-version: ${{ matrix.node_version }}