Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CMR-10118: Update default paging functionality for CMR-STAC #363

Merged
merged 6 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions serverless.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ provider:
CMR_LB_URL: "https://cmr.sit.earthdata.nasa.gov"
GRAPHQL_URL: "https://graphql.sit.earthdata.nasa.gov/api"
STAC_VERSION: "1.0.0"
PAGE_SIZE: "100"

functions:
stac:
Expand Down
1 change: 1 addition & 0 deletions serverless.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ provider:
CMR_LB_URL: ${param:cmr-lb-url, "http://${cf:${opt:stage}.servicesDnsName}"}
GRAPHQL_URL: ${param:graphql-url, "https://graphql${self:custom.stagePrefix.${opt:stage}}.earthdata.nasa.gov/api"}
STAC_VERSION: "1.0.0"
PAGE_SIZE: "100"
LOG_LEVEL: INFO

custom:
Expand Down
52 changes: 52 additions & 0 deletions src/__tests__/providerCatalog.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,58 @@ describe("GET /:provider", () => {
});
});

// Verifies that a provider catalog advertises a "next" link when the
// collection lookup reports a cursor for a further page of results.
describe("when there are more results available", () => {
  it("includes a 'next' link with the correct query parameters", async () => {
    sandbox
      .stub(Provider, "getProviders")
      .resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]);

    // Stub a full page of collections together with a cursor, which is what
    // signals to the route that another page exists.
    const mockCollections = generateSTACCollections(100);
    sandbox.stub(Collections, "getCollectionIds").resolves({
      count: 100,
      cursor: "nextPageCursor",
      items: mockCollections.map((coll) => ({
        id: `${coll.id}`,
        title: coll.title ?? faker.random.words(4),
      })),
    });

    const { body: catalog } = await request(stacApp).get(`/stac/TEST`);

    const nextLink = catalog.links.find((l: Link) => l.rel === "next");
    expect(nextLink).to.exist;
    expect(nextLink.rel).to.equal("next");
    expect(nextLink.type).to.equal("application/json");
    expect(nextLink.title).to.equal("Next page of results");

    const nextUrl = new URL(nextLink.href);
    expect(nextUrl.pathname).to.equal("/stac/TEST");
    // The next link must carry the cursor returned by the collection lookup,
    // otherwise following it would just re-request the first page.
    expect(nextUrl.searchParams.get("cursor")).to.equal("nextPageCursor");
  });
});

// When the collection lookup returns a null cursor, the provider catalog
// must not advertise a "next" page link.
describe("when there are no more results available", () => {
  it("does not include a 'next' link", async () => {
    const providersStub = sandbox.stub(Provider, "getProviders");
    providersStub.resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]);

    // Build the stubbed id/title pairs up front so the stub payload reads flat.
    const collectionItems = generateSTACCollections(10).map((coll) => ({
      id: `${coll.id}`,
      title: coll.title ?? faker.random.words(4),
    }));
    sandbox.stub(Collections, "getCollectionIds").resolves({
      count: 10,
      cursor: null,
      items: collectionItems,
    });

    const response = await request(stacApp).get("/stac/TEST");
    const catalog = response.body;

    const nextLink = catalog.links.find((l: Link) => l.rel === "next");
    expect(nextLink).to.not.exist;
  });
});

describe(`given the provider has a collection`, () => {
it("has a child link for that collection without query parameters", async function () {
sandbox
Expand Down
4 changes: 2 additions & 2 deletions src/domains/collections.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import {

import { cmrSpatialToExtent } from "./bounding-box";

import { extractAssets, MAX_SIGNED_INTEGER, paginateQuery } from "./stac";
import { CMR_QUERY_MAX, extractAssets, paginateQuery } from "./stac";

const CMR_ROOT = process.env.CMR_URL;
const STAC_VERSION = process.env.STAC_VERSION ?? "1.0.0";
Expand Down Expand Up @@ -356,7 +356,7 @@ export const getAllCollectionIds = async (
cursor: string | null;
items: { id: string; title: string }[];
}> => {
params.limit = MAX_SIGNED_INTEGER;
params.limit = CMR_QUERY_MAX;

return await getCollectionIds(params, opts);
};
21 changes: 4 additions & 17 deletions src/domains/stac.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,8 @@ export const extractAssets = (
{} as AssetLinks
);
const GRAPHQL_URL = process.env.GRAPHQL_URL ?? "http://localhost:3013";
export const CMR_QUERY_MAX = 2000;
export const MAX_SIGNED_INTEGER = 2 ** 31 - 1;

/**
 * Page size used as the query limit when requesting collection ids.
 *
 * Read from the `PAGE_SIZE` environment variable. `Number(undefined)` is
 * `NaN` and `Number("")` is `0`, neither of which is a usable limit, so any
 * value that is not a positive integer falls back to 100 — the value set
 * for `PAGE_SIZE` in the serverless configuration.
 */
export const CMR_QUERY_MAX = ((): number => {
  const parsed = Number(process.env.PAGE_SIZE);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : 100;
})();

const pointToQuery = (point: GeoJSONPoint) => point.coordinates.join(",");

Expand Down Expand Up @@ -553,8 +553,7 @@ export const paginateQuery = async (
opts: {
headers?: IncomingHttpHeaders;
},
handler: GraphQLHandler,
prevResults: unknown[] = []
handler: GraphQLHandler
): Promise<GraphQLResults> => {
const paginatedParams = { ...params };

Expand Down Expand Up @@ -589,19 +588,7 @@ export const paginateQuery = async (
if (!data) throw new Error("No data returned from GraphQL during paginated query");
const { count, cursor, items } = data;

const totalResults = [...prevResults, ...items];
const moreResultsAvailable = totalResults.length !== count && cursor != null;
const foundEnough = totalResults.length >= (params.limit ?? -1);

if (moreResultsAvailable && !foundEnough) {
console.debug(
`Retrieved ${totalResults.length} of ${params.limit} for ${JSON.stringify(params, null, 2)}`
);
const nextParams = mergeMaybe({ ...params }, { cursor });
return await paginateQuery(gqlQuery, nextParams, opts, handler, totalResults);
}

return { items: totalResults, count, cursor };
return { items: items, count, cursor };
} catch (err: unknown) {
if (
!(err instanceof Error) &&
Expand Down
39 changes: 30 additions & 9 deletions src/routes/catalog.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ import { getAllCollectionIds } from "../domains/collections";
import { conformance } from "../domains/providers";
import { ServiceUnavailableError } from "../models/errors";
import { getBaseUrl, mergeMaybe, stacContext } from "../utils";
import { stringifyQuery } from "../domains/stac";

const STAC_VERSION = process.env.STAC_VERSION ?? "1.0.0";

const generateSelfLinks = (req: Request): Links => {
const generateSelfLinks = (req: Request, nextCursor?: string | null): Links => {
const { stacRoot, path, self } = stacContext(req);

return [
const links = [
{
rel: "self",
href: self,
Expand Down Expand Up @@ -72,20 +73,38 @@ const generateSelfLinks = (req: Request): Links => {
title: "HTML documentation",
},
];

const originalQuery = mergeMaybe(req.query, req.body);

if (nextCursor) {
const nextResultsQuery = { ...originalQuery, cursor: nextCursor };

links.push({
rel: "next",
href: `${stacRoot}${req.path}?${stringifyQuery(nextResultsQuery)}`,
type: "application/json",
title: "Next page of results",
});
}

return links;
};

const providerCollections = async (
req: Request
): Promise<[null, { id: string; title: string }[]] | [string, null]> => {
const { headers, provider } = req;
): Promise<[null, { id: string; title: string }[], string | null] | [string, null]> => {
const { headers, provider, query } = req;

const cloudOnly = headers["cloud-stac"] === "true" ? { cloudHosted: true } : {};

const query = mergeMaybe({ provider: provider?.["provider-id"] }, { ...cloudOnly });
const query2 = mergeMaybe(
{ provider: provider?.["provider-id"], cursor: query?.cursor },
dmistry1 marked this conversation as resolved.
Show resolved Hide resolved
{ ...cloudOnly }
);

try {
const { items } = await getAllCollectionIds(query, { headers });
return [null, items];
const { items, cursor } = await getAllCollectionIds(query2, { headers });
dmistry1 marked this conversation as resolved.
Show resolved Hide resolved
return [null, items, cursor];
} catch (err) {
console.error("A problem occurred querying for collections.", err);
return [(err as Error).message, null];
Expand All @@ -94,15 +113,17 @@ const providerCollections = async (

export const providerCatalogHandler = async (req: Request, res: Response) => {
const { provider } = req;

if (!provider) throw new ServiceUnavailableError("Could not retrieve provider information");

const [err, collections] = await providerCollections(req);
const [err, collections, cursor] = await providerCollections(req);

if (err) throw new ServiceUnavailableError(err as string);

const { self } = stacContext(req);

const selfLinks = generateSelfLinks(req);
const selfLinks = generateSelfLinks(req, cursor);

const childLinks = (collections ?? []).map(({ id, title }) => ({
rel: "child",
href: `${getBaseUrl(self)}/collections/${encodeURIComponent(id)}`,
Expand Down
Loading