Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CMR-10118: Update default paging functionality for CMR-STAC #363

Merged
merged 6 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ jobs:
node_version: [18]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup Node
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node_version }}

Expand All @@ -31,9 +31,9 @@ jobs:
node_version: [18]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup Node
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node_version }}

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/run_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ jobs:
node_version: [18]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup Node
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node_version }}

Expand Down
1 change: 1 addition & 0 deletions serverless.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ provider:
CMR_LB_URL: "https://cmr.sit.earthdata.nasa.gov"
GRAPHQL_URL: "https://graphql.sit.earthdata.nasa.gov/api"
STAC_VERSION: "1.0.0"
PAGE_SIZE: "100"

functions:
stac:
Expand Down
1 change: 1 addition & 0 deletions serverless.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ provider:
CMR_LB_URL: ${param:cmr-lb-url, "http://${cf:${opt:stage}.servicesDnsName}"}
GRAPHQL_URL: ${param:graphql-url, "https://graphql${self:custom.stagePrefix.${opt:stage}}.earthdata.nasa.gov/api"}
STAC_VERSION: "1.0.0"
PAGE_SIZE: "100"
LOG_LEVEL: INFO

custom:
Expand Down
56 changes: 55 additions & 1 deletion src/__tests__/providerCatalog.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ import ItemSpec from "../../resources/item-spec/json-schema/item.json";

import { Link } from "../@types/StacCatalog";
import { createApp } from "../app";
import * as Provider from "../domains/providers";
import * as Collections from "../domains/collections";
import * as Provider from "../domains/providers";
import * as stac from "../domains/stac";
import { generateSTACCollections } from "../utils/testUtils";

const stacApp = createApp();
Expand Down Expand Up @@ -165,6 +166,59 @@ describe("GET /:provider", () => {
});
});

describe("when there are more results available", () => {
it("includes a 'next' link with the correct query parameters", async () => {
sandbox.stub(stac, "CMR_QUERY_MAX").value(100);
sandbox
.stub(Provider, "getProviders")
.resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]);
const mockCollections = generateSTACCollections(100);
sandbox.stub(Collections, "getCollectionIds").resolves({
count: 100,
cursor: "nextPageCursor",
items: mockCollections.map((coll) => ({
id: `${coll.id}`,
title: coll.title ?? faker.random.words(4),
})),
});

const { body: catalog } = await request(stacApp).get(`/stac/TEST`);

const nextLink = catalog.links.find((l: Link) => l.rel === "next");
expect(nextLink).to.exist;
expect(nextLink.rel).to.equal("next");
expect(nextLink.type).to.equal("application/json");
expect(nextLink.title).to.equal("Next page of results");

const nextUrl = new URL(nextLink.href);
expect(nextUrl.pathname).to.equal("/stac/TEST");
});
});

describe("when there are no more results available", () => {
it("does not include a 'next' link", async () => {
sandbox.stub(stac, "CMR_QUERY_MAX").value(100);
sandbox
.stub(Provider, "getProviders")
.resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]);

const mockCollections = generateSTACCollections(10);
sandbox.stub(Collections, "getCollectionIds").resolves({
count: 10,
cursor: null,
items: mockCollections.map((coll) => ({
id: `${coll.id}`,
title: coll.title ?? faker.random.words(4),
})),
});

const { body: catalog } = await request(stacApp).get("/stac/TEST");

const nextLink = catalog.links.find((l: Link) => l.rel === "next");
expect(nextLink).to.not.exist;
});
});

describe(`given the provider has a collection`, () => {
it("has a child link for that collection without query parameters", async function () {
sandbox
Expand Down
4 changes: 2 additions & 2 deletions src/domains/collections.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import {

import { cmrSpatialToExtent } from "./bounding-box";

import { extractAssets, MAX_SIGNED_INTEGER, paginateQuery } from "./stac";
import { CMR_QUERY_MAX, extractAssets, paginateQuery } from "./stac";

const CMR_ROOT = process.env.CMR_URL;
const STAC_VERSION = process.env.STAC_VERSION ?? "1.0.0";
Expand Down Expand Up @@ -356,7 +356,7 @@ export const getAllCollectionIds = async (
cursor: string | null;
items: { id: string; title: string }[];
}> => {
params.limit = MAX_SIGNED_INTEGER;
params.limit = CMR_QUERY_MAX;

return await getCollectionIds(params, opts);
};
21 changes: 4 additions & 17 deletions src/domains/stac.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,8 @@ export const extractAssets = (
{} as AssetLinks
);
const GRAPHQL_URL = process.env.GRAPHQL_URL ?? "http://localhost:3013";
export const CMR_QUERY_MAX = 2000;
export const MAX_SIGNED_INTEGER = 2 ** 31 - 1;

export const CMR_QUERY_MAX = Number(process.env.PAGE_SIZE);

const pointToQuery = (point: GeoJSONPoint) => point.coordinates.join(",");

Expand Down Expand Up @@ -553,8 +553,7 @@ export const paginateQuery = async (
opts: {
headers?: IncomingHttpHeaders;
},
handler: GraphQLHandler,
prevResults: unknown[] = []
handler: GraphQLHandler
): Promise<GraphQLResults> => {
const paginatedParams = { ...params };

Expand Down Expand Up @@ -589,19 +588,7 @@ export const paginateQuery = async (
if (!data) throw new Error("No data returned from GraphQL during paginated query");
const { count, cursor, items } = data;

const totalResults = [...prevResults, ...items];
const moreResultsAvailable = totalResults.length !== count && cursor != null;
const foundEnough = totalResults.length >= (params.limit ?? -1);

if (moreResultsAvailable && !foundEnough) {
console.debug(
`Retrieved ${totalResults.length} of ${params.limit} for ${JSON.stringify(params, null, 2)}`
);
const nextParams = mergeMaybe({ ...params }, { cursor });
return await paginateQuery(gqlQuery, nextParams, opts, handler, totalResults);
}

return { items: totalResults, count, cursor };
return { items: items, count, cursor };
} catch (err: unknown) {
if (
!(err instanceof Error) &&
Expand Down
47 changes: 38 additions & 9 deletions src/routes/catalog.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ import { getAllCollectionIds } from "../domains/collections";
import { conformance } from "../domains/providers";
import { ServiceUnavailableError } from "../models/errors";
import { getBaseUrl, mergeMaybe, stacContext } from "../utils";
import { CMR_QUERY_MAX, stringifyQuery } from "../domains/stac";

const STAC_VERSION = process.env.STAC_VERSION ?? "1.0.0";

const generateSelfLinks = (req: Request): Links => {
const generateSelfLinks = (req: Request, nextCursor?: string | null, count?: number): Links => {
const { stacRoot, path, self } = stacContext(req);

return [
const links = [
{
rel: "self",
href: self,
Expand Down Expand Up @@ -72,20 +73,46 @@ const generateSelfLinks = (req: Request): Links => {
title: "HTML documentation",
},
];

const originalQuery = mergeMaybe(req.query, req.body);

// Add a 'next' link if there are more results available
// This is determined by:
// 1. The presence of a nextCursor (indicating more results)
// 2. The number of collection equaling CMR_QUERY_MAX (100)
// The 'next' link includes the original query parameters plus the new cursor
if (nextCursor && count === CMR_QUERY_MAX) {
const nextResultsQuery = { ...originalQuery, cursor: nextCursor };

links.push({
rel: "next",
href: `${stacRoot}${req.path}?${stringifyQuery(nextResultsQuery)}`,
type: "application/json",
title: "Next page of results",
});
}

return links;
};

const providerCollections = async (
req: Request
): Promise<[null, { id: string; title: string }[]] | [string, null]> => {
const { headers, provider } = req;
): Promise<[null, { id: string; title: string }[], string | null] | [string, null]> => {
const { headers, provider, query } = req;

const cloudOnly = headers["cloud-stac"] === "true" ? { cloudHosted: true } : {};

const query = mergeMaybe({ provider: provider?.["provider-id"] }, { ...cloudOnly });
const mergedQuery = mergeMaybe(
{
provider: provider?.["provider-id"],
cursor: query?.cursor,
},
{ ...cloudOnly }
);

try {
const { items } = await getAllCollectionIds(query, { headers });
return [null, items];
const { items, cursor } = await getAllCollectionIds(mergedQuery, { headers });
return [null, items, cursor];
} catch (err) {
console.error("A problem occurred querying for collections.", err);
return [(err as Error).message, null];
Expand All @@ -94,15 +121,17 @@ const providerCollections = async (

export const providerCatalogHandler = async (req: Request, res: Response) => {
const { provider } = req;

if (!provider) throw new ServiceUnavailableError("Could not retrieve provider information");

const [err, collections] = await providerCollections(req);
const [err, collections, cursor] = await providerCollections(req);

if (err) throw new ServiceUnavailableError(err as string);

const { self } = stacContext(req);

const selfLinks = generateSelfLinks(req);
const selfLinks = generateSelfLinks(req, cursor, collections?.length);

const childLinks = (collections ?? []).map(({ id, title }) => ({
rel: "child",
href: `${getBaseUrl(self)}/collections/${encodeURIComponent(id)}`,
Expand Down