Skip to content

Commit

Permalink
CMR-10118: Update default paging functionality for CMR-STAC (#363)
Browse files Browse the repository at this point in the history
* CMR-10118: Updates default paging

* CMR-10118: Fixes lint

* CMR-10118: Address PR comments

* CMR-10118: Fixes lint issues

* CMR-10118: Addressing PR comments

* CMR-10118: Updates github action checkout@v3 and setup-node@v3 to v4
  • Loading branch information
dmistry1 authored Oct 18, 2024
1 parent 92565e4 commit 2a46736
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 35 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ jobs:
node_version: [18]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup Node
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node_version }}

Expand All @@ -31,9 +31,9 @@ jobs:
node_version: [18]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup Node
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node_version }}

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/run_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ jobs:
node_version: [18]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup Node
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node_version }}

Expand Down
1 change: 1 addition & 0 deletions serverless.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ provider:
CMR_LB_URL: "https://cmr.sit.earthdata.nasa.gov"
GRAPHQL_URL: "https://graphql.sit.earthdata.nasa.gov/api"
STAC_VERSION: "1.0.0"
PAGE_SIZE: "100"

functions:
stac:
Expand Down
1 change: 1 addition & 0 deletions serverless.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ provider:
CMR_LB_URL: ${param:cmr-lb-url, "http://${cf:${opt:stage}.servicesDnsName}"}
GRAPHQL_URL: ${param:graphql-url, "https://graphql${self:custom.stagePrefix.${opt:stage}}.earthdata.nasa.gov/api"}
STAC_VERSION: "1.0.0"
PAGE_SIZE: "100"
LOG_LEVEL: INFO

custom:
Expand Down
56 changes: 55 additions & 1 deletion src/__tests__/providerCatalog.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ import ItemSpec from "../../resources/item-spec/json-schema/item.json";

import { Link } from "../@types/StacCatalog";
import { createApp } from "../app";
import * as Provider from "../domains/providers";
import * as Collections from "../domains/collections";
import * as Provider from "../domains/providers";
import * as stac from "../domains/stac";
import { generateSTACCollections } from "../utils/testUtils";

const stacApp = createApp();
Expand Down Expand Up @@ -165,6 +166,59 @@ describe("GET /:provider", () => {
});
});

describe("when there are more results available", () => {
  // A full page of collections plus a non-null cursor should produce a "next" link
  // in the provider catalog that points back at the same path and carries the cursor.
  it("includes a 'next' link with the correct query parameters", async () => {
    // Pin the page size so that the 100 stubbed collections count as a full page.
    sandbox.stub(stac, "CMR_QUERY_MAX").value(100);
    sandbox
      .stub(Provider, "getProviders")
      .resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]);
    const mockCollections = generateSTACCollections(100);
    // The stub reports a cursor, signalling that another page can be requested.
    sandbox.stub(Collections, "getCollectionIds").resolves({
      count: 100,
      cursor: "nextPageCursor",
      items: mockCollections.map((coll) => ({
        id: `${coll.id}`,
        title: coll.title ?? faker.random.words(4),
      })),
    });

    const { body: catalog } = await request(stacApp).get(`/stac/TEST`);

    const nextLink = catalog.links.find((l: Link) => l.rel === "next");
    expect(nextLink).to.exist;
    expect(nextLink.rel).to.equal("next");
    expect(nextLink.type).to.equal("application/json");
    expect(nextLink.title).to.equal("Next page of results");

    const nextUrl = new URL(nextLink.href);
    expect(nextUrl.pathname).to.equal("/stac/TEST");
    // The title promises a query-parameter check: the cursor returned by the
    // collection lookup must be forwarded in the link's query string.
    expect(nextUrl.searchParams.get("cursor")).to.equal("nextPageCursor");
  });
});

describe("when there are no more results available", () => {
  // A partial page with a null cursor means there is nothing further to fetch,
  // so the provider catalog must not advertise a "next" link.
  it("does not include a 'next' link", async () => {
    // Ten collections against a page size of 100: clearly less than a full page.
    const collectionSummaries = generateSTACCollections(10).map((collection) => ({
      id: `${collection.id}`,
      title: collection.title ?? faker.random.words(4),
    }));

    sandbox.stub(stac, "CMR_QUERY_MAX").value(100);
    sandbox
      .stub(Provider, "getProviders")
      .resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]);
    sandbox.stub(Collections, "getCollectionIds").resolves({
      count: 10,
      cursor: null,
      items: collectionSummaries,
    });

    const response = await request(stacApp).get("/stac/TEST");
    const catalog = response.body;

    const pagingLink = catalog.links.find((link: Link) => link.rel === "next");
    expect(pagingLink).to.not.exist;
  });
});

describe(`given the provider has a collection`, () => {
it("has a child link for that collection without query parameters", async function () {
sandbox
Expand Down
4 changes: 2 additions & 2 deletions src/domains/collections.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import {

import { cmrSpatialToExtent } from "./bounding-box";

import { extractAssets, MAX_SIGNED_INTEGER, paginateQuery } from "./stac";
import { CMR_QUERY_MAX, extractAssets, paginateQuery } from "./stac";

const CMR_ROOT = process.env.CMR_URL;
const STAC_VERSION = process.env.STAC_VERSION ?? "1.0.0";
Expand Down Expand Up @@ -356,7 +356,7 @@ export const getAllCollectionIds = async (
cursor: string | null;
items: { id: string; title: string }[];
}> => {
params.limit = MAX_SIGNED_INTEGER;
params.limit = CMR_QUERY_MAX;

return await getCollectionIds(params, opts);
};
21 changes: 4 additions & 17 deletions src/domains/stac.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,8 @@ export const extractAssets = (
{} as AssetLinks
);
const GRAPHQL_URL = process.env.GRAPHQL_URL ?? "http://localhost:3013";
export const CMR_QUERY_MAX = 2000;
export const MAX_SIGNED_INTEGER = 2 ** 31 - 1;

/**
 * Page size used when the PAGE_SIZE environment variable is missing or invalid.
 * Matches the PAGE_SIZE value set in the serverless configuration files.
 */
const DEFAULT_PAGE_SIZE = 100;

/**
 * Converts the raw PAGE_SIZE environment value into a usable page size.
 *
 * @param raw - the environment variable value, or undefined when it is not set
 * @returns the parsed value when it is a positive integer, otherwise DEFAULT_PAGE_SIZE
 */
const parsePageSize = (raw: string | undefined): number => {
  // Number(undefined) is NaN and Number("") is 0; neither is a valid query limit.
  const parsed = Number(raw);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : DEFAULT_PAGE_SIZE;
};

/**
 * Maximum number of results requested per query, read from the PAGE_SIZE
 * environment variable with a safe fallback so it is never NaN or non-positive.
 */
export const CMR_QUERY_MAX = parsePageSize(process.env.PAGE_SIZE);

const pointToQuery = (point: GeoJSONPoint) => point.coordinates.join(",");

Expand Down Expand Up @@ -553,8 +553,7 @@ export const paginateQuery = async (
opts: {
headers?: IncomingHttpHeaders;
},
handler: GraphQLHandler,
prevResults: unknown[] = []
handler: GraphQLHandler
): Promise<GraphQLResults> => {
const paginatedParams = { ...params };

Expand Down Expand Up @@ -589,19 +588,7 @@ export const paginateQuery = async (
if (!data) throw new Error("No data returned from GraphQL during paginated query");
const { count, cursor, items } = data;

const totalResults = [...prevResults, ...items];
const moreResultsAvailable = totalResults.length !== count && cursor != null;
const foundEnough = totalResults.length >= (params.limit ?? -1);

if (moreResultsAvailable && !foundEnough) {
console.debug(
`Retrieved ${totalResults.length} of ${params.limit} for ${JSON.stringify(params, null, 2)}`
);
const nextParams = mergeMaybe({ ...params }, { cursor });
return await paginateQuery(gqlQuery, nextParams, opts, handler, totalResults);
}

return { items: totalResults, count, cursor };
return { items: items, count, cursor };
} catch (err: unknown) {
if (
!(err instanceof Error) &&
Expand Down
47 changes: 38 additions & 9 deletions src/routes/catalog.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ import { getAllCollectionIds } from "../domains/collections";
import { conformance } from "../domains/providers";
import { ServiceUnavailableError } from "../models/errors";
import { getBaseUrl, mergeMaybe, stacContext } from "../utils";
import { CMR_QUERY_MAX, stringifyQuery } from "../domains/stac";

const STAC_VERSION = process.env.STAC_VERSION ?? "1.0.0";

const generateSelfLinks = (req: Request): Links => {
const generateSelfLinks = (req: Request, nextCursor?: string | null, count?: number): Links => {
const { stacRoot, path, self } = stacContext(req);

return [
const links = [
{
rel: "self",
href: self,
Expand Down Expand Up @@ -72,20 +73,46 @@ const generateSelfLinks = (req: Request): Links => {
title: "HTML documentation",
},
];

const originalQuery = mergeMaybe(req.query, req.body);

// Add a 'next' link if there are more results available
// This is determined by:
// 1. The presence of a nextCursor (indicating more results)
// 2. The number of collections returned equaling CMR_QUERY_MAX (the configured PAGE_SIZE, e.g. 100)
// The 'next' link includes the original query parameters plus the new cursor
if (nextCursor && count === CMR_QUERY_MAX) {
const nextResultsQuery = { ...originalQuery, cursor: nextCursor };

links.push({
rel: "next",
href: `${stacRoot}${req.path}?${stringifyQuery(nextResultsQuery)}`,
type: "application/json",
title: "Next page of results",
});
}

return links;
};

const providerCollections = async (
req: Request
): Promise<[null, { id: string; title: string }[]] | [string, null]> => {
const { headers, provider } = req;
): Promise<[null, { id: string; title: string }[], string | null] | [string, null]> => {
const { headers, provider, query } = req;

const cloudOnly = headers["cloud-stac"] === "true" ? { cloudHosted: true } : {};

const query = mergeMaybe({ provider: provider?.["provider-id"] }, { ...cloudOnly });
const mergedQuery = mergeMaybe(
{
provider: provider?.["provider-id"],
cursor: query?.cursor,
},
{ ...cloudOnly }
);

try {
const { items } = await getAllCollectionIds(query, { headers });
return [null, items];
const { items, cursor } = await getAllCollectionIds(mergedQuery, { headers });
return [null, items, cursor];
} catch (err) {
console.error("A problem occurred querying for collections.", err);
return [(err as Error).message, null];
Expand All @@ -94,15 +121,17 @@ const providerCollections = async (

export const providerCatalogHandler = async (req: Request, res: Response) => {
const { provider } = req;

if (!provider) throw new ServiceUnavailableError("Could not retrieve provider information");

const [err, collections] = await providerCollections(req);
const [err, collections, cursor] = await providerCollections(req);

if (err) throw new ServiceUnavailableError(err as string);

const { self } = stacContext(req);

const selfLinks = generateSelfLinks(req);
const selfLinks = generateSelfLinks(req, cursor, collections?.length);

const childLinks = (collections ?? []).map(({ id, title }) => ({
rel: "child",
href: `${getBaseUrl(self)}/collections/${encodeURIComponent(id)}`,
Expand Down

0 comments on commit 2a46736

Please sign in to comment.