From 5b3d52bc7c1e309dde2b6dd18d68558d6871dec1 Mon Sep 17 00:00:00 2001 From: chronark Date: Wed, 27 Nov 2024 18:30:11 +0100 Subject: [PATCH 1/8] feat: styling rfcs --- apps/engineering/app/docs/layout.tsx | 7 ++-- .../engineering/app/rfcs/[[...slug]]/page.tsx | 9 ++--- apps/engineering/app/rfcs/layout.tsx | 8 +++-- .../content/rfcs/0000-template.mdx | 33 +++++++++++++++++++ .../rfcs/0002-github-secret-scanning.mdx | 2 +- 5 files changed, 45 insertions(+), 14 deletions(-) create mode 100644 apps/engineering/content/rfcs/0000-template.mdx diff --git a/apps/engineering/app/docs/layout.tsx b/apps/engineering/app/docs/layout.tsx index 7a3a68fd33..72d183f004 100644 --- a/apps/engineering/app/docs/layout.tsx +++ b/apps/engineering/app/docs/layout.tsx @@ -1,7 +1,6 @@ import { source } from "@/app/source"; -import { RootToggle } from "fumadocs-ui/components/layout/root-toggle"; import { DocsLayout } from "fumadocs-ui/layouts/docs"; -import { Code, Component, Handshake, Terminal } from "lucide-react"; +import { Code, Component, Terminal } from "lucide-react"; import type { ReactNode } from "react"; import { baseOptions } from "../layout.config"; @@ -39,10 +38,10 @@ export default function Layout({ children }: { children: ReactNode }) { url: "/docs/architecture", icon: , }, - ] + ], }} > {children} - + ); } diff --git a/apps/engineering/app/rfcs/[[...slug]]/page.tsx b/apps/engineering/app/rfcs/[[...slug]]/page.tsx index 8d837b9835..6caded34cf 100644 --- a/apps/engineering/app/rfcs/[[...slug]]/page.tsx +++ b/apps/engineering/app/rfcs/[[...slug]]/page.tsx @@ -12,7 +12,6 @@ export default async function Page({ }: { params: { slug?: string[] }; }) { - const page = rfcSource.getPage(params.slug); if (!page) { @@ -25,13 +24,11 @@ export default async function Page({

RFCS -

Check the sidebar

- - ) + ); } const MDX = page.data.body; @@ -50,9 +47,9 @@ export default async function Page({ - {page.data.description} + {page.data.description} - + diff --git a/apps/engineering/app/rfcs/layout.tsx b/apps/engineering/app/rfcs/layout.tsx index 76e24baa47..2a74b754ce 100644 --- a/apps/engineering/app/rfcs/layout.tsx +++ b/apps/engineering/app/rfcs/layout.tsx @@ -5,8 +5,10 @@ import { baseOptions } from "../layout.config"; export default function Layout({ children }: { children: ReactNode }) { return ( - - {children} - +
+ + {children} + +
); } diff --git a/apps/engineering/content/rfcs/0000-template.mdx b/apps/engineering/content/rfcs/0000-template.mdx new file mode 100644 index 0000000000..00187c58dc --- /dev/null +++ b/apps/engineering/content/rfcs/0000-template.mdx @@ -0,0 +1,33 @@ +--- +title: 0000 Template +description: You may copy this as a starting point, but it's not required +date: 2024-11-25 +authors: + - Andreas Thomas + - Someone Else +--- + + +## Summary + +One paragraph explanation of the feature. + +## Motivation + +Why are we doing this? What use cases does it support? What is the expected outcome? + +## Detailed design + +This is the bulk of the RFC. Explain the design in enough detail for somebody familiar with the network to understand, and for somebody familiar with the code practices to implement. This should get into specifics and corner-cases, and include examples of how the feature is used. + +## Drawbacks + +Why should we not do this? + +## Alternatives + +What other designs have been considered? What is the impact of not doing this? + +## Unresolved questions + +What parts of the design are still to be done? 
diff --git a/apps/engineering/content/rfcs/0002-github-secret-scanning.mdx b/apps/engineering/content/rfcs/0002-github-secret-scanning.mdx index 476a13d549..b0c55124f2 100644 --- a/apps/engineering/content/rfcs/0002-github-secret-scanning.mdx +++ b/apps/engineering/content/rfcs/0002-github-secret-scanning.mdx @@ -1,5 +1,5 @@ --- -title: 0002 GitHub Secret Scanning +title: 0002 Secret Scanning authors: - Dom Eccleston date: 2024-01-14 From dc0d68f4756816d6b49e0c3b813ad8abae45e0b4 Mon Sep 17 00:00:00 2001 From: chronark Date: Wed, 27 Nov 2024 18:30:42 +0100 Subject: [PATCH 2/8] feat: rfc --- .../content/rfcs/0005-analytics-api.mdx | 221 +++++++++++++++++- 1 file changed, 215 insertions(+), 6 deletions(-) diff --git a/apps/engineering/content/rfcs/0005-analytics-api.mdx b/apps/engineering/content/rfcs/0005-analytics-api.mdx index 88ad216b94..151c01ba6e 100644 --- a/apps/engineering/content/rfcs/0005-analytics-api.mdx +++ b/apps/engineering/content/rfcs/0005-analytics-api.mdx @@ -8,23 +8,232 @@ authors: --- - ## Motivation -Why are we doing this? What use cases does it support? What is the expected outcome? +Consumption based billing for APIs is getting more and more popular, but it's tedious to build in house. +For low frequency events, it's quite possible to emit usage events directly to Stripe or similar, but this becomes very noisy quickly. +Furthermore if you want to build end-user facing or internal analytics, you need to be able to query the events from Stripe, which often does not provide the granularity required. + +Most teams end up without end-user facing analytics, or build their own system to store and query usage metrics. + +Since Unkey already stores and aggregates verification events by time, outcome and identity, we can offer this data via an API. ## Detailed design -This is the bulk of the RFC. Explain the design in enough detail for somebody familiar with the network to understand, and for somebody familiar with the code practices to implement. 
This should get into specifics and corner-cases, and include examples of how the feature is used. +In order to charge for usage, our users need information of **who** used their API **when** and **how often**. + +For end-user facing analytics dashboards, it would also be relevant to differentiate between different outcomes (`VALID`, `RATE_LIMITED`, `USAGE_EXCEEDED`, `INSUFFICIENT_PERMiSSIONS` etc.) + +### Available data + +We already store events for every verification in ClickHouse and have materialized views for aggregations. + +```sql +`request_id` String, +`time` Int64, +`workspace_id` String, +`key_space_id` String, +`key_id` String, +`region` LowCardinality(String), +`outcome` LowCardinality(String), +`identity_id` String +``` + + + +We can return this data in different granularities: +- hourly +- daily +- monthly + + +In order be scalable, we will not expose individual events in the beginning, nor allow you to filter by exact timestamps. If we can't query a materialized view, it would be too compute-intensive to query. +If needed, a per-minute granularity materialized view could be created, but is not currently planned. + + +And filtered by: +- identity_id +- key_space_id (which we can derive from the api_id) +- key_id +- outcome +- start and end time + + +### Request + + +We will create a new endpoint `GET /v1/analytics.getVerifications`, protected by a root key in the `Authorization` header. +The root key will require specific permissions tbd. + +Calling the endpoint will return an array of verification counts, aggregated by time and provided filters. + +All required and optional arguments are passed via query parameters. Some paramters may be specific multiple times, either as You may specify multiple ids such as `?param=value_1,value_2` or `?param=value_1¶m=value_2` + +#### start +(integer, required) + +Unix timestamp in milliseconds to specify the start of the interval to retrieve. 
+ +We will return all datapoints with a timestamp greater or equal to `start`. + +There may be restrictions depending on the granularity chosen and the retention quota of the customer + +#### end +(integer, required) + +Unix timestamp in milliseconds to specify the end of the interval to retrieve. + +We will return all datapoints with a timestamp less than or equal to `end`. + +There may be restrictions depending on the granularity chosen and the retention quota of the customer + +#### granularity +(enum ["hour", "day", "month"], required) + +Selects the granularity of data. For example selecting `hour` will return one datapoint per hour. + +#### apiId +(string, optional, may be provided multiple times) + +Select the API for which to return data. + +When you are providing zero or more than one API ids, all usage counts are aggregated and summed up. +Send multiple requests with one apiId each if you need counts per API. + +#### externalId +(string, optional, may be provided multiple times) + +Filtering by externalId allows you to narrow down the search to a specific user or organisation. + + +When you are providing zero or more than one external ids, all usage counts are aggregated and summed up. +Send multiple requests with one externalId each if you need counts per identity. + +#### keyId +(string, optional, may be provided multiple times) + +Only include data for a speciifc key or keys. + +When you are providing zero or more than one key ids, all usage counts are aggregated and summed up. +Send multiple requests with one keyId each if you need counts per key. 
+ + + +### Example Access Patterns + +> A user sees a chart of their usage over the past 24h, showing the outcomes + +```bash +?start={timestamp_24h_ago}&end={timestamp_now}&externalId=user_123&granularity=hour + +[ + // 24 elements, one per hour + { time: 123, valid: 10, ratelimited: 2, ..., total: 30 }, +] +``` + +> A user sees a total usage counter and daily usage chart in the current month + +```bash +?start={timestamp_start_of_month}&end={timestamp_now}&granularity=day&externalId={user_123} + +[ + // up to 31 elements, one per day + { time: 123, valid: 10, ..., total: 30 } +] + +``` + +> A monthly cron job creates invoices for each identity: + +```bash +?start={timestamp_start_of_month}&end={timestamp_end_of_month}&granularity=month&externalId={user_123} + +[ + // one element for the single month + { time: 123, valid: 10, ..., total: 30 } +] +``` + + +> A user sees a gauge with their quota, showing they used X out of Y API calls in the current billing period: + + +```bash +?start={timestamp_start_of_billing_cycle}&end={timestamp_end_of_billing_cycle}&granularity=day&externalId={user_123} + +[ + { time: 123, valid: 10, ..., total: 30 } +] +``` +Sum up the `valid` or `total`, however you want to count, and display it to the user. + + + +### Response + +Successful responses will always return an array of datapoints. One datapoint per granular slice, ie: hourly granularity means you receive one element per hour within the queried interval. + +```json title="200 OK Body" +[ + Datapoint, + Datapoint, + Datapoint +] + +``` + +```ts title="Datapoint" +type Datapoint = { + /** + * Unix timestamp in milliseconds of the start of the current time slice. + */ + time: number + + + /** + * For brevity, I will not explain every outcome here. + * There will be one key and count for every possible outcome, so you may + * choose what to display or not. + */ + valid: number + rateLimited: number + usageExceeded: number + // ... 
+ + /** + * Total number of verifications in the current time slice, regardless of outcome. + */ + total: number + + /** + * Only available if we add groupBy queries. + * In this case there would be one datapoint per time and groupBy target. + * See #unresolved-questions + */ + keyId?: string + apiId?: string + externalId?: string +} +``` + ## Drawbacks -I Why should we not do this? +Our current serverless architecture costs money per invocation. Our customer's users could generate a decent amount of requests. ## Alternatives -What other designs have been considered? What is the impact of not doing this? +Offering a prometheus `/metrics` endpoint would be interesting, however I believe most of our users don't have the infra in place to adopt this easily. + +___ + +Instead of aggregating multiple keyIds together, we could not allow specifying them multiple times and instead ask the user to create one request per id and then merge them together on their side. ## Unresolved questions -What parts of the design are still to be done? +- What cache times are acceptable? We probably don't want to hit ClickHouse for every single query, especially for fetching monthly aggregations. +- Should we add `groupBy`, `limit` and `orderBy` parameters for advanced usecases? 
+ - Usage per key for all keys of an identity + - Top 10 keys of an identity + - Top 10 identities in an API From 005c0e675063208e10f2ea9b0a51314b2ac89d6f Mon Sep 17 00:00:00 2001 From: chronark Date: Wed, 27 Nov 2024 19:33:04 +0100 Subject: [PATCH 3/8] feat: engineering docs styling --- apps/engineering/app/rfcs/[[...slug]]/page.tsx | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/apps/engineering/app/rfcs/[[...slug]]/page.tsx b/apps/engineering/app/rfcs/[[...slug]]/page.tsx index 6caded34cf..cafa68c9a0 100644 --- a/apps/engineering/app/rfcs/[[...slug]]/page.tsx +++ b/apps/engineering/app/rfcs/[[...slug]]/page.tsx @@ -37,16 +37,12 @@ export default async function Page({ {page.data.title} - -
- ID - {page.data.title.split(" ").at(0)} - {page.data.authors.length > 1 ? "Authors" : "Author"} - {page.data.authors.join(", ")} - Date - -
-
+
+ {page.data.authors.length > 1 ? "Authors" : "Author"} + {page.data.authors.join(", ")} + Date + +
{page.data.description} From 52f83be02399c8e6bf2240f2ca1ab10097dd0ec9 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:35:18 +0000 Subject: [PATCH 4/8] [autofix.ci] apply automated fixes --- apps/engineering/app/rfcs/[[...slug]]/page.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/engineering/app/rfcs/[[...slug]]/page.tsx b/apps/engineering/app/rfcs/[[...slug]]/page.tsx index cafa68c9a0..024390b5ce 100644 --- a/apps/engineering/app/rfcs/[[...slug]]/page.tsx +++ b/apps/engineering/app/rfcs/[[...slug]]/page.tsx @@ -1,5 +1,4 @@ import { rfcSource } from "@/app/source"; -import { Card } from "fumadocs-ui/components/card"; import defaultMdxComponents from "fumadocs-ui/mdx"; import { DocsBody, DocsDescription, DocsPage, DocsTitle } from "fumadocs-ui/page"; import type { Metadata } from "next"; From 3a721feb9e1bf31ee070a0e7f5c88e6b67c84951 Mon Sep 17 00:00:00 2001 From: chronark Date: Wed, 27 Nov 2024 19:35:50 +0100 Subject: [PATCH 5/8] feat: engineering docs styling --- apps/engineering/app/rfcs/[[...slug]]/page.tsx | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/apps/engineering/app/rfcs/[[...slug]]/page.tsx b/apps/engineering/app/rfcs/[[...slug]]/page.tsx index cafa68c9a0..0168ceb8e2 100644 --- a/apps/engineering/app/rfcs/[[...slug]]/page.tsx +++ b/apps/engineering/app/rfcs/[[...slug]]/page.tsx @@ -1,5 +1,4 @@ import { rfcSource } from "@/app/source"; -import { Card } from "fumadocs-ui/components/card"; import defaultMdxComponents from "fumadocs-ui/mdx"; import { DocsBody, DocsDescription, DocsPage, DocsTitle } from "fumadocs-ui/page"; import type { Metadata } from "next"; @@ -37,11 +36,16 @@ export default async function Page({ {page.data.title} -
- {page.data.authors.length > 1 ? "Authors" : "Author"} - {page.data.authors.join(", ")} - Date - +
+
+ {page.data.authors.length > 1 ? "Authors" : "Author"} + {page.data.authors.join(", ")} +
+ +
+ Date + +
{page.data.description} From b788bdf0719006b4ae545d318792c8b59f6f68af Mon Sep 17 00:00:00 2001 From: chronark Date: Wed, 27 Nov 2024 22:17:43 +0100 Subject: [PATCH 6/8] chore: refine groupBy --- .../content/rfcs/0005-analytics-api.mdx | 63 ++++++++++++++----- 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/apps/engineering/content/rfcs/0005-analytics-api.mdx b/apps/engineering/content/rfcs/0005-analytics-api.mdx index 151c01ba6e..3438c06027 100644 --- a/apps/engineering/content/rfcs/0005-analytics-api.mdx +++ b/apps/engineering/content/rfcs/0005-analytics-api.mdx @@ -117,11 +117,37 @@ Only include data for a speciifc key or keys. When you are providing zero or more than one key ids, all usage counts are aggregated and summed up. Send multiple requests with one keyId each if you need counts per key. +#### groupBy +(enum ["key", "identity"], optional) +By default, all datapoints are aggregated by time alone, summing up all verifications across identities and keys. +However in certain scenarios you want to get a breakdown per key. For example finding out the usage spread across all keys for a specific user. + + +### limit +(integer, optional) +Limit the number of returned datapoints. This may become useful for querying the top 10 identities based on usage. + +#### orderBy +(enum ["total", "valid", ..], optional) + + +This is a rough idea. + +We're leaning towards `?orderBy=valid&order=asc`, but have not decided what this API should look like. + + + + + +#### order +(enum ["asc", "desc"], optional, default="asc", only allowed in combination with `orderBy`) + +See above. ### Example Access Patterns -> A user sees a chart of their usage over the past 24h, showing the outcomes +> A chart of an enduser's usage over the past 24h, showing the outcomes ```bash ?start={timestamp_24h_ago}&end={timestamp_now}&externalId=user_123&granularity=hour @@ -132,14 +158,20 @@ Send multiple requests with one keyId each if you need counts per key. 
] ``` -> A user sees a total usage counter and daily usage chart in the current month +> A daily usage breakdown for a user per key in the current month ```bash -?start={timestamp_start_of_month}&end={timestamp_now}&granularity=day&externalId={user_123} +?start={timestamp_start_of_month}&end={timestamp_now}&granularity=day&externalId={user_123}&groupBy=key [ - // up to 31 elements, one per day - { time: 123, valid: 10, ..., total: 30 } + // One row per keyId and time + { keyId: "key_1", time: 123, valid: 10, ..., total: 30 }, + { keyId: "key_1", time: 456, valid: 20, ..., total: 52 }, + ... + + { keyId: "key_2", time: 123, valid: 0, ..., total: 10 }, + { keyId: "key_2", time: 456, valid: 1, ..., total: 2 }, + ... ] ``` @@ -169,6 +201,11 @@ Send multiple requests with one keyId each if you need counts per key. Sum up the `valid` or `total`, however you want to count, and display it to the user. +> An internal dashboard shows the top 10 users by API usage over the past 30 days + +```bash +?start={timestamp_30_days_ago}&end={timestamp_now}&granularity=day&groupBy=identity&limit=10&orderBy=total&order=desc +``` ### Response @@ -180,7 +217,6 @@ Successful responses will always return an array of datapoints. One datapoint pe Datapoint, Datapoint ] - ``` ```ts title="Datapoint" @@ -207,13 +243,15 @@ type Datapoint = { total: number /** - * Only available if we add groupBy queries. + * Only available when specifying groupBy in the query. * In this case there would be one datapoint per time and groupBy target. - * See #unresolved-questions */ keyId?: string apiId?: string - externalId?: string + identity?: { + id: string + externalId: string + } } ``` @@ -228,12 +266,9 @@ Offering a prometheus `/metrics` endpoint would be interesting, however I believ ___ -Instead of aggregating multiple keyIds together, we could not allow specifying them multiple times and instead ask the user to create one request per id and then merge them together on their side. 
+Instead of aggregating multiple keyIds together, we could disallow specifying them multiple times and instead ask the user to create one request per id and then merge them together on their side. ## Unresolved questions - What cache times are acceptable? We probably don't want to hit ClickHouse for every single query, especially for fetching monthly aggregations. -- Should we add `groupBy`, `limit` and `orderBy` parameters for advanced usecases? - - Usage per key for all keys of an identity - - Top 10 keys of an identity - - Top 10 identities in an API +- When we return keyIds as part of groupBy queries, the user needs to make another call to our API in order to fetch details such as the name for each key. That doesn't feel great. From 73d9bb40f9218d755a9c2ef66efce34cc858c4ab Mon Sep 17 00:00:00 2001 From: chronark Date: Wed, 27 Nov 2024 22:28:40 +0100 Subject: [PATCH 7/8] chore: refine groupBy --- apps/engineering/content/rfcs/0005-analytics-api.mdx | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/engineering/content/rfcs/0005-analytics-api.mdx b/apps/engineering/content/rfcs/0005-analytics-api.mdx index 3438c06027..57f71bed9c 100644 --- a/apps/engineering/content/rfcs/0005-analytics-api.mdx +++ b/apps/engineering/content/rfcs/0005-analytics-api.mdx @@ -272,3 +272,4 @@ - What cache times are acceptable? We probably don't want to hit ClickHouse for every single query, especially for fetching monthly aggregations. - When we return keyIds as part of groupBy queries, the user needs to make another call to our API in order to fetch details such as the name for each key. That doesn't feel great. +- What are the retention quotas per tier and granularity? 
From ff1b232ac02d8d226f8ddbfe8db0b58550826a6b Mon Sep 17 00:00:00 2001 From: chronark Date: Wed, 27 Nov 2024 22:34:25 +0100 Subject: [PATCH 8/8] fix: typo --- apps/engineering/content/rfcs/0005-analytics-api.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/engineering/content/rfcs/0005-analytics-api.mdx b/apps/engineering/content/rfcs/0005-analytics-api.mdx index 57f71bed9c..5b0e3094e5 100644 --- a/apps/engineering/content/rfcs/0005-analytics-api.mdx +++ b/apps/engineering/content/rfcs/0005-analytics-api.mdx @@ -67,7 +67,7 @@ The root key will require specific permissions tbd. Calling the endpoint will return an array of verification counts, aggregated by time and provided filters. -All required and optional arguments are passed via query parameters. Some paramters may be specific multiple times, either as You may specify multiple ids such as `?param=value_1,value_2` or `?param=value_1¶m=value_2` +All required and optional arguments are passed via query parameters. Some parameters may be specified multiple times, either as `?param=value_1,value_2` or `?param=value_1&param=value_2` #### start (integer, required)