From feb2d4196b8772b22ca817b7dd1eac5f9922995b Mon Sep 17 00:00:00 2001 From: Robin Moffatt Date: Thu, 27 Jul 2023 18:49:37 +0100 Subject: [PATCH] Add content to index pages of /understand (#6270) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Remove default layout metadata - It's the only layout - It's the default anyway - It adds unnecessary content to the front page matter metadata for pages, making it more likely to make a mistake editing what is there * Add content to index pages of /understand, fix a few page nits * Always that file you forget to update 🤦🏻 --- cmd/lakectl/cmd/docs.go | 1 - docs/404.md | 1 - docs/cloud/auditing.md | 1 - docs/cloud/index.md | 1 - docs/cloud/managed-gc.md | 1 - docs/cloud/private-link.md | 1 - docs/cloud/sso.md | 1 - docs/cloud/unity-delta-sharing.md | 1 - docs/enterprise/index.md | 1 - docs/enterprise/sso.md | 1 - docs/howto/copying.md | 1 - docs/howto/deploy/aws.md | 1 - docs/howto/deploy/azure.md | 1 - docs/howto/deploy/gcp.md | 1 - docs/howto/deploy/index.md | 7 ++- docs/howto/deploy/onprem.md | 1 - docs/howto/deploy/upgrade.md | 1 - docs/howto/export.md | 1 - docs/howto/garbage-collection-committed.md | 1 - docs/howto/garbage-collection-index.md | 1 - docs/howto/garbage-collection-uncommitted.md | 1 - docs/howto/garbage-collection.md | 1 - docs/howto/gc-internals.md | 1 - docs/howto/hooks/airflow.md | 1 - docs/howto/hooks/index.md | 1 - docs/howto/hooks/lua.md | 1 - docs/howto/hooks/webhooks.md | 1 - docs/howto/import.md | 1 - docs/howto/index.md | 1 - docs/howto/migrate-away.md | 1 - docs/howto/protect-branches.md | 1 - docs/howto/sizing-guide.md | 1 - docs/index.md | 1 - docs/integrations/airbyte.md | 1 - docs/integrations/airflow.md | 1 - docs/integrations/athena.md | 1 - docs/integrations/aws_cli.md | 1 - docs/integrations/cloudera.md | 1 - docs/integrations/dbt.md | 1 - docs/integrations/delta.md | 1 - docs/integrations/dremio.md | 1 - docs/integrations/duckdb.md | 1 - 
docs/integrations/glue_hive_metastore.md | 1 - docs/integrations/hive.md | 1 - docs/integrations/iceberg.md | 1 - docs/integrations/index.md | 1 - docs/integrations/kafka.md | 1 - docs/integrations/kubeflow.md | 1 - docs/integrations/presto_trino.md | 1 - docs/integrations/python.md | 1 - docs/integrations/r.md | 1 - docs/integrations/sagemaker.md | 1 - docs/integrations/spark.md | 1 - docs/posts/index.md | 1 - docs/posts/security_update.md | 1 - docs/project/contributing.md | 1 - docs/project/docs/callouts.md | 1 - docs/project/index.md | 1 - docs/quickstart/learning-more-lakefs.md | 1 - docs/reference/access-control-lists.md | 1 - docs/reference/api.md | 1 - docs/reference/authentication.md | 1 - docs/reference/cli.md | 1 - docs/reference/configuration.md | 1 - docs/reference/index.md | 1 - docs/reference/monitor.md | 1 - docs/reference/presigned-url.md | 1 - docs/reference/rbac.md | 1 - docs/reference/remote-authenticator.md | 1 - docs/reference/s3.md | 1 - docs/reference/spark-client.md | 1 - docs/slack/index.md | 1 - docs/understand/architecture.md | 38 ++++++---------- .../data_lifecycle_management/ci.md | 2 - .../data_lifecycle_management/data-devenv.md | 2 - .../data_lifecycle_management/index.md | 11 ++++- .../data_lifecycle_management/production.md | 2 - docs/understand/faq.md | 1 - docs/understand/glossary.md | 2 - docs/understand/how/index.md | 14 ++++-- docs/understand/how/kv.md | 2 - docs/understand/how/merge.md | 4 +- docs/understand/how/versioning-internals.md | 2 - docs/understand/index.md | 43 ++++++++++++++++++- docs/understand/model.md | 12 +++--- docs/understand/performance-best-practices.md | 2 - docs/understand/use_cases/etl_testing.md | 1 - docs/understand/use_cases/index.md | 18 +++++++- 88 files changed, 100 insertions(+), 134 deletions(-) diff --git a/cmd/lakectl/cmd/docs.go b/cmd/lakectl/cmd/docs.go index 8b10c50185d..85e7a808ad8 100644 --- a/cmd/lakectl/cmd/docs.go +++ b/cmd/lakectl/cmd/docs.go @@ -11,7 +11,6 @@ import ( // 
language=markdown var cliReferenceHeader = `--- -layout: default title: lakectl (lakeFS command-line tool) description: lakeFS comes with its own native CLI client. Here you can see the complete command reference. parent: Reference diff --git a/docs/404.md b/docs/404.md index a649e61747e..7f5d0d9dba7 100644 --- a/docs/404.md +++ b/docs/404.md @@ -1,5 +1,4 @@ --- -layout: default permalink: /404.html nav_exclude: true --- diff --git a/docs/cloud/auditing.md b/docs/cloud/auditing.md index 019444cb3da..e7922e3924f 100644 --- a/docs/cloud/auditing.md +++ b/docs/cloud/auditing.md @@ -1,5 +1,4 @@ --- -layout: default title: Auditing parent: lakeFS Cloud description: Auditing is a solution for lakeFS Cloud which enables tracking of events and activities performed within the solution. These logs capture information such as who accessed the solution, what actions were taken, and when they occurred. diff --git a/docs/cloud/index.md b/docs/cloud/index.md index 7e7eb59823a..c34719f8f7f 100644 --- a/docs/cloud/index.md +++ b/docs/cloud/index.md @@ -1,5 +1,4 @@ --- -layout: default title: lakeFS Cloud description: This section includes lakeFS Cloud documentation nav_order: 80 diff --git a/docs/cloud/managed-gc.md b/docs/cloud/managed-gc.md index 1c6e80fa3e5..b98e0f2f64b 100644 --- a/docs/cloud/managed-gc.md +++ b/docs/cloud/managed-gc.md @@ -1,5 +1,4 @@ --- -layout: default title: Managed Garbage Collection description: Reduce the operational overhead of running garbage collection manually. parent: lakeFS Cloud diff --git a/docs/cloud/private-link.md b/docs/cloud/private-link.md index cd5e1ad9364..e8416a873bb 100644 --- a/docs/cloud/private-link.md +++ b/docs/cloud/private-link.md @@ -1,5 +1,4 @@ --- -layout: default title: Private Link description: Private Link enables lakeFS Cloud to interact with your infrastructure using private networking. 
parent: lakeFS Cloud diff --git a/docs/cloud/sso.md b/docs/cloud/sso.md index a4087b2b786..c6279298c11 100644 --- a/docs/cloud/sso.md +++ b/docs/cloud/sso.md @@ -1,5 +1,4 @@ --- -layout: default title: Single Sign On (SSO) description: How to configure Single Sign On (SSO) for lakeFS Cloud. parent: lakeFS Cloud diff --git a/docs/cloud/unity-delta-sharing.md b/docs/cloud/unity-delta-sharing.md index 8e695623d81..ec809af5408 100644 --- a/docs/cloud/unity-delta-sharing.md +++ b/docs/cloud/unity-delta-sharing.md @@ -1,5 +1,4 @@ --- -layout: default title: Unity Delta Sharing parent: lakeFS Cloud description: The lakeFS Delta Sharing service lets you export DeltaLake and HMS-style tables stored on lakeFS over the Delta Sharing protocol. This is particularly useful with DataBricks Unity. diff --git a/docs/enterprise/index.md b/docs/enterprise/index.md index 1dafad16ab7..462fd4283d2 100644 --- a/docs/enterprise/index.md +++ b/docs/enterprise/index.md @@ -1,5 +1,4 @@ --- -layout: default title: lakeFS Enterprise description: lakeFS Enterprise is an enterprise-ready lakeFS solution providing additional features including RBAC, SSO and Support SLA. nav_order: 81 diff --git a/docs/enterprise/sso.md b/docs/enterprise/sso.md index 4fc644f793d..f288d9af25c 100644 --- a/docs/enterprise/sso.md +++ b/docs/enterprise/sso.md @@ -1,5 +1,4 @@ --- -layout: default title: Single Sign On (SSO) in lakeFS Enterprise description: How to configure Single Sign On in lakeFS Enterprise. 
parent: lakeFS Enterprise diff --git a/docs/howto/copying.md b/docs/howto/copying.md index 8a0935a2ac0..f7573c64c57 100644 --- a/docs/howto/copying.md +++ b/docs/howto/copying.md @@ -1,5 +1,4 @@ --- -layout: default title: Copying data to/from lakeFS description: parent: How-To diff --git a/docs/howto/deploy/aws.md b/docs/howto/deploy/aws.md index c23b2047432..32f12d6dcfa 100644 --- a/docs/howto/deploy/aws.md +++ b/docs/howto/deploy/aws.md @@ -1,5 +1,4 @@ --- -layout: default title: AWS grand_parent: How-To parent: Install lakeFS diff --git a/docs/howto/deploy/azure.md b/docs/howto/deploy/azure.md index e100700a496..a0df061d80f 100644 --- a/docs/howto/deploy/azure.md +++ b/docs/howto/deploy/azure.md @@ -1,5 +1,4 @@ --- -layout: default title: Azure grand_parent: How-To parent: Install lakeFS diff --git a/docs/howto/deploy/gcp.md b/docs/howto/deploy/gcp.md index 701c8eebca1..6c9508375d1 100644 --- a/docs/howto/deploy/gcp.md +++ b/docs/howto/deploy/gcp.md @@ -1,5 +1,4 @@ --- -layout: default title: GCP grand_parent: How-To parent: Install lakeFS diff --git a/docs/howto/deploy/index.md b/docs/howto/deploy/index.md index b32c106565c..875f59fac6f 100644 --- a/docs/howto/deploy/index.md +++ b/docs/howto/deploy/index.md @@ -1,5 +1,4 @@ --- -layout: default title: Install lakeFS parent: How-To description: This section will guide you through deploying and setting up a production lakeFS environment. @@ -16,4 +15,8 @@ redirect_from: # Deploy and Setup lakeFS For a hosted lakeFS service with guaranteed SLAs, try [lakeFS Cloud](https://lakefs.cloud) -{: .note } \ No newline at end of file +{: .note } + +lakeFS releases include [binaries](https://github.com/treeverse/lakeFS/releases) for common operating systems, a [containerized option](https://hub.docker.com/r/treeverse/lakefs) or a [Helm chart](https://artifacthub.io/packages/helm/lakefs/lakefs). 
+ +Check out our guides for running lakeFS on [AWS]({{ site.baseurl }}/howto/deploy/aws.md), [GCP]({{ site.baseurl }}/howto/deploy/gcp.md) and [more]({{ site.baseurl }}/howto/deploy). diff --git a/docs/howto/deploy/onprem.md b/docs/howto/deploy/onprem.md index a36dcbdc306..a9f0564c883 100644 --- a/docs/howto/deploy/onprem.md +++ b/docs/howto/deploy/onprem.md @@ -1,5 +1,4 @@ --- -layout: default title: On-Premises Deployment of lakeFS grand_parent: How-To parent: Install lakeFS diff --git a/docs/howto/deploy/upgrade.md b/docs/howto/deploy/upgrade.md index dad5b472c61..a885e0848c4 100644 --- a/docs/howto/deploy/upgrade.md +++ b/docs/howto/deploy/upgrade.md @@ -1,5 +1,4 @@ --- -layout: default title: Upgrade lakeFS description: A guide to upgrading lakeFS to the latest version. grand_parent: How-To diff --git a/docs/howto/export.md b/docs/howto/export.md index 21d238aaed4..8e38eb7ac3d 100644 --- a/docs/howto/export.md +++ b/docs/howto/export.md @@ -1,5 +1,4 @@ --- -layout: default title: Export Data description: Use the lakeFS Spark client or RClone inside Docker to export a lakeFS commit to the object store. parent: How-To diff --git a/docs/howto/garbage-collection-committed.md b/docs/howto/garbage-collection-committed.md index f0732d2edbf..ccb6fb1270d 100644 --- a/docs/howto/garbage-collection-committed.md +++ b/docs/howto/garbage-collection-committed.md @@ -1,5 +1,4 @@ --- -layout: default title: (deprecated) Committed Objects description: Clean up unnecessary objects using the garbage collection feature in lakeFS. parent: Garbage Collection diff --git a/docs/howto/garbage-collection-index.md b/docs/howto/garbage-collection-index.md index e1e03eff417..3eb824eaeb8 100644 --- a/docs/howto/garbage-collection-index.md +++ b/docs/howto/garbage-collection-index.md @@ -1,5 +1,4 @@ --- -layout: default title: Garbage Collection description: Clean up unnecessary objects using the garbage collection feature in lakeFS. 
parent: How-To diff --git a/docs/howto/garbage-collection-uncommitted.md b/docs/howto/garbage-collection-uncommitted.md index 936e5c625e4..9957d86f6be 100644 --- a/docs/howto/garbage-collection-uncommitted.md +++ b/docs/howto/garbage-collection-uncommitted.md @@ -1,5 +1,4 @@ --- -layout: default title: (deprecated) Uncommitted Objects description: Clean up uncommitted objects that are no longer needed. parent: Garbage Collection diff --git a/docs/howto/garbage-collection.md b/docs/howto/garbage-collection.md index cfa0798342c..5ab2e07abce 100644 --- a/docs/howto/garbage-collection.md +++ b/docs/howto/garbage-collection.md @@ -1,5 +1,4 @@ --- -layout: default title: Garbage Collection description: Clean up expired objects using the garbage collection feature in lakeFS. parent: Garbage Collection diff --git a/docs/howto/gc-internals.md b/docs/howto/gc-internals.md index a12679f0c24..3b5d26dd484 100644 --- a/docs/howto/gc-internals.md +++ b/docs/howto/gc-internals.md @@ -1,5 +1,4 @@ --- -layout: default title: "Internals: Committed GC" description: How Garbage Collection in lakeFS works parent: Garbage Collection diff --git a/docs/howto/hooks/airflow.md b/docs/howto/hooks/airflow.md index 7814dfa3669..a01eb8b4dd9 100644 --- a/docs/howto/hooks/airflow.md +++ b/docs/howto/hooks/airflow.md @@ -1,5 +1,4 @@ --- -layout: default title: Airflow Hooks parent: Actions and Hooks grand_parent: How-To diff --git a/docs/howto/hooks/index.md b/docs/howto/hooks/index.md index ce9b27276f7..8f3c1731300 100644 --- a/docs/howto/hooks/index.md +++ b/docs/howto/hooks/index.md @@ -1,5 +1,4 @@ --- -layout: default title: Actions and Hooks description: Overview of lakeFS Actions and Hooks has_children: true diff --git a/docs/howto/hooks/lua.md b/docs/howto/hooks/lua.md index d1318ac11ea..7e8d94ecc23 100644 --- a/docs/howto/hooks/lua.md +++ b/docs/howto/hooks/lua.md @@ -1,5 +1,4 @@ --- -layout: default title: Lua Hooks parent: Actions and Hooks grand_parent: How-To diff --git 
a/docs/howto/hooks/webhooks.md b/docs/howto/hooks/webhooks.md index 9a029fed205..60536564bc2 100644 --- a/docs/howto/hooks/webhooks.md +++ b/docs/howto/hooks/webhooks.md @@ -1,5 +1,4 @@ --- -layout: default title: Webhooks parent: Actions and Hooks grand_parent: How-To diff --git a/docs/howto/import.md b/docs/howto/import.md index 738e29cf551..cfadf0319f5 100644 --- a/docs/howto/import.md +++ b/docs/howto/import.md @@ -1,5 +1,4 @@ --- -layout: default title: Import data description: Import existing data into a lakeFS repository parent: How-To diff --git a/docs/howto/index.md b/docs/howto/index.md index 899d46c89eb..d44ba92dffd 100644 --- a/docs/howto/index.md +++ b/docs/howto/index.md @@ -1,5 +1,4 @@ --- -layout: default title: How-To description: How to perform various tasks in lakeFS nav_order: 5 diff --git a/docs/howto/migrate-away.md b/docs/howto/migrate-away.md index a3af0e45e3b..e9e2904c565 100644 --- a/docs/howto/migrate-away.md +++ b/docs/howto/migrate-away.md @@ -1,5 +1,4 @@ --- -layout: default title: Migrating away from lakeFS description: The simplest way to migrate away from lakeFS is by copying data from a lakeFS repository to an S3 bucket. parent: How-To diff --git a/docs/howto/protect-branches.md b/docs/howto/protect-branches.md index 531a4dced92..606674e9d09 100644 --- a/docs/howto/protect-branches.md +++ b/docs/howto/protect-branches.md @@ -1,5 +1,4 @@ --- -layout: default title: Protect Branches description: Branch protection rules prevent direct changes from being applied to your important branches. parent: How-To diff --git a/docs/howto/sizing-guide.md b/docs/howto/sizing-guide.md index 7c2d04b7c48..cce0d9866b5 100644 --- a/docs/howto/sizing-guide.md +++ b/docs/howto/sizing-guide.md @@ -1,5 +1,4 @@ --- -layout: default title: Sizing Guide parent: How-To description: This section provides a detailed sizing guide for deploying lakeFS. 
diff --git a/docs/index.md b/docs/index.md index 0ad121bc537..1bdb172d422 100755 --- a/docs/index.md +++ b/docs/index.md @@ -1,5 +1,4 @@ --- -layout: default title: Welcome to lakeFS 👋🏻 description: The lakeFS documentation provides guidance on how to use lakeFS to deliver resilience and manageability to data lakes. nav_order: 0 diff --git a/docs/integrations/airbyte.md b/docs/integrations/airbyte.md index 49fe6bd3200..caeeba1ae25 100644 --- a/docs/integrations/airbyte.md +++ b/docs/integrations/airbyte.md @@ -1,5 +1,4 @@ --- -layout: default title: Airbyte description: Use Airbyte with lakeFS to easily sync data between applications and S3 with lakeFS version control. parent: Integrations diff --git a/docs/integrations/airflow.md b/docs/integrations/airflow.md index e892b5e9513..378b7220005 100644 --- a/docs/integrations/airflow.md +++ b/docs/integrations/airflow.md @@ -1,5 +1,4 @@ --- -layout: default title: Airflow description: Easily build reproducible data pipelines with Airflow and lakeFS using commits, without modifying the code or logic of your job. parent: Integrations diff --git a/docs/integrations/athena.md b/docs/integrations/athena.md index 1178cdd4e0f..10a89cdef9d 100644 --- a/docs/integrations/athena.md +++ b/docs/integrations/athena.md @@ -1,5 +1,4 @@ --- -layout: default title: Amazon Athena description: This section shows how you can start querying data from lakeFS using Amazon Athena. parent: Integrations diff --git a/docs/integrations/aws_cli.md b/docs/integrations/aws_cli.md index d80a24bbcc0..ce0eb850237 100644 --- a/docs/integrations/aws_cli.md +++ b/docs/integrations/aws_cli.md @@ -1,5 +1,4 @@ --- -layout: default title: AWS CLI description: This section shows how to use the AWS CLI for AWS S3 to access lakeFS. 
parent: Integrations diff --git a/docs/integrations/cloudera.md b/docs/integrations/cloudera.md index e851eae4cf3..2389579ff79 100644 --- a/docs/integrations/cloudera.md +++ b/docs/integrations/cloudera.md @@ -1,5 +1,4 @@ --- -layout: default title: Cloudera description: Accessing data in lakeFS from Cloudera Spark works the same as accessing S3 data from Apache Spark. parent: Integrations diff --git a/docs/integrations/dbt.md b/docs/integrations/dbt.md index 384ea489ce0..857f02acfbc 100644 --- a/docs/integrations/dbt.md +++ b/docs/integrations/dbt.md @@ -1,5 +1,4 @@ --- -layout: default title: dbt description: This guide covers maintaining environments with dbt and lakeFS. parent: Integrations diff --git a/docs/integrations/delta.md b/docs/integrations/delta.md index 7a3391d2853..0199f02906b 100644 --- a/docs/integrations/delta.md +++ b/docs/integrations/delta.md @@ -1,5 +1,4 @@ --- -layout: default title: Delta Lake description: This section explains how to use Delta Lake with lakeFS. parent: Integrations diff --git a/docs/integrations/dremio.md b/docs/integrations/dremio.md index da2d8f8611d..966e6178fd9 100644 --- a/docs/integrations/dremio.md +++ b/docs/integrations/dremio.md @@ -1,5 +1,4 @@ --- -layout: default title: Dremio description: This section shows how you can start using lakeFS with Dremio, a next-generation data lake engine. parent: Integrations diff --git a/docs/integrations/duckdb.md b/docs/integrations/duckdb.md index d8a15ae4378..411db06cdc4 100644 --- a/docs/integrations/duckdb.md +++ b/docs/integrations/duckdb.md @@ -1,5 +1,4 @@ --- -layout: default title: DuckDB description: How to use lakeFS with DuckDB, an open-source SQL OLAP database management system. 
parent: Integrations diff --git a/docs/integrations/glue_hive_metastore.md b/docs/integrations/glue_hive_metastore.md index b3d7d20fa24..be757a29b2d 100644 --- a/docs/integrations/glue_hive_metastore.md +++ b/docs/integrations/glue_hive_metastore.md @@ -1,5 +1,4 @@ --- -layout: default title: Glue / Hive metastore description: This section explains how to query data from lakeFS branches in services backed by Glue/Hive Metastore. parent: Integrations diff --git a/docs/integrations/hive.md b/docs/integrations/hive.md index ec821cce1e9..921e1bff4fd 100644 --- a/docs/integrations/hive.md +++ b/docs/integrations/hive.md @@ -1,5 +1,4 @@ --- -layout: default title: Hive description: This section covers how you can start using lakeFS with Apache Hive, a distributed data warehouse system that enables analytics at a massive scale. parent: Integrations diff --git a/docs/integrations/iceberg.md b/docs/integrations/iceberg.md index 80c6beb1f78..785a3285d9c 100644 --- a/docs/integrations/iceberg.md +++ b/docs/integrations/iceberg.md @@ -1,5 +1,4 @@ --- -layout: default title: Apache Iceberg description: How to integrate lakeFS with Apache Iceberg parent: Integrations diff --git a/docs/integrations/index.md b/docs/integrations/index.md index 2a4d1873d60..76cfa686785 100644 --- a/docs/integrations/index.md +++ b/docs/integrations/index.md @@ -1,5 +1,4 @@ --- -layout: default title: Integrations description: Integrate lakeFS with all modern data frameworks such as Spark, Apache Iceberg, Hive, AWS Athena, Presto, and more. nav_order: 10 diff --git a/docs/integrations/kafka.md b/docs/integrations/kafka.md index a82a7aca8bb..83aae8bb6bd 100644 --- a/docs/integrations/kafka.md +++ b/docs/integrations/kafka.md @@ -1,5 +1,4 @@ --- -layout: default title: Kafka description: This section explains how you can start using lakeFS with Kafka using Confluent’s S3 Sink Connector. 
parent: Integrations diff --git a/docs/integrations/kubeflow.md b/docs/integrations/kubeflow.md index 73f393f4853..dab81789a79 100644 --- a/docs/integrations/kubeflow.md +++ b/docs/integrations/kubeflow.md @@ -1,5 +1,4 @@ --- -layout: default title: Kubeflow description: Easily build reproducible data pipelines with Kubeflow and lakeFS using commits, without modifying the code or logic of your job. parent: Integrations diff --git a/docs/integrations/presto_trino.md b/docs/integrations/presto_trino.md index 77bf338c7d3..f495f70c47f 100644 --- a/docs/integrations/presto_trino.md +++ b/docs/integrations/presto_trino.md @@ -1,5 +1,4 @@ --- -layout: default title: Presto/Trino description: This section explains how you can start using lakeFS with Presto/Trino, an open-source distributed SQL query engine. parent: Integrations diff --git a/docs/integrations/python.md b/docs/integrations/python.md index 50534002063..4f4b8ea4dd4 100644 --- a/docs/integrations/python.md +++ b/docs/integrations/python.md @@ -1,5 +1,4 @@ --- -layout: default title: Python description: Use Python to interact with your objects on lakeFS parent: Integrations diff --git a/docs/integrations/r.md b/docs/integrations/r.md index fa845672fa8..e6c5e7a19ba 100644 --- a/docs/integrations/r.md +++ b/docs/integrations/r.md @@ -1,5 +1,4 @@ --- -layout: default title: R description: How to use lakeFS from R including creating branches, committing changes, and merging. parent: Integrations diff --git a/docs/integrations/sagemaker.md b/docs/integrations/sagemaker.md index f675cf31bbe..a6ed9d79525 100644 --- a/docs/integrations/sagemaker.md +++ b/docs/integrations/sagemaker.md @@ -1,5 +1,4 @@ --- -layout: default title: SageMaker description: This section explains how to integrate your SageMaker installation to work with lakeFS. 
parent: Integrations diff --git a/docs/integrations/spark.md b/docs/integrations/spark.md index 907a776b82d..0e923830423 100644 --- a/docs/integrations/spark.md +++ b/docs/integrations/spark.md @@ -1,5 +1,4 @@ --- -layout: default title: Spark description: Accessing data in lakeFS from Apache Spark works the same as accessing S3 data from Apache Spark. parent: Integrations diff --git a/docs/posts/index.md b/docs/posts/index.md index 73f2d530da4..7b68df3dc22 100644 --- a/docs/posts/index.md +++ b/docs/posts/index.md @@ -1,5 +1,4 @@ --- -layout: default has_children: true search_exclude: true --- diff --git a/docs/posts/security_update.md b/docs/posts/security_update.md index 79b98c0244b..fc252559429 100644 --- a/docs/posts/security_update.md +++ b/docs/posts/security_update.md @@ -1,5 +1,4 @@ --- -layout: default parent: posts has_children: false date: 2023-01-31 diff --git a/docs/project/contributing.md b/docs/project/contributing.md index ac42961ab5a..66918838064 100644 --- a/docs/project/contributing.md +++ b/docs/project/contributing.md @@ -1,5 +1,4 @@ --- -layout: default title: Contributing to lakeFS description: lakeFS community welcomes your contribution. To make the process as seamless as possible, we recommend reading this contribution guide first. parent: The lakeFS Project diff --git a/docs/project/docs/callouts.md b/docs/project/docs/callouts.md index e8c0fa87e38..44fc931ccdd 100644 --- a/docs/project/docs/callouts.md +++ b/docs/project/docs/callouts.md @@ -1,5 +1,4 @@ --- -layout: default title: Callouts description: Using and Customising Callouts in lakeFS Documentation parent: Documentation diff --git a/docs/project/index.md b/docs/project/index.md index e4e93d0459c..d50fabfa106 100644 --- a/docs/project/index.md +++ b/docs/project/index.md @@ -1,5 +1,4 @@ --- -layout: default title: The lakeFS Project description: lakeFS is an open-source project under the Apache 2.0 license, committed to fostering the open-source space. 
nav_order: 100 diff --git a/docs/quickstart/learning-more-lakefs.md b/docs/quickstart/learning-more-lakefs.md index 6fe0be23ee4..59bd8a4d310 100644 --- a/docs/quickstart/learning-more-lakefs.md +++ b/docs/quickstart/learning-more-lakefs.md @@ -1,5 +1,4 @@ --- -layout: default title: 🧑🏻‍🎓 Learn more about lakeFS description: Learn more about lakeFS here with links to resources including quickstart, samples, installation guides, and more. parent: ⭐ Quickstart ⭐ diff --git a/docs/reference/access-control-lists.md b/docs/reference/access-control-lists.md index 5304e0792e6..00462a1c8a1 100644 --- a/docs/reference/access-control-lists.md +++ b/docs/reference/access-control-lists.md @@ -1,5 +1,4 @@ --- -layout: default title: Access Control Lists (ACLs) parent: Reference description: Access control lists (ACLs) are one of the resource-based options that you can use to manage access to your repositories and objects. There are limits to managing permissions using ACLs. diff --git a/docs/reference/api.md b/docs/reference/api.md index 89dcba983e6..9bf130d2966 100644 --- a/docs/reference/api.md +++ b/docs/reference/api.md @@ -1,5 +1,4 @@ --- -layout: default title: API Reference description: This section includes the reference documentation for the lakeFS platform's various APIs. parent: Reference diff --git a/docs/reference/authentication.md b/docs/reference/authentication.md index 1cc32fb9294..42a7ad872f9 100644 --- a/docs/reference/authentication.md +++ b/docs/reference/authentication.md @@ -1,5 +1,4 @@ --- -layout: default title: Authentication description: This section covers Authentication of your lakeFS server. parent: Reference diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 6bd43f2bc88..1700d28279d 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -1,5 +1,4 @@ --- -layout: default title: lakectl (lakeFS command-line tool) description: lakeFS comes with its own native CLI client. Here you can see the complete command reference. 
parent: Reference diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index b6c365e4eb9..bba012bda69 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1,5 +1,4 @@ --- -layout: default title: Configuration description: Configuring lakeFS is done using a YAML configuration file. This reference uses `.` to denote the nesting of values. parent: Reference diff --git a/docs/reference/index.md b/docs/reference/index.md index 8a8748922d1..6cf712126a0 100644 --- a/docs/reference/index.md +++ b/docs/reference/index.md @@ -1,5 +1,4 @@ --- -layout: default title: Reference description: Reference documentation for the lakeFS platform's various APIs, CLIs, and file formats. nav_order: 20 diff --git a/docs/reference/monitor.md b/docs/reference/monitor.md index 0f85a41d9f2..c550f54a45a 100644 --- a/docs/reference/monitor.md +++ b/docs/reference/monitor.md @@ -1,5 +1,4 @@ --- -layout: default title: Monitoring using Prometheus description: A guide to monitoring your lakeFS Installation with Prometheus. parent: Reference diff --git a/docs/reference/presigned-url.md b/docs/reference/presigned-url.md index eb0a6d9f069..db0a996e0d3 100644 --- a/docs/reference/presigned-url.md +++ b/docs/reference/presigned-url.md @@ -1,5 +1,4 @@ --- -layout: default title: Presigned URL description: Configuring lakeFS to use presigned URLs parent: Reference diff --git a/docs/reference/rbac.md b/docs/reference/rbac.md index bd8deba6666..1b2694836d8 100644 --- a/docs/reference/rbac.md +++ b/docs/reference/rbac.md @@ -1,5 +1,4 @@ --- -layout: default title: Role-Based Access Control (RBAC) description: This section covers authorization (using RBAC) of your lakeFS server. 
parent: Reference diff --git a/docs/reference/remote-authenticator.md b/docs/reference/remote-authenticator.md index 118fc00a3c1..b524489a7bf 100644 --- a/docs/reference/remote-authenticator.md +++ b/docs/reference/remote-authenticator.md @@ -1,5 +1,4 @@ --- -layout: default title: Remote Authenticator description: Create a pluggable remote authenticator to integrate lakeFS with your existing security infrastructure. parent: Reference diff --git a/docs/reference/s3.md b/docs/reference/s3.md index e7612e52c97..ca13106ad38 100644 --- a/docs/reference/s3.md +++ b/docs/reference/s3.md @@ -1,5 +1,4 @@ --- -layout: default title: S3 Supported API description: "S3-supported API. lakeFS supports the following API operations: Identity and authorization, Bucket operations, Object operations and listing" parent: Reference diff --git a/docs/reference/spark-client.md b/docs/reference/spark-client.md index faac47abaaa..fcbf879be87 100644 --- a/docs/reference/spark-client.md +++ b/docs/reference/spark-client.md @@ -1,5 +1,4 @@ --- -layout: default title: Spark Client description: The lakeFS Spark client performs operations on lakeFS committed metadata stored in the object store. parent: Reference diff --git a/docs/slack/index.md b/docs/slack/index.md index a3ca79686f1..a1c83e9ed19 100644 --- a/docs/slack/index.md +++ b/docs/slack/index.md @@ -1,5 +1,4 @@ --- -layout: default title: Slack nav_order: 100 description: Redirect page for lakeFS slack joining url diff --git a/docs/understand/architecture.md b/docs/understand/architecture.md index 0a1fa06a21d..a467e0b8efc 100644 --- a/docs/understand/architecture.md +++ b/docs/understand/architecture.md @@ -1,50 +1,36 @@ --- -layout: default title: Architecture parent: Understanding lakeFS description: lakeFS architecture overview. Learn more about lakeFS components, including its S3 API gateway. 
-nav_order: 10 has_children: false redirect_from: - /architecture/index.html - /architecture/overview.html --- -# Architecture Overview +# lakeFS Architecture +lakeFS is distributed as a single binary encapsulating several logical services. -{% include toc_2-3.html %} - -## Overview +The server itself is stateless, meaning you can easily add more instances to handle a bigger load. -lakeFS is distributed as a single binary encapsulating several logical services: +![Architecture]({{ site.baseurl }}/assets/img/architecture.png) -The server itself is stateless, meaning you can easily add more instances to handle a bigger load. +{% include toc_2-3.html %} +### Object Storage -The following underlying object stores (or any S3-compatible store) can be used by lakeFS to store data: +lakeFS stores data in object stores. Those supported include: +- AWS S3 - Google Cloud Storage - Azure Blob Storage -- AWS S3 - MinIO - Ceph -In additional a Key Value storage is used for storing metadata: - -- [PostgreSQL](https://www.postgresql.org/){:target="_blank"} -- [DynamoDB](https://aws.amazon.com/dynamodb/){:target="_blank"} - -Instructions of how to deploy such database on AWS can be found [here]({{ site.baseurl }}/howto/deploy/aws.md#grant-dynamodb-permissions-to-lakefs). - -Additional information on the data format can be found in [Versioning internals]({{ site.baseurl }}/understand/how/versioning-internals.md). - - -![Architecture]({{ site.baseurl }}/assets/img/architecture.png) +### Metadata Storage -## Ways to deploy lakeFS +In addition, a key-value store is used for storing metadata, with supported databases including PostgreSQL, DynamoDB, and CosmosDB. Instructions on how to deploy such a database on AWS can be found [here]({{ site.baseurl }}/howto/deploy/aws.md#grant-dynamodb-permissions-to-lakefs). 
+ -lakeFS releases include [binaries](https://github.com/treeverse/lakeFS/releases) for common operating systems, a [containerized option](https://hub.docker.com/r/treeverse/lakefs) or -a [Helm chart](https://artifacthub.io/packages/helm/lakefs/lakefs). -Check out our guides for running lakeFS on [AWS]({{ site.baseurl }}/howto/deploy/aws.md), [GCP]({{ site.baseurl }}/howto/deploy/gcp.md) and [more]({{ site.baseurl }}/howto/deploy). +Additional information on the data format can be found in [Versioning internals](./how/versioning-internals.md) and [Internal database structure](./how/kv.md). ### Load Balancing @@ -75,7 +61,7 @@ See the [roadmap]({{ site.baseurl }}/project/index.md#roadmap) for information o ### Graveler The Graveler handles lakeFS versioning by translating lakeFS addresses to the actual stored objects. -To learn about the data model used to store lakeFS metadata, see the [data model section]({{ site.baseurl }}/understand/how/versioning-internals.md). +To learn about the data model used to store lakeFS metadata, see the [versioning internals page]({{ site.baseurl }}/understand/how/versioning-internals.md). 
### Authentication & Authorization Service diff --git a/docs/understand/data_lifecycle_management/ci.md b/docs/understand/data_lifecycle_management/ci.md index 608d6f0e9ed..4c5fdd6f756 100644 --- a/docs/understand/data_lifecycle_management/ci.md +++ b/docs/understand/data_lifecycle_management/ci.md @@ -1,10 +1,8 @@ --- -layout: default title: During Deployment parent: Data Lifecycle Management grand_parent: Understanding lakeFS description: lakeFS enables to continuously test newly ingested data to ensure data quality requirements are met -nav_order: 35 redirect_from: - /data_lifecycle_management/ci.html --- diff --git a/docs/understand/data_lifecycle_management/data-devenv.md b/docs/understand/data_lifecycle_management/data-devenv.md index 55055c59b16..9b1e0a4e528 100644 --- a/docs/understand/data_lifecycle_management/data-devenv.md +++ b/docs/understand/data_lifecycle_management/data-devenv.md @@ -1,10 +1,8 @@ --- -layout: default title: In Test parent: Data Lifecycle Management grand_parent: Understanding lakeFS description: lakeFS enables a safe test environment on your data lake without the need to copy or mock data -nav_order: 25 redirect_from: - /data_lifecycle_management/data-devenv.html --- diff --git a/docs/understand/data_lifecycle_management/index.md b/docs/understand/data_lifecycle_management/index.md index 0e63c165261..779c8a40122 100644 --- a/docs/understand/data_lifecycle_management/index.md +++ b/docs/understand/data_lifecycle_management/index.md @@ -1,11 +1,18 @@ --- -layout: default title: Data Lifecycle Management description: Learn how lakeFS enables data lifecycle management. 
-nav_order: 50 parent: Understanding lakeFS has_children: true +has_toc: false redirect_from: - /branching/recommendations.html - /using_lakefs.html --- + +# Data Lifecycle Management in lakeFS + +lakeFS provides full support for Data Lifecycle Management through all stages: + +* [In Test](./data-devenv.md) +* [During Deployment](./ci.md) +* [In Production](./production.md) \ No newline at end of file diff --git a/docs/understand/data_lifecycle_management/production.md b/docs/understand/data_lifecycle_management/production.md index b5f78327bac..da60285fa59 100644 --- a/docs/understand/data_lifecycle_management/production.md +++ b/docs/understand/data_lifecycle_management/production.md @@ -1,10 +1,8 @@ --- -layout: default title: In Production parent: Data Lifecycle Management grand_parent: Understanding lakeFS description: lakeFS helps recover from errors and find root case in production. -nav_order: 55 redirect_from: - /data_lifecycle_management/production.html --- diff --git a/docs/understand/faq.md b/docs/understand/faq.md index 0dc8ef16d9d..8f490bd4532 100644 --- a/docs/understand/faq.md +++ b/docs/understand/faq.md @@ -1,5 +1,4 @@ --- -layout: default title: FAQ parent: Understanding lakeFS description: Have a question about lakeFS? Check out this list of Frequently Asked Questions diff --git a/docs/understand/glossary.md b/docs/understand/glossary.md index 70b172b2465..77c362a2961 100644 --- a/docs/understand/glossary.md +++ b/docs/understand/glossary.md @@ -1,9 +1,7 @@ --- -layout: default title: Glossary description: Glossary of all terms related to lakeFS technical internals and the architecture. 
parent: Understanding lakeFS -nav_order: 60 has_children: false redirect_from: - /reference/glossary.html diff --git a/docs/understand/how/index.md b/docs/understand/how/index.md index 8365b97cfdf..d95f73da484 100644 --- a/docs/understand/how/index.md +++ b/docs/understand/how/index.md @@ -1,11 +1,17 @@ --- -layout: default title: How lakeFS Works description: This section includes all the details about the lakeFS open source project. parent: Understanding lakeFS -nav_order: 40 has_children: true - +has_toc: false --- -This section includes all the details about lakeFS internals and implementation details +# How lakeFS Works + +The [Architecture]({{ site.baseurl }}/understand/architecture.html) page includes a logical overview of lakeFS and its components. + +For deep-dive content about lakeFS see: + +* [Internal database structure](./kv.md) +* [Merges in lakeFS](./merge.md) +* [Versioning Internals](./versioning-internals.md) \ No newline at end of file diff --git a/docs/understand/how/kv.md b/docs/understand/how/kv.md index a574cc1bb91..bf2d147ca57 100644 --- a/docs/understand/how/kv.md +++ b/docs/understand/how/kv.md @@ -1,10 +1,8 @@ --- -layout: default title: Internal database structure parent: How lakeFS Works grand_parent: Understanding lakeFS description: Brief introduction to lakeFS over KV -nav_order: 20 has_children: false redirect_from: - /understand/kv-in-a-nutshell.html diff --git a/docs/understand/how/merge.md b/docs/understand/how/merge.md index eabb0809df1..408c25303bb 100644 --- a/docs/understand/how/merge.md +++ b/docs/understand/how/merge.md @@ -1,17 +1,15 @@ --- -layout: default title: Merge description: Using lakeFS, you can merge different commits and references into a branch. The purpose of this document is to explain how to use this feature. 
parent: How lakeFS Works grand_parent: Understanding lakeFS -nav_order: 9999 has_children: false redirect_from: - /reference/merge.html - /understand/merge.html --- -# Merge +# Merges in lakeFS The merge operation in lakeFS is similar to Git. It incorporates changes from a _merge source_ (a commit/reference) into a _merge destination_ (a **branch**). diff --git a/docs/understand/how/versioning-internals.md b/docs/understand/how/versioning-internals.md index 083b36c5ef1..010ec497117 100644 --- a/docs/understand/how/versioning-internals.md +++ b/docs/understand/how/versioning-internals.md @@ -1,10 +1,8 @@ --- -layout: default title: Versioning Internals parent: How lakeFS Works grand_parent: Understanding lakeFS description: This section explains how versioning works in lakeFS. -nav_order: 10 has_children: false redirect_from: - /understand/architecture/data-model.html diff --git a/docs/understand/index.md b/docs/understand/index.md index cf65e43296a..0bdc83c6a91 100644 --- a/docs/understand/index.md +++ b/docs/understand/index.md @@ -1,7 +1,48 @@ --- -layout: default title: Understanding lakeFS description: Details about lakeFS Concepts and Design nav_order: 15 has_children: true +has_toc: false --- + +# Understanding lakeFS + +lakeFS Docs + +## Architecture and Internals + +The [Architecture](./architecture.html) page includes a logical overview of lakeFS and its components. 
+ +For deep-dive content about lakeFS, see: + +* [Internal database structure](./how/kv.md) +* [Merges in lakeFS](./how/merge.md) +* [Versioning Internals](./how/versioning-internals.md) + +## lakeFS Use Cases + +lakeFS has many uses in the data world, including: + +* [CI/CD for Data Lakes](./use_cases/cicd_for_data.md) +* [ETL Testing Environment](./use_cases/etl_testing.md) +* [Reproducibility](./use_cases/reproducibility.md) +* [Rollback](./use_cases/rollback.md) + +One of the important things that lakeFS provides is full support for [Data Lifecycle Management](./data_lifecycle_management/) through all stages: + +* [In Test](./data_lifecycle_management/data-devenv.md) +* [During Deployment](./data_lifecycle_management/ci.md) +* [In Production](./data_lifecycle_management/production.md) + +## lakeFS Concepts and Model + +lakeFS adopts many of the terms and concepts from Git. [This page](./model.html) goes into detail on the similarities and differences, and provides a good background to the concepts used in lakeFS. + +## Performance + +Check out the [Performance best practices](./performance-best-practices.html) guide for useful hints and tips on ensuring high performance from lakeFS. + +## FAQ and Glossary + +The [FAQ](./faq.html) covers many common questions around lakeFS, and the [glossary](./glossary.html) provides a useful reference for the terms used in lakeFS. diff --git a/docs/understand/model.md b/docs/understand/model.md index e976f4841ad..cacfc25811d 100644 --- a/docs/understand/model.md +++ b/docs/understand/model.md @@ -1,9 +1,7 @@ --- -layout: default -title: Model +title: Concepts and Model description: The lakeFS object model blends the object models of Git and of object stores such as S3. Read this page to learn more. 
parent: Understanding lakeFS -nav_order: 20 has_children: false redirect_from: - /reference/object-model.html @@ -11,7 +9,7 @@ redirect_from: - /understand/object-model.html --- -# Model +# lakeFS Concepts and Model {% include toc_2-3.html %} @@ -22,9 +20,9 @@ defines the common concepts of lakeFS. lakeFS is an interface to manage objects in an object store. -The actual data itself is not stored inside lakeFS directly but in an [underlying object store](#concepts-unique-to-lakefs). -lakeFS manages pointers and additional metadata about these objects. -{: .note } +{: .tip } +> The actual data itself is not stored inside lakeFS directly but in an [underlying object store](#concepts-unique-to-lakefs). +> lakeFS manages pointers and additional metadata about these objects. ## Version Control diff --git a/docs/understand/performance-best-practices.md b/docs/understand/performance-best-practices.md index de3fe09968d..234b8518bf5 100644 --- a/docs/understand/performance-best-practices.md +++ b/docs/understand/performance-best-practices.md @@ -1,9 +1,7 @@ --- -layout: default title: Performance Best Practices parent: Understanding lakeFS description: This section suggests performance best practices to work with lakeFS. -nav_order: 26 has_children: false --- # Performance Best Practices diff --git a/docs/understand/use_cases/etl_testing.md b/docs/understand/use_cases/etl_testing.md index 3f1c50d78ae..015a6b7e717 100644 --- a/docs/understand/use_cases/etl_testing.md +++ b/docs/understand/use_cases/etl_testing.md @@ -1,5 +1,4 @@ --- -layout: default title: ETL Testing Environment description: In this tutorial, we will explore how to safely run ETL testing using lakeFS to create isolated dev/test data environments to run data pipelines. 
parent: Use Cases diff --git a/docs/understand/use_cases/index.md b/docs/understand/use_cases/index.md index 58afdcf8330..c8954d7db90 100644 --- a/docs/understand/use_cases/index.md +++ b/docs/understand/use_cases/index.md @@ -1,11 +1,25 @@ --- -layout: default title: Use Cases description: Better understand how to use the features of lakeFS for specific use cases. parent: Understanding lakeFS -nav_order: 30 has_children: true +has_toc: false redirect_from: - /use_cases/ - /use_cases/index.html --- + +# lakeFS Use Cases + +lakeFS has many uses in the data world, including: + +* [CI/CD for Data Lakes](./cicd_for_data.md) +* [ETL Testing Environment](./etl_testing.md) +* [Reproducibility](./reproducibility.md) +* [Rollback](./rollback.md) + +One of the important things that lakeFS provides is full support for [Data Lifecycle Management](../data_lifecycle_management/) through all stages: + +* [In Test](../data_lifecycle_management/data-devenv.md) +* [During Deployment](../data_lifecycle_management/ci.md) +* [In Production](../data_lifecycle_management/production.md) \ No newline at end of file