Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

doc: full iceberg-rest example #99

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ export CATALOG_CATALOG__IMPL=org.apache.iceberg.aws.glue.GlueCatalog
java -jar ./build/libs/iceberg-rest-image-all.jar
```

## Example

See the [example](./example/README.md) for how to run an iceberg-rest service using a centralized external database.

## Browse

To browse the catalog, you can use `pyiceberg`:
Expand Down
15 changes: 15 additions & 0 deletions example/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# iceberg-rest example

To start the example stack:

```shell
docker compose up -d
```

Then install the Python dependencies, download the sample dataset, and run the example script:

```shell
pip install "pyiceberg[s3fs,pyarrow]"
curl https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet -o /tmp/yellow_tripdata_2023-01.parquet
python3 iceberg_s3_example.py
```
58 changes: 58 additions & 0 deletions example/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Example stack: MinIO (S3-compatible object store), a one-shot job that
# creates the warehouse bucket, the iceberg-rest catalog service, and the
# PostgreSQL database that backs the catalog through JdbcCatalog.
services:
  # S3-compatible object store. The S3 API (container port 9000) is published
  # on host port 9020 and the web console (9001) on host port 9021.
  minio:
    image: minio/minio:RELEASE.2024-05-27T19-17-46Z
    command: server /data --console-address ":9001"
    ports:
      - "9020:9000"
      - "9021:9001"
    environment:
      MINIO_ROOT_USER: admin
      MINIO_ROOT_PASSWORD: adminadmin
      MINIO_SITE_REGION: eu-west-3

  # One-shot job that creates the bucket used as the catalog warehouse.
  # NOTE(review): the image tag is not pinned; consider pinning a release.
  createbuckets:
    image: minio/mc
    depends_on:
      - minio
    # Retry `mc alias set` until MinIO accepts connections instead of sleeping
    # for a fixed time, then create the bucket. `--ignore-existing` keeps
    # re-runs idempotent, and the container exits with the status of `mc mb`
    # so a failed bucket creation is no longer masked.
    entrypoint: >
      /bin/sh -c "
      until /usr/bin/mc alias set myminio http://minio:9000 admin adminadmin;
      do echo 'waiting for minio'; sleep 1; done;
      /usr/bin/mc mb --ignore-existing myminio/test-bucket;
      "

  # Iceberg REST catalog: metadata in PostgreSQL, data files in MinIO.
  # NOTE(review): the image tag is not pinned; consider pinning a release.
  iceberg_rest:
    image: tabulario/iceberg-rest
    depends_on:
      # Start only once the database answers pg_isready ...
      iceberg_rest-db:
        condition: service_healthy
      # ... and the warehouse bucket has been created.
      createbuckets:
        condition: service_completed_successfully
    ports:
      - "8181:8181"
    environment:
      AWS_ACCESS_KEY_ID: admin
      AWS_SECRET_ACCESS_KEY: adminadmin
      AWS_REGION: eu-west-3
      CATALOG_WAREHOUSE: s3://test-bucket/
      CATALOG_IO__IMPL: org.apache.iceberg.aws.s3.S3FileIO
      CATALOG_S3_ENDPOINT: http://minio:9000
      # Quoted so the value stays the string "true" rather than a YAML boolean.
      CATALOG_S3_PATH__STYLE__ACCESS: "true"
      CATALOG_CATALOG__IMPL: org.apache.iceberg.jdbc.JdbcCatalog
      CATALOG_URI: jdbc:postgresql://iceberg_rest-db:5432/iceberg_restdb
      CATALOG_JDBC_USER: iceberg_rest
      CATALOG_JDBC_PASSWORD: password

  # PostgreSQL database backing the JDBC catalog; published on host port 5436.
  iceberg_rest-db:
    image: postgres:15.2
    init: true
    environment:
      POSTGRES_DB: iceberg_restdb
      POSTGRES_USER: iceberg_rest
      POSTGRES_PASSWORD: password
    ports:
      - "5436:5432"
    # Reports healthy once the database accepts connections; iceberg_rest
    # waits on this via `condition: service_healthy`.
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "iceberg_rest", "-d", "iceberg_restdb"]
      interval: 5s
      retries: 5
42 changes: 42 additions & 0 deletions example/iceberg_s3_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# pip install "pyiceberg[s3fs,pyarrow]"
# curl https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet -o /tmp/yellow_tripdata_2023-01.parquet

import os

# Credentials and region for the local MinIO instance, exported through the
# process environment before the catalog client is created.
os.environ.update(
    {
        "AWS_DEFAULT_REGION": "eu-west-3",
        "AWS_REGION": "eu-west-3",
        "AWS_ACCESS_KEY_ID": "admin",
        "AWS_SECRET_ACCESS_KEY": "adminadmin",
    }
)


def run_iceberg():
    """Load the sample taxi dataset into an Iceberg table and print its row count.

    Connects to the REST catalog at http://localhost:8181, reads the Parquet
    file at /tmp/yellow_tripdata_2023-01.parquet, makes sure the ``default``
    namespace and the ``default.taxi_dataset`` table exist, appends the data
    when the table is newly created, and finally scans the table back and
    prints the number of rows.

    The function can be run repeatedly: an already existing namespace or
    table is reused instead of raising.
    """
    import pyarrow.parquet as pq
    from pyiceberg.catalog.rest import RestCatalog
    from pyiceberg.exceptions import (
        NamespaceAlreadyExistsError,
        TableAlreadyExistsError,
    )

    catalog = RestCatalog(
        "default",
        **{
            "uri": "http://localhost:8181",
            "warehouse": "s3://test-bucket/",
            "s3.endpoint": "http://localhost:9020",
        },
    )

    df = pq.read_table("/tmp/yellow_tripdata_2023-01.parquet")

    # A previous run may already have created the namespace; that is fine.
    try:
        catalog.create_namespace("default")
    except NamespaceAlreadyExistsError:
        pass

    try:
        table = catalog.create_table(
            "default.taxi_dataset",
            schema=df.schema,
        )
    except TableAlreadyExistsError:
        # The table exists from an earlier run: skip the append so the
        # dataset is not loaded a second time.
        pass
    else:
        table.append(df)

    # Re-load through the catalog and read everything back to show the
    # round trip works.
    table = catalog.load_table("default.taxi_dataset")
    result = table.scan().to_arrow()
    print(len(result))


# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    run_iceberg()