Commit 09fcf7b: E2e test for iceberg client
liurenjie1024 committed Jul 26, 2023 (1 parent: 40526e8)

Showing 11 changed files with 284 additions and 26 deletions.
13 changes: 9 additions & 4 deletions Cargo.toml
@@ -1,7 +1,12 @@
+[workspace.package]
+version = "0.0.8"
+edition = "2021"
+license = "Apache-2.0"
+
 [workspace]
 members = [
-    "icelake"
+    "icelake",
+    "tests/integration"
 ]
@@ -18,13 +23,13 @@ uuid = { version = "1", features = ["v4"] }
 serde = "1"
 serde_json = "1"
 serde_with = "3"
-tokio = { version = "1.28", features = ["full"] }
+tokio = { version = "1", features = ["full"] }
 parquet = { version = ">=40, <45", features = ["async"] }
 rust_decimal = "1.30"
 chrono = "0.4"
 faster-hex = "0.8.0"
 once_cell = "1"
 tempfile = "3"
+trino = { path = "../trino-rust-client" }

 log = "0.4.0"
 env_logger = "0.10.0"
6 changes: 3 additions & 3 deletions icelake/Cargo.toml
@@ -1,8 +1,8 @@
 [package]
 name = "icelake"
-version = "0.0.8"
-edition = "2021"
-license = "Apache-2.0"
+version = {workspace = true}
+edition = {workspace = true}
+license = {workspace = true}
 description = "Pure Rust Iceberg Implementation"

 [package.metadata.docs.rs]
14 changes: 14 additions & 0 deletions tests/integration/Cargo.toml
@@ -0,0 +1,14 @@
[package]
name = "icelake-integration-tests"
version = "0.0.8"
edition = "2021"
license = "Apache-2.0"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
trino = { workspace = true }
icelake = { path = "../../icelake" }
log = { workspace = true }
env_logger = { workspace = true }
tokio = { workspace = true }
7 changes: 7 additions & 0 deletions tests/integration/docker/catalog/iceberg.properties
@@ -0,0 +1,7 @@
connector.name = iceberg
hive.metastore.uri=thrift://hive-metastore:9083
hive.s3.path-style-access=true
hive.s3.endpoint=http://minio:9000
hive.s3.aws-access-key=admin
hive.s3.aws-secret-key=password
iceberg.file-format=PARQUET
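
Trino loads every *.properties file mounted under /etc/trino/catalog (the compose file below mounts ./catalog there), so this file is exposed as a catalog named iceberg. A hedged way to poke at it once the stack is running, using the CLI bundled in the trinodb/trino image; only the container, catalog, and schema names come from this diff:

# Open an interactive SQL session against the iceberg catalog.
docker exec -it trino trino --catalog iceberg --schema s1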
86 changes: 86 additions & 0 deletions tests/integration/docker/docker-compose.yml
@@ -0,0 +1,86 @@
version: "3"

services:
trino:
image: trinodb/trino
container_name: trino
user: root
networks:
- iceberg_net
depends_on:
- hive-metastore
- minio
ports:
- "8080:8080"
environment:
AWS_ACCESS_KEY_ID: admin
AWS_SECRET_ACCESS_KEY: password
AWS_S3_ENDPOINT: "http://minio:9000"
volumes:
- ./catalog:/etc/trino/catalog

mysql:
image: 'mysql/mysql-server:8.0'
hostname: mysql
ports:
- '3306:3306'
environment:
MYSQL_ROOT_PASSWORD: admin
MYSQL_USER: admin
MYSQL_PASSWORD: admin
MYSQL_DATABASE: metastore_db
networks:
- iceberg_net

hive-metastore:
build: ./hive
image: hive-metastore:latest
hostname: hive-metastore
container_name: hive-metastore
ports:
- '9083:9083'
depends_on:
mysql:
condition: service_healthy
# volumes:
# - ./conf/metastore-site.xml:/opt/apache-hive-metastore-3.0.0-bin/conf/metastore-site.xml:ro
networks:
- iceberg_net

minio:
image: minio/minio
container_name: minio
environment:
MINIO_ROOT_USER: admin
MINIO_ROOT_PASSWORD: password
MINIO_DOMAIN: minio
MINIO_HTTP_TRACE: /dev/stdout
networks:
iceberg_net:
aliases:
- icebergdata.minio
ports:
- "9001:9001"
- "9000:9000"
command: [ "server", "/data", "--console-address", ":9001" ]
mc:
depends_on:
- minio
image: minio/mc
container_name: mc
networks:
iceberg_net:
environment:
AWS_ACCESS_KEY_ID: admin
AWS_SECRET_ACCESS_KEY: password
AWS_REGION: us-east-1
entrypoint: >
/bin/sh -c "
until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
/usr/bin/mc rm -r --force minio/icebergdata;
/usr/bin/mc mb minio/icebergdata;
/usr/bin/mc policy set public minio/icebergdata;
tail -f /dev/null
"
networks:
iceberg_net:
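
A minimal sketch of bringing this environment up from the repository root, assuming Docker Compose v2 is installed; the file path and service names come from this compose file:

# Build the hive-metastore image and start trino, mysql, minio and mc.
docker compose -f tests/integration/docker/docker-compose.yml up -d --build

# Watch the metastore come up; it is ready once schematool has finished.
docker compose -f tests/integration/docker/docker-compose.yml logs -f hive-metastore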
38 changes: 38 additions & 0 deletions tests/integration/docker/hive/Dockerfile
@@ -0,0 +1,38 @@
FROM openjdk:8u342-jre

RUN apt-get update \
    && apt-get install --assume-yes python3 python3-pip procps \
    && apt-get clean

RUN pip3 install pyspark~=3.3.1 pandas~=1.5.3

RUN apt-get update \
    && apt-get install --assume-yes telnet \
    && apt-get clean

WORKDIR /opt

ENV HADOOP_VERSION=3.2.0
ENV METASTORE_VERSION=3.0.0

ENV HADOOP_HOME=/opt/hadoop-${HADOOP_VERSION}
ENV HIVE_HOME=/opt/apache-hive-metastore-${METASTORE_VERSION}-bin

RUN curl -L https://apache.org/dist/hive/hive-standalone-metastore-${METASTORE_VERSION}/hive-standalone-metastore-${METASTORE_VERSION}-bin.tar.gz | tar zxf - && \
    curl -L https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | tar zxf - && \
    curl -L https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.19.tar.gz | tar zxf - && \
    cp mysql-connector-java-8.0.19/mysql-connector-java-8.0.19.jar ${HIVE_HOME}/lib/ && \
    rm -rf mysql-connector-java-8.0.19

COPY conf/metastore-site.xml ${HIVE_HOME}/conf
COPY scripts/entrypoint.sh /entrypoint.sh

RUN groupadd -r hive --gid=1000 && \
    useradd -r -g hive --uid=1000 -d ${HIVE_HOME} hive && \
    chown hive:hive -R ${HIVE_HOME} && \
    chown hive:hive /entrypoint.sh && chmod +x /entrypoint.sh

USER hive
EXPOSE 9083

ENTRYPOINT ["sh", "-c", "/entrypoint.sh"]
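
The compose file builds this image through its build: ./hive entry, but it can also be built alone. A sketch, assuming it is run from tests/integration/docker:

# Build the metastore image by hand and confirm the thrift port is declared.
docker build -t hive-metastore:latest ./hive
docker image inspect --format '{{.Config.ExposedPorts}}' hive-metastore:latest
# prints: map[9083/tcp:{}]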
52 changes: 52 additions & 0 deletions tests/integration/docker/hive/conf/metastore-site.xml
@@ -0,0 +1,52 @@
<configuration>
    <property>
        <name>metastore.thrift.uris</name>
        <value>thrift://hive-metastore:9083</value>
        <description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
    </property>
    <property>
        <name>metastore.task.threads.always</name>
        <value>org.apache.hadoop.hive.metastore.events.EventCleanerTask,org.apache.hadoop.hive.metastore.MaterializationsCacheCleanerTask</value>
    </property>
    <property>
        <name>metastore.expression.proxy</name>
        <value>org.apache.hadoop.hive.metastore.DefaultPartitionExpressionProxy</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.cj.jdbc.Driver</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://mysql:3306/metastore_db</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>admin</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>admin</value>
    </property>

    <property>
        <name>fs.s3a.access.key</name>
        <value>admin</value>
    </property>
    <property>
        <name>fs.s3a.secret.key</name>
        <value>password</value>
    </property>
    <property>
        <name>fs.s3a.endpoint</name>
        <value>http://minio:9000</value>
    </property>
    <property>
        <name>fs.s3a.path.style.access</name>
        <value>true</value>
    </property>

</configuration>
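
Since the compose file publishes the metastore's thrift port 9083 on the host, readiness can be checked from outside the containers. A hedged one-liner, assuming nc (netcat) is available on the host:

# The metastore is ready once the thrift port accepts TCP connections.
nc -z localhost 9083 && echo 'hive-metastore is up'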
29 changes: 29 additions & 0 deletions tests/integration/docker/hive/scripts/entrypoint.sh
@@ -0,0 +1,29 @@
#!/bin/sh

export HADOOP_HOME=/opt/hadoop-3.2.0
export HADOOP_CLASSPATH=${HADOOP_HOME}/share/hadoop/tools/lib/aws-java-sdk-bundle-1.11.375.jar:${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-aws-3.2.0.jar
export JAVA_HOME=/usr/local/openjdk-8

# Make sure the MySQL server is ready (the compose service is named "mysql").
MAX_TRIES=8
CURRENT_TRY=1
SLEEP_BETWEEN_TRY=4
until [ "$(telnet mysql 3306 | sed -n 2p)" = "Connected to mysql." ] || [ "$CURRENT_TRY" -gt "$MAX_TRIES" ]; do
  echo "Waiting for mysql server..."
  sleep "$SLEEP_BETWEEN_TRY"
  CURRENT_TRY=$((CURRENT_TRY + 1))
done

if [ "$CURRENT_TRY" -gt "$MAX_TRIES" ]; then
  echo "WARNING: Timeout when waiting for mysql."
fi

# Check whether the metastore schema already exists; schematool -info exits
# non-zero when it does not.
/opt/apache-hive-metastore-3.0.0-bin/bin/schematool -dbType mysql -info

if [ $? -eq 1 ]; then
  echo "Getting schema info failed. Probably not initialized. Initializing..."
  /opt/apache-hive-metastore-3.0.0-bin/bin/schematool -initSchema -dbType mysql
fi

/opt/apache-hive-metastore-3.0.0-bin/bin/start-metastore
Empty file added tests/integration/run.sh
Empty file.
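
run.sh is committed empty. A plausible sketch of the driver it might grow into; the compose path and crate name are taken from this diff, everything else is an assumption:

#!/bin/sh
# Hypothetical test driver; the run.sh in this commit is still empty.
set -e
cd "$(dirname "$0")"

# Bring up Trino, MySQL, MinIO and the Hive metastore, run the test, tear down.
docker compose -f docker/docker-compose.yml up -d --build
cargo run -p icelake-integration-tests
docker compose -f docker/docker-compose.yml down -v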
46 changes: 46 additions & 0 deletions tests/integration/src/main.rs
@@ -0,0 +1,46 @@
use icelake::Table;
use trino::Client as Trino;

const WAREHOUSE_ROOT: &str = "s3a://icebergdata";
const CATALOG: &str = "iceberg";
const SCHEMA1: &str = "s1";
const TABLE: &str = "t1";

struct TestFixture {
    trino: Trino,
    // icelake: Table,
}

impl TestFixture {
    // Use trino to create the table and insert some data. The table is
    // dropped before the schema because Trino cannot drop a non-empty schema.
    async fn prepare_data(&self) {
        let sqls = vec![
            format!("DROP TABLE IF EXISTS {TABLE}"),
            format!("DROP SCHEMA IF EXISTS {SCHEMA1}"),
            format!("CREATE SCHEMA IF NOT EXISTS {SCHEMA1} WITH (LOCATION = '{WAREHOUSE_ROOT}/{SCHEMA1}')"),
            format!("CREATE TABLE IF NOT EXISTS {TABLE} (id BIGINT, name VARCHAR, distance BIGINT) WITH (LOCATION = '{WAREHOUSE_ROOT}/{SCHEMA1}/{TABLE}')"),
            format!("INSERT INTO {TABLE} VALUES (1, 'a', 100), (2, 'b', 200)"),
        ];

        for sql in &sqls {
            log::info!("Executing '{sql}' against trino.");
            self.trino.query::<()>(sql.as_str()).await.unwrap();
        }

        log::info!("Finished preparing data!");
    }
}

async fn prepare_env() -> TestFixture {
    env_logger::init();

    let trino = Trino::new("http://localhost", 8080, "root", CATALOG, SCHEMA1);

    TestFixture { trino }
}

#[tokio::main]
async fn main() {
    let fixture = prepare_env().await;
    fixture.prepare_data().await;
}
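
Once prepare_data has run, the two inserted rows can be checked by hand through the Trino CLI inside the trino container. A sketch; the catalog, schema, and table names come from the constants above:

docker exec trino trino --catalog iceberg --schema s1 \
  --execute "SELECT id, name, distance FROM t1 ORDER BY id"
# expected CSV output:
# "1","a","100"
# "2","b","200"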
19 changes: 0 additions & 19 deletions tests/test_append_data_file.rs

This file was deleted.
