From 622318492586068b189495b5b37841be4bf31951 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Wa=C5=9B?= Date: Fri, 8 Sep 2023 15:15:22 +0200 Subject: [PATCH] Add HDP3 image with KMS --- testing/hdp3.1-hive-kerberized-kms/Dockerfile | 38 ++++++ .../files/etc/hadoop-kms/conf/core-site.xml | 9 ++ .../files/etc/hadoop-kms/conf/kms-acls.xml | 35 +++++ .../files/etc/hadoop-kms/conf/kms-site.xml | 29 +++++ .../files/etc/hadoop-kms/conf/passwordfile | 1 + .../files/etc/hadoop/conf/core-site.xml | 80 ++++++++++++ .../files/etc/hadoop/conf/hdfs-site.xml | 123 ++++++++++++++++++ .../files/etc/hadoop/conf/taskcontroller.cfg | 5 + .../files/etc/hive/conf/hive-site.xml | 116 +++++++++++++++++ .../files/etc/hive/conf/hiveserver2-site.xml | 16 +++ .../files/etc/supervisord.d/kms.conf | 9 ++ .../files/root/setup_kms.sh | 65 +++++++++ 12 files changed, 526 insertions(+) create mode 100644 testing/hdp3.1-hive-kerberized-kms/Dockerfile create mode 100644 testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/core-site.xml create mode 100644 testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/kms-acls.xml create mode 100644 testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/kms-site.xml create mode 100644 testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/passwordfile create mode 100644 testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop/conf/core-site.xml create mode 100644 testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop/conf/hdfs-site.xml create mode 100644 testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop/conf/taskcontroller.cfg create mode 100644 testing/hdp3.1-hive-kerberized-kms/files/etc/hive/conf/hive-site.xml create mode 100644 testing/hdp3.1-hive-kerberized-kms/files/etc/hive/conf/hiveserver2-site.xml create mode 100644 testing/hdp3.1-hive-kerberized-kms/files/etc/supervisord.d/kms.conf create mode 100755 testing/hdp3.1-hive-kerberized-kms/files/root/setup_kms.sh diff --git a/testing/hdp3.1-hive-kerberized-kms/Dockerfile 
b/testing/hdp3.1-hive-kerberized-kms/Dockerfile
new file mode 100644
index 00000000..1b25fd32
--- /dev/null
+++ b/testing/hdp3.1-hive-kerberized-kms/Dockerfile
@@ -0,0 +1,42 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM testing/hdp3.1-hive-kerberized:unlabelled
+# Extra arguments for every kadmin "addprinc" call in this file
+ARG ADDPRINC_ARGS="-maxrenewlife \"10 days\" +allow_renewable"
+
+# Install KMS
+ARG HADOOP_VERSION=3.1.4
+ARG HADOOP_BINARY_PATH=https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
+# Delete the tarball in the same RUN so it is not stored in the image layer
+RUN curl -fLsS -o /tmp/hadoop.tar.gz --url "$HADOOP_BINARY_PATH" && \
+    tar xzf /tmp/hadoop.tar.gz --directory /opt && \
+    mv "/opt/hadoop-$HADOOP_VERSION" /opt/hadoop && \
+    rm -f /tmp/hadoop.tar.gz
+
+# COPY CONFIGURATION
+COPY ./files /
+COPY ./files/etc/hadoop-kms/conf /opt/hadoop/etc/hadoop/
+
+# add users and group for testing purposes
+RUN set -xeu && \
+    for username in alice bob charlie; do \
+        groupadd "${username}_group" && \
+        useradd -g "${username}_group" "${username}" && \
+        /usr/sbin/kadmin.local -q "addprinc ${ADDPRINC_ARGS} -randkey ${username}/hadoop-master@LABS.TERADATA.COM" && \
+        /usr/sbin/kadmin.local -q "xst -norandkey -k /etc/hive/conf/${username}.keytab ${username}/hadoop-master"; \
+    done && \
+    echo OK
+
+RUN /root/setup_kms.sh
+
+CMD supervisord -c /etc/supervisord.conf
diff --git a/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/core-site.xml b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/core-site.xml new file mode 100644 index 00000000..ddf829a1 --- 
/dev/null +++ b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/core-site.xml @@ -0,0 +1,9 @@ + + + + + fs.defaultFS + hdfs://hadoop-master:9000 + + + diff --git a/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/kms-acls.xml b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/kms-acls.xml new file mode 100644 index 00000000..0a31b900 --- /dev/null +++ b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/kms-acls.xml @@ -0,0 +1,35 @@ + + + + default.key.acl.ALL + * + + + + default.key.acl.MANAGEMENT + * + + + + default.key.acl.READ + * + + + + default.key.acl.GENERATE_EEK + * + + + + default.key.acl.DECRYPT_EEK + * + + + diff --git a/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/kms-site.xml b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/kms-site.xml new file mode 100644 index 00000000..52ee4b51 --- /dev/null +++ b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/kms-site.xml @@ -0,0 +1,29 @@ + + + + + hadoop.kms.key.provider.uri + jceks://file@/${user.home}/kms.keystore + + + + hadoop.kms.authentication.type + kerberos + + + + hadoop.kms.authentication.kerberos.keytab + /etc/hadoop/conf/HTTP.keytab + + + + hadoop.kms.authentication.kerberos.principal + HTTP/hadoop-master + + + + hadoop.kms.authentication.kerberos.name.rules + DEFAULT + + + diff --git a/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/passwordfile b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/passwordfile new file mode 100644 index 00000000..7d8381bf --- /dev/null +++ b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop-kms/conf/passwordfile @@ -0,0 +1 @@ +abc1234 diff --git a/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop/conf/core-site.xml b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop/conf/core-site.xml new file mode 100644 index 00000000..f1b1a01b --- /dev/null +++ b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop/conf/core-site.xml @@ -0,0 +1,80 @@ + + + + + 
fs.defaultFS + hdfs://hadoop-master:9000 + + + + fs.permissions.umask-mode + 000 + + + + + hadoop.proxyuser.httpfs.hosts + * + + + + hadoop.proxyuser.httpfs.groups + * + + + + + hadoop.proxyuser.hive.hosts + * + + + + hadoop.proxyuser.hive.groups + * + + + + + hadoop.proxyuser.hdfs.groups + * + + + + hadoop.proxyuser.hdfs.hosts + * + + + + + hadoop.proxyuser.presto-server.groups + * + + + + hadoop.proxyuser.presto-server.hosts + * + + + + + hadoop.security.authentication + kerberos + + + + hadoop.security.authorization + true + + + + + hadoop.security.key.provider.path + kms://http@hadoop-master:9600/kms + + + + dfs.encryption.key.provider.uri + kms://http@hadoop-master:9600/kms + + + diff --git a/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop/conf/hdfs-site.xml b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop/conf/hdfs-site.xml new file mode 100644 index 00000000..185d814f --- /dev/null +++ b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop/conf/hdfs-site.xml @@ -0,0 +1,123 @@ + + + + + + dfs.namenode.name.dir + /var/lib/hadoop-hdfs/cache/name/ + + + + dfs.datanode.data.dir + /var/lib/hadoop-hdfs/cache/data/ + + + + fs.viewfs.mounttable.hadoop-viewfs.link./default + hdfs://hadoop-master:9000/user/hive/warehouse + + + + + dfs.block.access.token.enable + true + + + + + dfs.namenode.keytab.file + /etc/hadoop/conf/hdfs.keytab + + + + + dfs.namenode.kerberos.principal + hdfs/hadoop-master@LABS.TERADATA.COM + + + + dfs.namenode.kerberos.internal.spnego.principal + HTTP/hadoop-master@LABS.TERADATA.COM + + + + + dfs.secondary.namenode.keytab.file + /etc/hadoop/conf/hdfs.keytab + + + + + dfs.secondary.namenode.kerberos.principal + hdfs/hadoop-master@LABS.TERADATA.COM + + + + dfs.secondary.namenode.kerberos.internal.spnego.principal + HTTP/hadoop-master@LABS.TERADATA.COM + + + + + dfs.datanode.keytab.file + /etc/hadoop/conf/hdfs.keytab + + + + + dfs.datanode.kerberos.principal + hdfs/hadoop-master@LABS.TERADATA.COM + + + + + dfs.webhdfs.enabled + true + + + + + 
dfs.web.authentication.kerberos.principal + HTTP/hadoop-master@LABS.TERADATA.COM + + + + dfs.web.authentication.kerberos.keytab + /etc/hadoop/conf/HTTP.keytab + + + + + ignore.secure.ports.for.testing + true + + + + dfs.http.policy + HTTP_ONLY + + + + dfs.namenode.acls.enabled + true + + + + dfs.permissions + true + + + diff --git a/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop/conf/taskcontroller.cfg b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop/conf/taskcontroller.cfg new file mode 100644 index 00000000..2384a21e --- /dev/null +++ b/testing/hdp3.1-hive-kerberized-kms/files/etc/hadoop/conf/taskcontroller.cfg @@ -0,0 +1,5 @@ +hadoop.log.dir=/var/log/hadoop-mapreduce +mapreduce.tasktracker.group=mapred +banned.users=mapred,bin +min.user.id=0 +allowed.system.users=nobody,hive diff --git a/testing/hdp3.1-hive-kerberized-kms/files/etc/hive/conf/hive-site.xml b/testing/hdp3.1-hive-kerberized-kms/files/etc/hive/conf/hive-site.xml new file mode 100644 index 00000000..4e1ab443 --- /dev/null +++ b/testing/hdp3.1-hive-kerberized-kms/files/etc/hive/conf/hive-site.xml @@ -0,0 +1,116 @@ + + + + + javax.jdo.option.ConnectionURL + jdbc:mysql://localhost/metastore + + + + javax.jdo.option.ConnectionDriverName + com.mysql.jdbc.Driver + + + + javax.jdo.option.ConnectionUserName + root + + + + javax.jdo.option.ConnectionPassword + root + + + + datanucleus.autoCreateSchema + false + + + + datanucleus.fixedDatastore + true + + + + datanucleus.autoStartMechanism + SchemaTable + + + + hive.security.authorization.createtable.owner.grants + ALL + + + + hive.users.in.admin.role + hdfs,hive + + + + + hive.server2.authentication + KERBEROS + + + + hive.server2.enable.impersonation + false + + + + hive.server2.authentication.kerberos.principal + hive/hadoop-master@LABS.TERADATA.COM + + + + hive.server2.authentication.kerberos.keytab + /etc/hive/conf/hive.keytab + + + + hive.metastore.sasl.enabled + true + + + + hive.metastore.kerberos.keytab.file + /etc/hive/conf/hive.keytab + + 
+ + hive.metastore.kerberos.principal + hive/hadoop-master@LABS.TERADATA.COM + + + + + metastore.storage.schema.reader.impl + org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader + + + + hive.support.concurrency + true + + + + hive.compactor.initiator.on + true + + + + hive.compactor.worker.threads + 1 + + + + hive.txn.manager + org.apache.hadoop.hive.ql.lockmgr.DbTxnManager + + + + hive.metastore.disallow.incompatible.col.type.changes + false + + + diff --git a/testing/hdp3.1-hive-kerberized-kms/files/etc/hive/conf/hiveserver2-site.xml b/testing/hdp3.1-hive-kerberized-kms/files/etc/hive/conf/hiveserver2-site.xml new file mode 100644 index 00000000..520cd41d --- /dev/null +++ b/testing/hdp3.1-hive-kerberized-kms/files/etc/hive/conf/hiveserver2-site.xml @@ -0,0 +1,16 @@ + + + + + + hive.metastore.uris + thrift://localhost:9083 + + + + + hive.security.authenticator.manager + org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator + + + diff --git a/testing/hdp3.1-hive-kerberized-kms/files/etc/supervisord.d/kms.conf b/testing/hdp3.1-hive-kerberized-kms/files/etc/supervisord.d/kms.conf new file mode 100644 index 00000000..0e94b344 --- /dev/null +++ b/testing/hdp3.1-hive-kerberized-kms/files/etc/supervisord.d/kms.conf @@ -0,0 +1,9 @@ +[program:kms] +environment=HADOOP_KEYSTORE_PASSWORD="abc1234" +command=/opt/hadoop/sbin/kms.sh run +autostart=true +autorestart=true +redirect_stderr=true +##### stdout_logfile=/var/log/hadoop-kms/kms.log +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0
diff --git a/testing/hdp3.1-hive-kerberized-kms/files/root/setup_kms.sh b/testing/hdp3.1-hive-kerberized-kms/files/root/setup_kms.sh
new file mode 100755
index 00000000..84cd69e6
--- /dev/null
+++ b/testing/hdp3.1-hive-kerberized-kms/files/root/setup_kms.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+#
+# Build-time setup for the KMS-enabled image: starts the services through
+# supervisord, makes /user/hive/warehouse an HDFS encryption zone using key
+# "key1", runs the Hive schematool -initSchema step, creates /user/<name> HDFS
+# directories for alice, bob and charlie, then stops the services and purges
+# logs, Kerberos credential caches and /tmp.
+
+set -xeuo pipefail
+
+#######################################
+# Runs a command until it succeeds or 600 seconds have passed, sleeping
+# 5 seconds after every failed attempt.
+# Arguments: the command to run followed by its arguments
+# Returns:   0 once the command succeeds, otherwise the exit status of the
+#            last failed attempt
+#######################################
+function retry() {
+    local deadline
+    local exit_code=1
+
+    deadline=$(($(date +%s) + 600))
+
+    while (( $(date +%s) < deadline )); do
+        # "|| exit_code=$?" records a failure without tripping "set -e"
+        exit_code=0
+        "$@" || exit_code=$?
+
+        if (( exit_code == 0 )); then
+            return 0
+        fi
+        sleep 5
+    done
+
+    return "${exit_code}"
+}
+
+supervisord -c /etc/supervisord.conf &
+
+retry kinit -kt /etc/hadoop/conf/hdfs.keytab hdfs/hadoop-master@LABS.TERADATA.COM
+retry hdfs dfsadmin -safemode leave
+
+retry kinit -kt /etc/hive/conf/hive.keytab hive/hadoop-master@LABS.TERADATA.COM
+# Poll through retry so that a HiveServer2 which never comes up fails the
+# build after the retry timeout instead of hanging it forever
+echo "Waiting for HiveServer2 ..."
+retry beeline -n hive -e "SELECT 1"
+
+# the default directory must be empty before enabling encryption
+hiveUrl="jdbc:hive2://hadoop-master:10000/default;principal=hive/hadoop-master@LABS.TERADATA.COM"
+beeline -u "$hiveUrl" -e "drop schema information_schema cascade; drop schema sys cascade;"
+hadoop fs -rm -f -r /user/hive/warehouse/.Trash
+
+# Switch to the hdfs principal, then create the key and the encryption zone
+retry kinit -kt /etc/hadoop/conf/hdfs.keytab hdfs/hadoop-master@LABS.TERADATA.COM
+hadoop key create key1 -size 256
+hdfs crypto -createZone -keyName key1 -path /user/hive/warehouse
+hdfs crypto -listZones
+
+# Create `information_schema` and `sys` schemas in Hive
+retry kinit -kt /etc/hive/conf/hive.keytab hive/hadoop-master@LABS.TERADATA.COM
+/usr/hdp/current/hive-client/bin/schematool -userName hive -metaDbType mysql -dbType hive \
+    -url "$hiveUrl" -driver org.apache.hive.jdbc.HiveDriver \
+    -initSchema
+
+# Create an HDFS directory owned by each test user, acting as the hdfs user
+su -s /bin/bash hdfs -c 'kinit -kt /etc/hadoop/conf/hdfs.keytab hdfs/hadoop-master@LABS.TERADATA.COM'
+for username in alice bob charlie; do
+    su -s /bin/bash hdfs -c "/usr/bin/hadoop fs -mkdir /user/$username"
+    su -s /bin/bash hdfs -c "/usr/bin/hadoop fs -chown $username /user/$username"
+done
+
+supervisorctl stop all
+pkill -F /var/run/supervisord.pid
+# Wait for the background supervisord started above to exit
+wait
+
+# Purge Kerberos credential cache of root user
+kdestroy
+
+# Truncate every *.log file under /var/log
+find /var/log -type f -name '*.log' -printf "truncate %p\n" -exec truncate --size 0 {} \;
+# Purge /tmp, this includes credential caches of other users
+find /tmp -mindepth 1 -maxdepth 1 -exec rm -rf {} +