Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

B it join #1

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ release-linux: clean
./mvnw install -Prelease -DskipTests $(PROFILES)
release:
cd native && RUSTFLAGS="-Ctarget-cpu=native" cargo build --release
./mvnw install -Prelease -DskipTests $(PROFILES)
./mvnw install -Prelease -DskipTests $(PROFILES) -Drat.ignoreErrors=true
release-nogit:
cd native && RUSTFLAGS="-Ctarget-cpu=native" cargo build --release
./mvnw install -Prelease -DskipTests $(PROFILES) -Dmaven.gitcommitid.skip=true
./mvnw install -Prelease -DskipTests $(PROFILES) -Dmaven.gitcommitid.skip=true -Drat.ignoreErrors=true
benchmark-%: clean release
cd spark && COMET_CONF_DIR=$(shell pwd)/conf MAVEN_OPTS='-Xmx20g' ../mvnw exec:java -Dexec.mainClass="$*" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="$(filter-out $@,$(MAKECMDGOALS))" $(PROFILES)
.DEFAULT:
Expand Down
59 changes: 1 addition & 58 deletions common/src/main/scala/org/apache/comet/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@

package org.apache

import java.util.Properties

import org.apache.arrow.memory.RootAllocator

package object comet {
Expand All @@ -39,60 +37,5 @@ package object comet {
* benchmarking software to provide the source revision and repository. In addition, the build
* information is included to aid in future debugging efforts for releases.
*/
private object CometBuildInfo {

val (
cometVersion: String,
cometBranch: String,
cometRevision: String,
cometBuildUserName: String,
cometBuildUserEmail: String,
cometRepoUrl: String,
cometBuildTimestamp: String) = {
val resourceStream = Thread
.currentThread()
.getContextClassLoader
.getResourceAsStream("comet-git-info.properties")
if (resourceStream == null) {
throw new CometRuntimeException("Could not find comet-git-info.properties")
}

try {
val unknownProp = "<unknown>"
val props = new Properties()
props.load(resourceStream)
(
props.getProperty("git.build.version", unknownProp),
props.getProperty("git.branch", unknownProp),
props.getProperty("git.commit.id.full", unknownProp),
props.getProperty("git.build.user.name", unknownProp),
props.getProperty("git.build.user.email", unknownProp),
props.getProperty("git.remote.origin.url", unknownProp),
props.getProperty("git.build.time", unknownProp))
} catch {
case e: Exception =>
throw new CometRuntimeException(
"Error loading properties from comet-git-info.properties",
e)
} finally {
if (resourceStream != null) {
try {
resourceStream.close()
} catch {
case e: Exception =>
throw new CometRuntimeException("Error closing Comet build info resource stream", e)
}
}
}
}
}

val COMET_VERSION = CometBuildInfo.cometVersion
val COMET_BRANCH = CometBuildInfo.cometBranch
val COMET_REVISION = CometBuildInfo.cometRevision
val COMET_BUILD_USER_EMAIL = CometBuildInfo.cometBuildUserEmail
val COMET_BUILD_USER_NAME = CometBuildInfo.cometBuildUserName
val COMET_REPO_URL = CometBuildInfo.cometRepoUrl
val COMET_BUILD_TIMESTAMP = CometBuildInfo.cometBuildTimestamp

private object CometBuildInfo {}
}
45 changes: 38 additions & 7 deletions kube/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ USER root

# Installing JDK11 as the image comes with JRE
RUN apt update \
&& apt install -y git \
&& apt install -y curl \
&& apt install -y curl git \
&& apt install -y openjdk-11-jdk \
&& apt clean

Expand All @@ -31,15 +30,47 @@ ENV PATH="/root/.cargo/bin:${PATH}"
ENV RUSTFLAGS="-C debuginfo=line-tables-only -C incremental=false"
ENV SPARK_VERSION=3.4
ENV SCALA_VERSION=2.12
ENV GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"

# copy source files to Docker image
RUN mkdir /comet
WORKDIR /comet

RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts

# build native code first so that this layer can be re-used
# if only Scala code gets modified
COPY rust-toolchain.toml /comet/rust-toolchain.toml
COPY native /comet/native
RUN --mount=type=ssh cd native && RUSTFLAGS="-Ctarget-cpu=native" cargo build --release

# copy the rest of the project
COPY .mvn /comet/.mvn
COPY mvnw /comet/mvnw
COPY common /comet/common
COPY dev /comet/dev
COPY docs /comet/docs
COPY fuzz-testing /comet/fuzz-testing
COPY spark /comet/spark
COPY spark-integration /comet/spark-integration
COPY scalafmt.conf /comet/scalafmt.conf
COPY .scalafix.conf /comet/.scalafix.conf
COPY Makefile /comet/Makefile
COPY pom.xml /comet/pom.xml

# Pick the JDK instead of JRE to compile Comet
RUN cd /opt \
&& git clone https://github.com/apache/datafusion-comet.git \
&& cd datafusion-comet \
&& JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION"
RUN cd /comet \
&& JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release-nogit PROFILES="-Pspark-$SPARK_VERSION"

FROM apache/spark:3.4.3
ENV SPARK_VERSION=3.4
ENV SCALA_VERSION=2.12
USER root
COPY --from=builder /opt/datafusion-comet/spark/target/comet-spark-spark${SPARK_VERSION}_$SCALA_VERSION-0.1.0-SNAPSHOT.jar $SPARK_HOME/jars

# note the use of a wildcard in the file name so that this works with both snapshot and final release versions
COPY --from=builder /comet/spark/target/comet-spark-spark${SPARK_VERSION}_$SCALA_VERSION-0.1.0*.jar $SPARK_HOME/jars
RUN wget -q -O \
/opt/spark/jars/gcs-connector-hadoop3-2.2.17-shaded.jar \
https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/hadoop3-2.2.17/gcs-connector-hadoop3-2.2.17-shaded.jar
RUN echo :quit | /opt/spark/bin/spark-shell --master local[1] --packages org.biodatageeks:sequila_2.12:1.3.6 --conf spark.jars.ivy=/opt/spark/.ivy2

2 changes: 2 additions & 0 deletions native/.cargo/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[net]
git-fetch-with-cli = true
Loading
Loading