Skip to content

Commit

Permalink
global caches & cargo-sweep (#20)
Browse files Browse the repository at this point in the history
- Runs cargo sweep after the user cargo sub-command
- Introduces @ijc RUN_WITH_CACHE UDC
- Caches $CARGO_HOME is a non-locking global cache (shared across all Earthfiles). This significantly reduces cache size
  • Loading branch information
idelvall authored Nov 3, 2023
1 parent d3d983a commit f8906ce
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 30 deletions.
118 changes: 88 additions & 30 deletions rust/Earthfile
Original file line number Diff line number Diff line change
@@ -1,54 +1,112 @@
VERSION 0.7
VERSION --global-cache 0.7

# CARGO runs the cargo command "cargo $args".
# This UDC should be thread safe. Parallel builds of targets using it should be free of race conditions.
# Arguments:
# - args: Cargo subcommand and its arguments. Required
# - keep_fingerprints (false): Do not remove source packages fingerprints. Use only when source packages have been COPYed with --keep-ts option
# - args: Cargo subcommand and its arguments. Required.
# - keep_fingerprints (false): Do not remove source packages fingerprints. Use only when source packages have been COPYed with --keep-ts option.
# - sweep_days (4): The UDC uses cargo-sweep to clean build artifacts that haven't been accessed for this number of days.
# - output: Regex to match the files within the target folder to be copied from the cache to the caller filesystem (image layers).
# Use this argument when you want to SAVE an ARTIFACT from the target folder (mounted cache), always trying to minimize the total size of the copied fileset.
# For example --output="release/[^\./]+" would keep all the files in /target/release that don't have any extension
# For example --output="release/[^\./]+" would keep all the files in /target/release that don't have any extension.
CARGO:
COMMAND
ARG --required args
ARG keep_fingerprints=false
ARG sweep_days=4
ARG output
IF [ "$keep_fingerprints" = "false" ]
DO +REMOVE_SOURCE_FINGERPRINTS
END
RUN --mount=type=cache,mode=0777,sharing=shared,target=$CARGO_HOME/registry \
--mount=type=cache,mode=0777,sharing=shared,target=$CARGO_HOME/git \
--mount=type=cache,mode=0777,target=target \
cargo $args; \
if [ -n "$output" ]; then \
mkdir /earthly_lib_rust_temp; \
cd target; \
find . -type f -regextype posix-egrep -regex "./$output" -exec cp --parents \{\} /earthly_lib_rust_temp \; ; \
fi
DO +INSTALL_CARGO_SWEEP
DO +RUN_WITH_CACHE --command="set -e;
echo \"Running cargo $args\" ;
cargo $args;
if [ -n \"$output\" ]; then
echo \"Copying output files\" ;
mkdir -p /earthly_lib_rust_temp;
cd target;
find . -type f -regextype posix-egrep -regex \"./$output\" -exec cp --parents \{\} /earthly_lib_rust_temp \; ;
cd ..;
fi;
echo \"Running cargo sweep -r -t $sweep_days\" ;
cargo sweep -r -t $sweep_days;
echo \"Running cargo sweep -r -i\" ;
cargo sweep -r -i;"
IF [ "$output" != "" ]
RUN mkdir -p target; \
mv /earthly_lib_rust_temp/* target 2>/dev/null || echo "no files found within ./target matching the provided output regexp" ;
END

# REMOVE_SOURCE_FINGERPRINTS removes the Cargo fingerprint folders of the source packages.
# This guarantees Cargo compiles the packages when COPY commands of the source folders have a static timestamp (see --keep-ts)
# This guarantees Cargo compiles the packages when COPY commands of the source folders have a static timestamp (see --keep-ts).
REMOVE_SOURCE_FINGERPRINTS:
COMMAND
COPY +get-stoml/stoml /tmp/stoml
ARG source_libs=$(find . -name Cargo.toml -exec bash -c '/tmp/stoml {} package.name; printf "\n"' \\;)
RUN --mount=type=cache,mode=0777,sharing=shared,target=target \
find target -name .fingerprint; \
fingerprint_folders=$(find target -name .fingerprint) ; \
for fingerprint_folder in $fingerprint_folders; do \
cd $fingerprint_folder; \
for source_lib in $source_libs; do \
find . -maxdepth 1 -regex "\./$source_lib-[^-]+" -exec rm -rf {} \; ;\
done \
done
COPY +get-tomljson/tomljson /tmp/tomljson
COPY +get-jq/jq /tmp/jq
DO +RUN_WITH_CACHE --command="set -e;
source_libs=\$(find . -name Cargo.toml -exec bash -c '/tmp/tomljson {} | /tmp/jq -r .package.name; printf \"\\n\"' \\;) ;
fingerprint_folders=\$(find target -name .fingerprint) ;
echo \"deleting fingerprints:\";
for fingerprint_folder in \$fingerprint_folders; do
cd \$fingerprint_folder;
for source_lib in \$source_libs; do
find . -maxdepth 1 -regex \"\./\$source_lib-[^-]+\" -exec bash -c 'readlink -f {}; rm -rf {}' \; ;
done
done"

# get-tomljson gets the portable tomljson binary.
get-tomljson:
FROM alpine:3.18.3
ARG USERARCH
ARG version=2.1.0
RUN wget -O tomljson.tar.xz https://github.com/pelletier/go-toml/releases/download/v${version}/tomljson_${version}_linux_${USERARCH}.tar.xz; \
tar -xf tomljson.tar.xz; \
chmod +x tomljson
SAVE ARTIFACT tomljson

# get-stoml gets the portable stoml binary
get-stoml:
# get-jq gets the portable jq binary.
get-jq:
FROM alpine:3.18.3
RUN wget -O stoml https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64; \
chmod +x stoml
SAVE ARTIFACT ./stoml stoml
ARG USERARCH
ARG version=1.7
RUN wget -O jq https://github.com/jqlang/jq/releases/download/jq-${version}/jq-linux-${USERARCH}; \
chmod +x jq
SAVE ARTIFACT jq

# INSTALL_CARGO_SWEEP installs cargo-sweep if it doesn't exist already.
INSTALL_CARGO_SWEEP:
COMMAND
DO +RUN_WITH_CACHE --command="set -e;
if [ ! -f \$CARGO_HOME/bin/cargo-sweep ]; then
cargo install cargo-sweep --root \$CARGO_HOME;
fi;"

# RUN_WITH_CACHE runs the passed command with the CARGO caches mounted.
# Arguments:
# - command (required): Command to run, can be any expression.
#
# This implementation is not expected to significantly change. Prefer using the `CARGO` UDC if you can, so you can get future improvements transparently.
RUN_WITH_CACHE:
COMMAND
ARG --required command
ARG EARTHLY_TARGET_PROJECT_NO_TAG
ARG CACHE_ID="${EARTHLY_TARGET_PROJECT_NO_TAG}#earthly-cargo-cache"
# $ORIGINAL_CARGO_HOME/bin will contain the cargo and rust binaries, as well as the installed crates.
# We save it to reset it back at the end. This location is presumed to be in the calling target layers filesystem rather than in other mount cache.
ARG ORIGINAL_CARGO_HOME=$CARGO_HOME
ARG ORIGINAL_CARGO_INSTALL_ROOT=$CARGO_INSTALL_ROOT
# Make sure that crates installed though this UDC are stored in the original cargo home, and not in the cargo home within the mount cache.
# This way, if BK garbage-collects them, the build is not broken
ENV CARGO_INSTALL_ROOT=$ORIGINAL_CARGO_HOME
# We need $CARGO_HOME/.package-cache within the earthly shared-mount-cache, so cargo can properly synchronize parallel access to $CARGO_HOME resources.
ENV CARGO_HOME="/earthly/.cargo"
RUN echo $CACHE_ID
RUN --mount=type=cache,mode=0777,id=$CACHE_ID,sharing=shared,target=/earthly \
--mount=type=cache,mode=0777,target=target \
set -e; \
mkdir -p $CARGO_HOME; \
printf "Running:\n $command\n"; \
eval $command
ENV CARGO_HOME=$ORIGINAL_CARGO_HOME
ENV CARGO_INSTALL_ROOT=$ORIGINAL_CARGO_INSTALL_ROOT
4 changes: 4 additions & 0 deletions rust/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ This UDC runs the cargo command `cargo $args` caching the contents of `$CARGO_HO

First, import the UDC up in your Earthfile:
```earthfile
VERSION 0.7
IMPORT github.com/earthly/lib/rust:<version/commit> AS rust
```

Expand All @@ -33,6 +34,9 @@ Cargo caches compilations of packages in `target` folder based on their last mod

By default, this UDC removes the fingerprints of the packages found in the source code, to force their recompilation and work even when the Earthly `COPY` commands used overwrote the timestamps.

#### `sweep_days (4)`
The UDC uses [cargo-sweep](https://github.com/holmgr/cargo-sweep) to clean build artifacts that haven't been accessed for this number of days.

#### `output`
Regex to match the files within the target folder to be copied from the cache to the caller filesystem (image layers).

Expand Down

0 comments on commit f8906ce

Please sign in to comment.