You might have seen in the code base a lot of `.unwrap()` or `?`. If you're unfamiliar with Rust, check out the Rust book for more information.
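For readers new to Rust, here is a small sketch (not from the original text) of the practical difference: `.unwrap()` panics when an operation fails, while `?` propagates the error to the caller.

```rust
use candle_core::{DType, Device, Result, Tensor};

fn with_unwrap(device: &Device) -> Tensor {
    // Panics with the error message if the operation fails.
    Tensor::zeros((1, 784), DType::F32, device).unwrap()
}

fn with_question_mark(device: &Device) -> Result<Tensor> {
    // Returns the error to the caller instead of panicking.
    let t = Tensor::zeros((1, 784), DType::F32, device)?;
    Ok(t)
}

fn main() -> Result<()> {
    let device = Device::Cpu;
    let _a = with_unwrap(&device);
    let _b = with_question_mark(&device)?;
    Ok(())
}
```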
What's important to know, though, is that if you want to find out where a particular operation failed, you can simply run with `RUST_BACKTRACE=1` to get the location where the model actually failed.

Let's look at some failing code:
```rust
let x = Tensor::zeros((1, 784), DType::F32, &device)?;
let y = Tensor::zeros((1, 784), DType::F32, &device)?;
let z = x.matmul(&y)?;
```
This will print at runtime:

```
Error: ShapeMismatchBinaryOp { lhs: [1, 784], rhs: [1, 784], op: "matmul" }
```
After adding `RUST_BACKTRACE=1`:

```
Error: WithBacktrace { inner: ShapeMismatchBinaryOp { lhs: [1, 784], rhs: [1, 784], op: "matmul" }, backtrace: Backtrace [{ fn: "candle::error::Error::bt", file: "/home/nicolas/.cargo/git/checkouts/candle-5bb8ef7e0626d693/f291065/candle-core/src/error.rs", line: 200 }, { fn: "candle::tensor::Tensor::matmul", file: "/home/nicolas/.cargo/git/checkouts/candle-5bb8ef7e0626d693/f291065/candle-core/src/tensor.rs", line: 816 }, { fn: "myapp::main", file: "./src/main.rs", line: 29 }, { fn: "core::ops::function::FnOnce::call_once", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/ops/function.rs", line: 250 }, { fn: "std::sys_common::backtrace::__rust_begin_short_backtrace", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/sys_common/backtrace.rs", line: 135 }, { fn: "std::rt::lang_start::{{closure}}", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/rt.rs", line: 166 }, { fn: "core::ops::function::impls::<impl core::ops::function::FnOnce<A> for &F>::call_once", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/ops/function.rs", line: 284 }, { fn: "std::panicking::try::do_call", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs", line: 500 }, { fn: "std::panicking::try", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs", line: 464 }, { fn: "std::panic::catch_unwind", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panic.rs", line: 142 }, { fn: "std::rt::lang_start_internal::{{closure}}", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/rt.rs", line: 148 }, { fn: "std::panicking::try::do_call", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs", line: 500 }, { fn: "std::panicking::try", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs", line: 464 }, { fn: "std::panic::catch_unwind", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panic.rs", line: 142 }, { fn: "std::rt::lang_start_internal", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/rt.rs", line: 148 }, { fn: "std::rt::lang_start", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/rt.rs", line: 165 }, { fn: "main" }, { fn: "__libc_start_main" }, { fn: "_start" }] }
```
Not super pretty at the moment, but we can see that the error occurred in `{ fn: "myapp::main", file: "./src/main.rs", line: 29 }`.

Another thing to note is that, since Rust is compiled, it is not necessarily as easy to recover proper stacktraces, especially in release builds. We're using `anyhow` for that. The library is still young, so please report any issues with detecting where an error is coming from.
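As a minimal sketch of that pattern (assuming `anyhow` has been added to your `Cargo.toml`), you can return `anyhow::Result<()>` from `main` and let `?` convert and propagate candle errors, optionally attaching extra context:

```rust
use anyhow::Context;
use candle_core::{DType, Device, Tensor};

fn main() -> anyhow::Result<()> {
    let device = Device::Cpu;
    let x = Tensor::zeros((1, 784), DType::F32, &device)?;
    let y = Tensor::zeros((1, 784), DType::F32, &device)?;
    // This matmul fails on purpose: `?` converts the candle error into an
    // `anyhow::Error`, and `context` adds a human-readable hint to the report.
    let z = x.matmul(&y).context("matmul between x and y failed")?;
    println!("{z}");
    Ok(())
}
```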
When running a model on Cuda, you might get a stacktrace that does not really represent the error. The reason is that CUDA is asynchronous by nature, so the error might be raised while you were already submitting totally different kernels.

One way to avoid this is to set the `CUDA_LAUNCH_BLOCKING=1` environment variable, which forces every kernel to be launched sequentially. You might still, however, see the error reported on other kernels, as the faulty kernel might exit without an error but corrupt some pointer, in which case the error will only surface when the `CudaSlice` is dropped.

If this occurs, you can use `compute-sanitizer`. This tool is like `valgrind` but for CUDA; it will help locate the errors in the kernels.
Cheatsheet:

|            | Using PyTorch                        | Using Candle                                                                   |
|------------|--------------------------------------|--------------------------------------------------------------------------------|
| Creation   | `torch.Tensor([[1, 2], [3, 4]])`     | `Tensor::new(&[[1f32, 2.], [3., 4.]], &Device::Cpu)?`                           |
| Creation   | `torch.zeros((2, 2))`                | `Tensor::zeros((2, 2), DType::F32, &Device::Cpu)?`                              |
| Indexing   | `tensor[:, :4]`                      | `tensor.i((.., ..4))?`                                                          |
| Operations | `tensor.view((2, 2))`                | `tensor.reshape((2, 2))?`                                                       |
| Operations | `a.matmul(b)`                        | `a.matmul(&b)?`                                                                 |
| Arithmetic | `a + b`                              | `&a + &b`                                                                       |
| Device     | `tensor.to(device="cuda")`           | `tensor.to_device(&Device::new_cuda(0)?)?`                                      |
| Dtype      | `tensor.to(dtype=torch.float16)`     | `tensor.to_dtype(&DType::F16)?`                                                 |
| Saving     | `torch.save({"A": A}, "model.bin")`  | `candle::safetensors::save(&HashMap::from([("A", A)]), "model.safetensors")?`   |
| Loading    | `weights = torch.load("model.bin")`  | `candle::safetensors::load("model.safetensors", &device)`                       |
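To get a quick feel for the Rust side of the table, here is a small sketch exercising a few of those calls (creation, indexing, reshape, matmul and arithmetic); the shapes are arbitrary illustration values:

```rust
use candle_core::{DType, Device, IndexOp, Tensor};

fn main() -> candle_core::Result<()> {
    let device = Device::Cpu;

    // Creation
    let a = Tensor::new(&[[1f32, 2.], [3., 4.]], &device)?;
    let b = Tensor::zeros((2, 2), DType::F32, &device)?;

    // Indexing: first row, all columns (needs the `IndexOp` trait in scope).
    let first_row = a.i((0..1, ..))?;

    // Operations and arithmetic
    let c = a.matmul(&b)?;
    let d = (&a + &c)?;
    let e = d.reshape((4, 1))?;

    println!("first row: {first_row}");
    println!("reshaped: {e}");
    Ok(())
}
```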
We will now create the hello world of the ML world: a model capable of solving the MNIST dataset.

Open `src/main.rs` and fill in this content:
```rust
extern crate candle_core;
use candle_core::{Device, Result, Tensor};

struct Model {
    first: Tensor,
    second: Tensor,
}

impl Model {
    fn forward(&self, image: &Tensor) -> Result<Tensor> {
        let x = image.matmul(&self.first)?;
        let x = x.relu()?;
        x.matmul(&self.second)
    }
}

fn main() -> Result<()> {
    // Use Device::new_cuda(0)?; to use the GPU.
    let device = Device::Cpu;

    let first = Tensor::randn(0f32, 1.0, (784, 100), &device)?;
    let second = Tensor::randn(0f32, 1.0, (100, 10), &device)?;
    let model = Model { first, second };

    let dummy_image = Tensor::randn(0f32, 1.0, (1, 784), &device)?;

    let digit = model.forward(&dummy_image)?;
    println!("Digit {digit:?} digit");
    Ok(())
}
```
Everything should now run with:

```bash
cargo run --release
```
## Using a `Linear` layer

Now that we have this, we might want to complexify things a bit, for instance by adding `bias` and creating the classical `Linear` layer. We can do as such:
```rust
extern crate candle_core;
use candle_core::{Device, Result, Tensor};

struct Linear {
    weight: Tensor,
    bias: Tensor,
}

impl Linear {
    fn forward(&self, x: &Tensor) -> Result<Tensor> {
        let x = x.matmul(&self.weight)?;
        x.broadcast_add(&self.bias)
    }
}

struct Model {
    first: Linear,
    second: Linear,
}

impl Model {
    fn forward(&self, image: &Tensor) -> Result<Tensor> {
        let x = self.first.forward(image)?;
        let x = x.relu()?;
        self.second.forward(&x)
    }
}
```
This will change the model running code into a new function:
```rust
extern crate candle_core;
use candle_core::{Device, Result, Tensor};

struct Linear {
    weight: Tensor,
    bias: Tensor,
}

impl Linear {
    fn forward(&self, x: &Tensor) -> Result<Tensor> {
        let x = x.matmul(&self.weight)?;
        x.broadcast_add(&self.bias)
    }
}

struct Model {
    first: Linear,
    second: Linear,
}

impl Model {
    fn forward(&self, image: &Tensor) -> Result<Tensor> {
        let x = self.first.forward(image)?;
        let x = x.relu()?;
        self.second.forward(&x)
    }
}

fn main() -> Result<()> {
    // Use Device::new_cuda(0)?; to use the GPU.
    // Use Device::Cpu; to use the CPU.
    let device = Device::cuda_if_available(0)?;

    // Creating a dummy model
    let weight = Tensor::randn(0f32, 1.0, (784, 100), &device)?;
    let bias = Tensor::randn(0f32, 1.0, (100,), &device)?;
    let first = Linear { weight, bias };
    let weight = Tensor::randn(0f32, 1.0, (100, 10), &device)?;
    let bias = Tensor::randn(0f32, 1.0, (10,), &device)?;
    let second = Linear { weight, bias };
    let model = Model { first, second };

    let dummy_image = Tensor::randn(0f32, 1.0, (1, 784), &device)?;

    // Inference on the model
    let digit = model.forward(&dummy_image)?;
    println!("Digit {digit:?} digit");
    Ok(())
}
```
Now it works, and this is a great way to create your own layers. But most of the classical layers are already implemented in `candle-nn`.

## Using `candle_nn`

For instance, `Linear` is already there. This `Linear` is coded with PyTorch layout in mind, to better reuse existing models out there, so it uses the transpose of the weights and not the weights directly.

So instead we can simplify our example. First, add `candle-nn` as a dependency:

```bash
cargo add --git https://github.com/huggingface/candle.git candle-nn
```
And rewrite our example using it:
```rust
extern crate candle_core;
extern crate candle_nn;
use candle_core::{Device, Result, Tensor};
use candle_nn::{Linear, Module};

struct Model {
    first: Linear,
    second: Linear,
}

impl Model {
    fn forward(&self, image: &Tensor) -> Result<Tensor> {
        let x = self.first.forward(image)?;
        let x = x.relu()?;
        self.second.forward(&x)
    }
}

fn main() -> Result<()> {
    // Use Device::new_cuda(0)?; to use the GPU.
    let device = Device::Cpu;

    // This has changed (784, 100) -> (100, 784)!
    let weight = Tensor::randn(0f32, 1.0, (100, 784), &device)?;
    let bias = Tensor::randn(0f32, 1.0, (100,), &device)?;
    let first = Linear::new(weight, Some(bias));
    let weight = Tensor::randn(0f32, 1.0, (10, 100), &device)?;
    let bias = Tensor::randn(0f32, 1.0, (10,), &device)?;
    let second = Linear::new(weight, Some(bias));
    let model = Model { first, second };

    let dummy_image = Tensor::randn(0f32, 1.0, (1, 784), &device)?;

    let digit = model.forward(&dummy_image)?;
    println!("Digit {digit:?} digit");
    Ok(())
}
```
Feel free to modify this example to use `Conv2d` to create a classical convnet instead; one possible starting point is sketched below.
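This sketch is not from the original text: it assumes `candle_nn` exposes `Conv2d::new(weight, bias, Conv2dConfig)` with the usual `(out_channels, in_channels, k, k)` weight layout, and the layer sizes are arbitrary illustration values:

```rust
extern crate candle_core;
extern crate candle_nn;
use candle_core::{Device, Result, Tensor};
use candle_nn::{Conv2d, Conv2dConfig, Linear, Module};

struct ConvNet {
    conv: Conv2d,
    fc: Linear,
}

impl ConvNet {
    fn forward(&self, image: &Tensor) -> Result<Tensor> {
        // image: (batch, 1, 28, 28) -> conv -> relu -> flatten -> linear
        let x = self.conv.forward(image)?.relu()?;
        let x = x.flatten_from(1)?;
        self.fc.forward(&x)
    }
}

fn main() -> Result<()> {
    let device = Device::Cpu;

    // 8 output channels, 1 input channel, 3x3 kernel, default config (stride 1, no padding).
    let conv_weight = Tensor::randn(0f32, 1.0, (8, 1, 3, 3), &device)?;
    let conv_bias = Tensor::randn(0f32, 1.0, (8,), &device)?;
    let conv = Conv2d::new(conv_weight, Some(conv_bias), Conv2dConfig::default());

    // A 3x3 convolution without padding turns a 28x28 image into 26x26.
    let fc_weight = Tensor::randn(0f32, 1.0, (10, 8 * 26 * 26), &device)?;
    let fc_bias = Tensor::randn(0f32, 1.0, (10,), &device)?;
    let fc = Linear::new(fc_weight, Some(fc_bias));

    let model = ConvNet { conv, fc };
    let dummy_image = Tensor::randn(0f32, 1.0, (1, 1, 28, 28), &device)?;
    let digit = model.forward(&dummy_image)?;
    println!("Digit {digit:?} digit");
    Ok(())
}
```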
Now that we have the running dummy code, we can get to more advanced topics.
**With Cuda support:**

- `nvcc --version` should print information about your Cuda compiler driver.
- `nvidia-smi --query-gpu=compute_cap --format=csv` should print your GPU's compute capability, e.g. something like:

```
compute_cap
8.9
```

You can also compile the Cuda kernels for a specific compute cap using the `CUDA_COMPUTE_CAP=<compute cap>` environment variable.

If any of the above commands errors out, please make sure to update your Cuda version.
Create a new app and add `candle-core` with Cuda support.

Start by creating a new cargo project:
+cd myapp
+
+Make sure to add the candle-core
crate with the cuda feature:
cargo add --git https://github.com/huggingface/candle.git candle-core --features "cuda"
+
+Run cargo build
to make sure everything can be correctly built.
cargo build
+
+Without Cuda support:
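As a quick runtime sanity check (a sketch that is not part of the original walkthrough), you can ask candle for a Cuda device and fall back to the CPU when none is available:

```rust
extern crate candle_core;
use candle_core::{Device, Result, Tensor};

fn main() -> Result<()> {
    // Picks the first Cuda GPU when the `cuda` feature is enabled and a GPU is present,
    // otherwise falls back to the CPU.
    let device = Device::cuda_if_available(0)?;
    println!("running on cuda: {}", device.is_cuda());

    let x = Tensor::randn(0f32, 1.0, (2, 3), &device)?;
    println!("{x}");
    Ok(())
}
```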
+Create a new app and add candle-core
as follows:
cargo new myapp
+cd myapp
+cargo add --git https://github.com/huggingface/candle.git candle-core
+
+Finally, run cargo build
to make sure everything can be correctly built.
cargo build
+
+With mkl support
+You can also see the mkl
feature which could be interesting to get faster inference on CPU. Using mkl
This book will introduce step by step how to use candle
.