[FEATURE] Add SYCL feature flags to rllm-llamacpp build (To add support for Intel GPUs) #96

Open

wants to merge 5 commits into base: main

Changes from 4 commits
3 changes: 3 additions & 0 deletions rllm/llama-cpp-low/Cargo.toml
@@ -16,3 +16,6 @@ cmake = "0.1.50"
[features]
default = []
cuda = []
sycl = []
sycl_fp16 = []
sycl_nvidia = []
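
For reference, these flags can be exercised directly when building the low-level crate, e.g. (a sketch; the package name llama_cpp_low is assumed from the dependency reference in rllm-llamacpp/Cargo.toml below):

    # Build the bindings crate with SYCL enabled (fp32 kernels):
    cargo build -p llama_cpp_low --release --features sycl
    # Same, but also compile fp16 kernels:
    cargo build -p llama_cpp_low --release --features sycl,sycl_fp16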
59 changes: 58 additions & 1 deletion rllm/llama-cpp-low/build.rs
Expand Up @@ -6,6 +6,9 @@ const SUBMODULE_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/llama.cpp");
fn main() {
let ccache = true;
let cuda = std::env::var("CARGO_FEATURE_CUDA").unwrap_or(String::new());
let sycl = std::env::var("CARGO_FEATURE_SYCL").unwrap_or(String::new());
let sycl_fp16 = std::env::var("CARGO_FEATURE_SYCL_FP16").unwrap_or(String::new());
let sycl_nvidia = std::env::var("CARGO_FEATURE_SYCL_NVIDIA").unwrap_or(String::new());

let submodule_dir = &PathBuf::from(SUBMODULE_DIR);
let header_path = submodule_dir.join("llama.h");
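
Cargo exposes each enabled feature to the build script as a CARGO_FEATURE_<NAME> environment variable (name uppercased, hyphens replaced by underscores), which is what the lookups above rely on; for example:

    # Enabling a feature surfaces as an env var inside build.rs:
    cargo build --features sycl_fp16    # build.rs sees CARGO_FEATURE_SYCL_FP16=1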
@@ -29,15 +32,69 @@ fn main() {
        .configure_arg("-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache");
    }

    if cuda == "1" && sycl == "1" {
        panic!("Only one of the cuda and sycl features can be enabled at a time!");
    }
    if cuda == "1" {
        cmake.configure_arg("-DLLAMA_CUBLAS=ON");
        println!("cargo:rustc-link-search=/usr/local/cuda/lib64");
        println!("cargo:rustc-link-lib=cuda");
        println!("cargo:rustc-link-lib=cudart");
        println!("cargo:rustc-link-lib=cublas");
        println!("cargo:rustc-link-lib=cupti");
    } else if sycl == "1" {
        cmake.configure_arg("-DLLAMA_SYCL=ON");
        // SYCL builds require the Intel oneAPI compilers.
        cmake.configure_arg("-DCMAKE_C_COMPILER=icx");
        cmake.configure_arg("-DCMAKE_CXX_COMPILER=icpx");

        let dirs = [
            "/opt/intel/oneapi/compiler/latest/lib",
            "/opt/intel/oneapi/mkl/latest/lib",
            //"/opt/intel/oneapi/dnnl/latest/lib",
        ];

        // Link every oneAPI library found in these directories:
        //   *.a => static, *.so => dynamic
        for dir in dirs.iter() {
            println!("cargo:rustc-link-search={}", dir);
            for file in std::fs::read_dir(dir).unwrap() {
                let file = file.unwrap();
                let file_name = file.file_name();
                let file_name = file_name.to_str().unwrap();
                // Only lib* files; skip LP64 (keep ILP64), sequential, and
                // GNU-threaded MKL variants.
                if !file_name.starts_with("lib") { continue; }
                if file_name.contains("lp64") && !file_name.contains("ilp64") { continue; }
                if file_name.contains("seq") { continue; }
                if file_name == "libmkl_gnu_thread.so" { continue; }
                let file_name = file_name.trim_start_matches("lib");

                if file_name.ends_with(".so") {
                    let file_name = &file_name[..file_name.len() - 3];
                    println!("cargo:rustc-link-lib=dylib={}", file_name);
                } else if file_name.ends_with(".a") {
                    let file_name = &file_name[..file_name.len() - 2];
                    println!("cargo:rustc-link-lib=static={}", file_name);
                }
            }
        }
        //panic!("stop here");

        //println!("cargo:rustc-link-search=native=/opt/intel/oneapi/compiler/latest/lib");
        //println!("cargo:rustc-link-lib=intlc");
        //println!("cargo:rustc-link-lib=svml");
        //println!("cargo:rustc-link-lib=sycl");
        //println!("cargo:rustc-link-search=native=/opt/intel/oneapi/mkl/latest/lib");
        //println!("cargo:rustc-link-lib=mkl_core");
        //println!("cargo:rustc-link-lib=mkl_sycl_blas");
        //println!("cargo:rustc-link-lib=mkl_sycl");
    }
    if sycl_fp16 == "1" {
        cmake.configure_arg("-DLLAMA_SYCL_F16=ON");
    }
    if sycl_nvidia == "1" {
        cmake.configure_arg("-DLLAMA_SYCL_TARGET=NVIDIA");
    }
    cmake.very_verbose(true);

    let dst = cmake.build();

    println!("cargo:rustc-link-search=native={}/lib", dst.display());
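Because the SYCL path hard-codes icx/icpx and the /opt/intel/oneapi library directories, the oneAPI environment must be loaded before building. As a rough sketch (assuming a default oneAPI install), the configuration this build script produces for sycl plus sycl_fp16 corresponds to:

    # Load the Intel oneAPI environment (compilers, MKL, SYCL runtime):
    source /opt/intel/oneapi/setvars.sh
    # Approximately what build.rs asks CMake to do for sycl + sycl_fp16:
    cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_F16=ON \
          -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
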
44 changes: 40 additions & 4 deletions rllm/rllm-cuda/server.sh
@@ -41,6 +41,38 @@ while [ "$1" != "" ] ; do
            exit 1
        fi
        ;;
    --sycl )
        if [ "$CPP" = 1 ] ; then
            VER="$VER --features sycl"
        else
            echo "--sycl only valid for llama.cpp"
            exit 1
        fi
        ;;
    --sycl-fp16 )
        if [ "$CPP" = 1 ] ; then
            VER="$VER --features sycl,sycl_fp16"
        else
            echo "--sycl-fp16 only valid for llama.cpp"
            exit 1
        fi
        ;;
    --sycl-nvidia )
        if [ "$CPP" = 1 ] ; then
            VER="$VER --features sycl,sycl_nvidia"
        else
            echo "--sycl-nvidia only valid for llama.cpp"
            exit 1
        fi
        ;;
    --sycl-nvidia-fp16 )
        if [ "$CPP" = 1 ] ; then
            VER="$VER --features sycl,sycl_nvidia,sycl_fp16"
        else
            echo "--sycl-nvidia-fp16 only valid for llama.cpp"
            exit 1
        fi
        ;;
    --trace )
        R_LOG=info,tokenizers=error,rllm=trace,aicirt=info,llama_cpp_low=trace
        ;;
@@ -84,7 +116,7 @@ if [ "$CPP" = 1 ] ; then
    * )
        SELF="server.sh"
        cat <<EOF
usage: $SELF [--loop] [--cuda] [--sycl] [--sycl-fp16] [--sycl-nvidia] [--sycl-nvidia-fp16] [--debug] [model_name] [rllm_args...]

model_name can be a HuggingFace URL pointing to a .gguf file, or one of the following:

@@ -96,9 +128,13 @@ model_name can be a HuggingFace URL pointing to a .gguf file, or one of the followi

Additionally, "$SELF build" will just build the server, and not run a model.

--cuda               try to build llama.cpp against installed CUDA
--sycl               try to build llama.cpp against SYCL with fp32 support (make sure the required SYCL environment variables are set)
--sycl-fp16          try to build llama.cpp against SYCL with fp16 support
--sycl-nvidia        try to build llama.cpp against SYCL with Nvidia support
--sycl-nvidia-fp16   try to build llama.cpp against SYCL with fp16 and Nvidia support
--loop               restart server when it crashes and store logs in ./logs
--debug              don't build in --release mode

Try $SELF phi2 --help to see available rllm_args
EOF
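Putting it together, the new switches go before the model name, e.g. (a sketch; phi2 is the model alias mentioned in the help text above):

    # Build and run the llama.cpp server against an Intel GPU:
    ./server.sh --sycl phi2
    # SYCL build with fp16 kernels:
    ./server.sh --sycl-fp16 phi2
    # SYCL targeting an Nvidia GPU, with fp16:
    ./server.sh --sycl-nvidia-fp16 phi2
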
3 changes: 3 additions & 0 deletions rllm/rllm-llamacpp/Cargo.toml
@@ -21,3 +21,6 @@ path = "src/rllm-llamacpp.rs"
[features]
default = []
cuda = ["llama_cpp_low/cuda"]
sycl = ["llama_cpp_low/sycl"]
sycl_fp16 = ["llama_cpp_low/sycl_fp16"]
sycl_nvidia = ["llama_cpp_low/sycl_nvidia"]
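
Since these features simply forward to the corresponding llama_cpp_low features, the whole server can be built with a single flag, e.g. (a sketch; the package name rllm-llamacpp is assumed from this directory):

    # Build the rllm-llamacpp server binary with the SYCL backend:
    cargo build -p rllm-llamacpp --release --features sycl,sycl_fp16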