Description
Can't utilize the GPU on a Mac with:
llama_cpp_rs = { git = "https://github.com/mdrokz/rust-llama.cpp", version = "0.3.0", features = [
    "metal",
] }
Code
use llama_cpp_rs::{
    options::{ModelOptions, PredictOptions},
    LLama,
};

fn main() {
    // Offload one layer to the GPU to exercise the Metal backend.
    let model_options = ModelOptions {
        n_gpu_layers: 1,
        ..Default::default()
    };

    let llama = LLama::new("zephyr-7b-alpha.Q2_K.gguf".into(), &model_options);
    println!("llama: {:?}", llama);

    let predict_options = PredictOptions {
        tokens: 0,
        threads: 14,
        top_k: 90,
        top_p: 0.86,
        // Print each generated token; returning true keeps prediction going.
        token_callback: Some(Box::new(|token| {
            println!("token1: {}", token);
            true
        })),
        ..Default::default()
    };

    llama
        .unwrap()
        .predict(
            "what are the national animals of india".into(),
            predict_options,
        )
        .unwrap();
}
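For reference, a minimal sketch of a workaround worth trying, assuming ggml_metal_init() fails because the Metal backend cannot locate the ggml-metal.metal shader source at runtime. Upstream llama.cpp consults the GGML_METAL_PATH_RESOURCES environment variable for that directory (provided the vendored llama.cpp checkout is recent enough to support it); the "./llama.cpp" path below is an assumption and should point at wherever that file actually lives:

use std::env;

use llama_cpp_rs::{options::ModelOptions, LLama};

fn main() {
    // Hypothetical workaround: point llama.cpp at the directory containing
    // ggml-metal.metal before any Metal initialization happens. The path is
    // an assumption; adjust it to the local vendored llama.cpp checkout.
    env::set_var("GGML_METAL_PATH_RESOURCES", "./llama.cpp");

    let model_options = ModelOptions {
        n_gpu_layers: 1,
        ..Default::default()
    };

    // Loading the model triggers ggml_metal_init(), which reads the
    // variable set above.
    let llama = LLama::new("zephyr-7b-alpha.Q2_K.gguf".into(), &model_options);
    println!("llama: {:?}", llama);
}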
Error
llama_new_context_with_model: kv self size = 64.00 MB
llama_new_context_with_model: ggml_metal_init() failed
llama: Err("Failed to load model")
thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: "Failed to load model"', src/main.rs:40:10
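To narrow this down, a small diagnostic sketch that checks whether the shader source is visible where the Metal backend might look for it (the candidate paths are assumptions, not the library's documented search order):

use std::{env, path::Path};

fn main() {
    // GGML_METAL_PATH_RESOURCES is the directory upstream llama.cpp consults
    // for ggml-metal.metal; unset means it falls back to its default lookup.
    match env::var("GGML_METAL_PATH_RESOURCES") {
        Ok(dir) => println!("GGML_METAL_PATH_RESOURCES = {}", dir),
        Err(_) => println!("GGML_METAL_PATH_RESOURCES is not set"),
    }

    // Candidate locations (assumptions) for the Metal shader source.
    for p in ["./ggml-metal.metal", "./llama.cpp/ggml-metal.metal"] {
        println!("{} exists: {}", p, Path::new(p).exists());
    }
}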