diff --git a/Makefile b/Makefile
index 1e4de71b493a..f48eaaa0ece3 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=61408e7fad082dc44a11c8a9f1398da4837aad44
+CPPLLAMA_VERSION?=418f5eef262cea07c2af4f45ee6a88d882221fcb
# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=19dca2bb1464326587cbeb7af00f93c4a59b01fd
+WHISPER_CPP_VERSION?=0377596b77a3602e36430320cbe45f8c305ef04a
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
diff --git a/core/http/app_test.go b/core/http/app_test.go
index 51663a0db87e..653a03d30587 100644
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -562,7 +562,7 @@ var _ = Describe("API test", func() {
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
return response["processed"].(bool)
- }, "360s", "10s").Should(Equal(true))
+ }, "900s", "10s").Should(Equal(true))
By("testing chat")
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{
diff --git a/docs/layouts/partials/docs/top-header.html b/docs/layouts/partials/docs/top-header.html
new file mode 100644
index 000000000000..375ff779ea3e
--- /dev/null
+++ b/docs/layouts/partials/docs/top-header.html
@@ -0,0 +1,133 @@
+
+
+
\ No newline at end of file
diff --git a/gallery/index.yaml b/gallery/index.yaml
index cee71257a40d..39bc9e520ed6 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -861,6 +861,23 @@
- filename: SmolLM-1.7B-Instruct.Q4_K_M.gguf
sha256: 2b07eb2293ed3fc544a9858beda5bfb03dcabda6aa6582d3c85768c95f498d28
uri: huggingface://MaziyarPanahi/SmolLM-1.7B-Instruct-GGUF/SmolLM-1.7B-Instruct.Q4_K_M.gguf
+- !!merge <<: *smollm
+ name: "smollm2-1.7b-instruct"
+ icon: https://cdn-uploads.huggingface.co/production/uploads/61c141342aac764ce1654e43/y45hIMNREW7w_XpHYB_0q.png
+ urls:
+ - https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct
+ - https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF
+ description: |
+    SmolLM2 is a family of compact language models available in three sizes: 135M, 360M, and 1.7B parameters. They are capable of solving a wide range of tasks while being lightweight enough to run on-device.
+
+    The 1.7B variant demonstrates significant advances over its predecessor SmolLM1-1.7B, particularly in instruction following, knowledge, reasoning, and mathematics. It was trained on 11 trillion tokens using a diverse dataset combination: FineWeb-Edu, DCLM, The Stack, along with new mathematics and coding datasets that we curated and will release soon. We developed the instruct version through supervised fine-tuning (SFT) using a combination of public datasets and our own curated datasets. We then applied Direct Preference Optimization (DPO) using UltraFeedback.
+ overrides:
+ parameters:
+ model: smollm2-1.7b-instruct-q4_k_m.gguf
+ files:
+ - filename: smollm2-1.7b-instruct-q4_k_m.gguf
+ sha256: decd2598bc2c8ed08c19adc3c8fdd461ee19ed5708679d1c54ef54a5a30d4f33
+ uri: huggingface://HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/smollm2-1.7b-instruct-q4_k_m.gguf
- &llama31
## LLama3.1
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
@@ -2002,6 +2019,20 @@
- filename: Llama-3.1-Hawkish-8B-Q4_K_M.gguf
sha256: 613693936bbe641f41560151753716ba549ca052260fc5c0569e943e0bb834c3
uri: huggingface://bartowski/Llama-3.1-Hawkish-8B-GGUF/Llama-3.1-Hawkish-8B-Q4_K_M.gguf
+- !!merge <<: *llama31
+ name: "llama3.1-bestmix-chem-einstein-8b"
+ urls:
+ - https://huggingface.co/ZeroXClem/Llama3.1-BestMix-Chem-Einstein-8B
+ - https://huggingface.co/QuantFactory/Llama3.1-BestMix-Chem-Einstein-8B-GGUF
+ description: |
+    Llama3.1-BestMix-Chem-Einstein-8B is an innovative, meticulously blended model designed to excel in instruction-following, chemistry-focused tasks, and long-form conversational generation. This model fuses the best qualities of multiple Llama3-based architectures, making it highly versatile for both general and specialized tasks. 💻🧠✨
+ overrides:
+ parameters:
+ model: Llama3.1-BestMix-Chem-Einstein-8B.Q4_K_M.gguf
+ files:
+ - filename: Llama3.1-BestMix-Chem-Einstein-8B.Q4_K_M.gguf
+ sha256: 1a53aa7124c731f33b0b616d7c66a6f78c6a133240acd9e3227f1188f743c1ee
+ uri: huggingface://QuantFactory/Llama3.1-BestMix-Chem-Einstein-8B-GGUF/Llama3.1-BestMix-Chem-Einstein-8B.Q4_K_M.gguf
- &deepseek
## Deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
@@ -2696,6 +2727,24 @@
- filename: Darkens-8B.Q4_K_M.gguf
sha256: f56a483e10fd00957460adfc16ee462cecac892a4fb44dc59e466e68a360fd42
uri: huggingface://QuantFactory/Darkens-8B-GGUF/Darkens-8B.Q4_K_M.gguf
+- !!merge <<: *mistral03
+ url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+ name: "starcannon-unleashed-12b-v1.0"
+ icon: https://cdn-uploads.huggingface.co/production/uploads/6720ed503a24966ac66495e8/HXc0AxPLkoIC1fy0Pb3Pb.png
+ urls:
+ - https://huggingface.co/VongolaChouko/Starcannon-Unleashed-12B-v1.0
+ - https://huggingface.co/QuantFactory/Starcannon-Unleashed-12B-v1.0-GGUF
+ description: |
+    This is a merge of pre-trained language models created using mergekit.
+    MarinaraSpaghetti_NemoMix-Unleashed-12B
+    Nothingiisreal_MN-12B-Starcannon-v3
+ overrides:
+ parameters:
+ model: Starcannon-Unleashed-12B-v1.0.Q4_K_M.gguf
+ files:
+ - filename: Starcannon-Unleashed-12B-v1.0.Q4_K_M.gguf
+ sha256: b32c6582d75d2f1d67d567badc691a1338dd1a016c71efbfaf4c91812f398f0e
+ uri: huggingface://QuantFactory/Starcannon-Unleashed-12B-v1.0-GGUF/Starcannon-Unleashed-12B-v1.0.Q4_K_M.gguf
- &mudler
### START mudler's LocalAI specific-models
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"