diff --git a/docs/core_docs/docs/integrations/chat/llama_cpp.mdx b/docs/core_docs/docs/integrations/chat/llama_cpp.mdx
index dbff7089dece..5123b96be70e 100644
--- a/docs/core_docs/docs/integrations/chat/llama_cpp.mdx
+++ b/docs/core_docs/docs/integrations/chat/llama_cpp.mdx
@@ -22,11 +22,11 @@ import IntegrationInstallTooltip from "@mdx_components/integration_install_toolt
npm install -S node-llama-cpp@3 @langchain/community @langchain/core
```
-You will also need a local Llama 2 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
+You will also need a local Llama 3 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as part of the parameters (see example).
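+
+As a quick orientation, here is a minimal sketch of wiring a local model path into the chat module; the path is a placeholder, so substitute the location of your own GGUF file, and see the examples later on this page for fuller usage:
+
+```typescript
+import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
+import { HumanMessage } from "@langchain/core/messages";
+
+// Placeholder path: replace with the location of your own GGUF model file.
+const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";
+
+// Initialize the chat model by pointing it at the local model file.
+const model = await ChatLlamaCpp.initialize({ modelPath: llamaPath });
+
+const response = await model.invoke([new HumanMessage("Say hello!")]);
+console.log(response.content);
+```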
Out-of-the-box `node-llama-cpp` is tuned for running on a MacOS platform with support for the Metal GPU of Apple M-series of processors. If you need to turn this off or need support for the CUDA architecture then refer to the documentation at [node-llama-cpp](https://withcatai.github.io/node-llama-cpp/).
-For advice on getting and preparing `llama2` see the documentation for the LLM version of this module.
+For advice on getting and preparing `llama3` see the documentation for the LLM version of this module.
A note to LangChain.js contributors: if you want to run the tests associated with this module you will need to put the path to your local model in the environment variable `LLAMA_PATH`.
@@ -51,7 +51,7 @@ import SystemExample from "@examples/models/chat/integration_llama_cpp_system.ts
### Chains
-This module can also be used with chains, note that using more complex chains will require suitably powerful version of `llama2` such as the 70B version.
+This module can also be used with chains; note that more complex chains will require a suitably powerful version of `llama3`, such as the 70B version.
import ChainExample from "@examples/models/chat/integration_llama_cpp_chain.ts";
@@ -65,7 +65,7 @@ import StreamExample from "@examples/models/chat/integration_llama_cpp_stream.ts
{StreamExample}
-Or you can provide multiple messages, note that this takes the input and then submits a Llama2 formatted prompt to the model.
+Or you can provide multiple messages; note that this takes the input and then submits a Llama3-formatted prompt to the model.
import StreamMultiExample from "@examples/models/chat/integration_llama_cpp_stream_multi.ts";
diff --git a/docs/core_docs/docs/integrations/llms/llama_cpp.mdx b/docs/core_docs/docs/integrations/llms/llama_cpp.mdx
index 508229ac13b6..0601edcbe0c4 100644
--- a/docs/core_docs/docs/integrations/llms/llama_cpp.mdx
+++ b/docs/core_docs/docs/integrations/llms/llama_cpp.mdx
@@ -26,40 +26,28 @@ import IntegrationInstallTooltip from "@mdx_components/integration_install_toolt
npm install @langchain/community @langchain/core
```
-You will also need a local Llama 2 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
+You will also need a local Llama 3 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as part of the parameters (see example).
Out-of-the-box `node-llama-cpp` is tuned for running on a MacOS platform with support for the Metal GPU of Apple M-series of processors. If you need to turn this off or need support for the CUDA architecture then refer to the documentation at [node-llama-cpp](https://withcatai.github.io/node-llama-cpp/).
A note to LangChain.js contributors: if you want to run the tests associated with this module you will need to put the path to your local model in the environment variable `LLAMA_PATH`.
-## Guide to installing Llama2
+## Guide to installing Llama3
-Getting a local Llama2 model running on your machine is a pre-req so this is a quick guide to getting and building Llama 7B (the smallest) and then quantizing it so that it will run comfortably on a laptop. To do this you will need `python3` on your machine (3.11 is recommended), also `gcc` and `make` so that `llama.cpp` can be built.
+Getting a local Llama3 model running on your machine is a prerequisite, so this is a quick guide to getting and converting Llama 3.1-8B (the smallest model in the family) and then quantizing it so that it will run comfortably on a laptop. To do this you will need `python3` on your machine (3.11 is recommended), plus a C/C++ compiler and `cmake` so that `llama.cpp` can be built.
-### Getting the Llama2 models
+### Getting the Llama3 models
-To get a copy of Llama2 you need to visit [Meta AI](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and request access to their models. Once Meta AI grant you access, you will receive an email containing a unique URL to access the files, this will be needed in the next steps.
+To get a copy of Llama3 you need to visit [Meta AI](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and request access to their models. Once Meta AI grants you access, you will receive an email containing a unique URL for accessing the files; this will be needed in the next steps.
Now create a directory to work in, for example:
```
-mkdir llama2
-cd llama2
+mkdir llama3
+cd llama3
```
-Now we need to get the Meta AI `llama` repo in place so we can download the model.
-
-```
-git clone https://github.com/facebookresearch/llama.git
-```
-
-Once we have this in place we can change into this directory and run the downloader script to get the model we will be working with. Note: From here on its assumed that the model in use is `llama-2–7b`, if you select a different model don't forget to change the references to the model accordingly.
-
-```
-cd llama
-/bin/bash ./download.sh
-```
-
-This script will ask you for the URL that Meta AI sent to you (see above), you will also select the model to download, in this case we used `llama-2–7b`. Once this step has completed successfully (this can take some time, the `llama-2–7b` model is around 13.5Gb) there should be a new `llama-2–7b` directory containing the model and other files.
+Now we need to go to the Meta AI `llama-models` repo, which can be found [here](https://github.com/meta-llama/llama-models). The repo contains instructions for downloading the model of your choice, using the unique URL you received in your email.
+The rest of the tutorial assumes that you have downloaded `Llama3.1-8B`, but other models should also work provided you adjust the model references accordingly. Once the download finishes, make a note of the download path, as it will be needed later.
### Converting and quantizing the model
@@ -71,38 +59,42 @@ git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
```
-Now we need to build the `llama.cpp` tools and set up our `python` environment. In these steps it's assumed that your install of python can be run using `python3` and that the virtual environment can be called `llama2`, adjust accordingly for your own situation.
+Now we need to build the `llama.cpp` tools and set up our `python` environment. In these steps it's assumed that your install of python can be run using `python3` and that the virtual environment is called `llama3`; adjust accordingly for your own situation.
```
-make
-python3 -m venv llama2
-source llama2/bin/activate
+cmake -B build
+cmake --build build --config Release
+python3 -m venv llama3
+source llama3/bin/activate
```
-After activating your llama2 environment you should see `(llama2)` prefixing your command prompt to let you know this is the active environment. Note: if you need to come back to build another model or re-quantize the model don't forget to activate the environment again also if you update `llama.cpp` you will need to rebuild the tools and possibly install new or updated dependencies! Now that we have an active python environment, we need to install the python dependencies.
+After activating your llama3 environment you should see `(llama3)` prefixing your command prompt to let you know this is the active environment. Note: if you come back later to build another model or re-quantize the model, don't forget to activate the environment again; also, if you update `llama.cpp` you will need to rebuild the tools and possibly install new or updated dependencies! Now that we have an active python environment, we need to install the python dependencies.
```
python3 -m pip install -r requirements.txt
```
-Having done this, we can start converting and quantizing the Llama2 model ready for use locally via `llama.cpp`.
-First, we need to convert the model, prior to the conversion let's create a directory to store it in.
+Having done this, we can start converting and quantizing the Llama3 model so it is ready for use locally via `llama.cpp`. This is a two-step process: first a conversion to a Hugging Face model, followed by a conversion to a GGUF model.
+First, we need the `convert_llama_weights_to_hf.py` script from the Hugging Face `transformers` repository. Copy this script into your current working directory. Note that running the script may require you to `pip install` extra dependencies; do so as needed.
+Then we can convert the model. Prior to the conversion, let's create directories to store the Hugging Face conversion and our final model.
```
-mkdir models/7B
-python3 convert.py --outfile models/7B/gguf-llama2-f16.bin --outtype f16 ../../llama2/llama/llama-2-7b --vocab-dir ../../llama2/llama/llama-2-7b
+mkdir models/8B
+mkdir models/8B-GGUF
+python3 convert_llama_weights_to_hf.py --model_size 8B --input_dir <path-to-your-downloaded-Llama3.1-8B> --output_dir models/8B --llama_version 3
+python3 convert_hf_to_gguf.py --outtype f16 --outfile models/8B-GGUF/gguf-llama3-f16.bin models/8B
```
-This should create a converted model called `gguf-llama2-f16.bin` in the directory we just created. Note that this is just a converted model so it is also around 13.5Gb in size, in the next step we will quantize it down to around 4Gb.
+This should create a converted Hugging Face model and the final GGUF model in the directories we have created. Note that these are still unquantized models, so each is around 16Gb in size; in the next step we will quantize the GGUF model down to around 4Gb.
```
-./quantize ./models/7B/gguf-llama2-f16.bin ./models/7B/gguf-llama2-q4_0.bin q4_0
+./build/bin/llama-quantize ./models/8B-GGUF/gguf-llama3-f16.bin ./models/8B-GGUF/gguf-llama3-Q4_0.bin Q4_0
```
-Running this should result in a new model being created in the `models\7B` directory, this one called `gguf-llama2-q4_0.bin`, this is the model we can use with langchain. You can validate this model is working by testing it using the `llama.cpp` tools.
+Running this should result in a new model being created in the `models/8B-GGUF` directory, called `gguf-llama3-Q4_0.bin`; this is the model we can use with LangChain. You can validate that this model is working by testing it with the `llama.cpp` tools.
```
-./main -m ./models/7B/gguf-llama2-q4_0.bin -n 1024 --repeat_penalty 1.0 --color -i -r "User:" -f ./prompts/chat-with-bob.txt
+./build/bin/llama-cli -m ./models/8B-GGUF/gguf-llama3-Q4_0.bin -cnv -p "You are a helpful assistant"
```
-Running this command fires up the model for a chat session. BTW if you are running out of disk space this small model is the only one we need, so you can backup and/or delete the original and converted 13.5Gb models.
+Running this command fires up the model for a chat session. If you are running out of disk space, this small quantized model is the only one we need, so you can back up and/or delete the original and converted 16Gb models.
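+
+Once you have the quantized `gguf-llama3-Q4_0.bin` file, you can point the `LlamaCpp` class at it. The following is a minimal sketch; the path below is an assumption based on the `llama.cpp/models/8B-GGUF` directory created above, so adjust it to wherever you keep the file:
+
+```typescript
+import { LlamaCpp } from "@langchain/community/llms/llama_cpp";
+
+// Assumed location of the quantized model produced by the steps above.
+const llamaPath = "/path/to/llama.cpp/models/8B-GGUF/gguf-llama3-Q4_0.bin";
+
+const model = await LlamaCpp.initialize({ modelPath: llamaPath });
+
+const response = await model.invoke("Where do Llamas come from?");
+console.log(response);
+```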
diff --git a/docs/core_docs/docs/integrations/text_embedding/llama_cpp.mdx b/docs/core_docs/docs/integrations/text_embedding/llama_cpp.mdx
index 35ec34988a9c..3ed1f10dc272 100644
--- a/docs/core_docs/docs/integrations/text_embedding/llama_cpp.mdx
+++ b/docs/core_docs/docs/integrations/text_embedding/llama_cpp.mdx
@@ -26,11 +26,11 @@ import IntegrationInstallTooltip from "@mdx_components/integration_install_toolt
npm install @langchain/community @langchain/core
```
-You will also need a local Llama 2 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
+You will also need a local Llama 3 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as part of the parameters (see example).
Out-of-the-box `node-llama-cpp` is tuned for running on a MacOS platform with support for the Metal GPU of Apple M-series of processors. If you need to turn this off or need support for the CUDA architecture then refer to the documentation at [node-llama-cpp](https://withcatai.github.io/node-llama-cpp/).
-For advice on getting and preparing `llama2` see the documentation for the LLM version of this module.
+For advice on getting and preparing `llama3` see the documentation for the LLM version of this module.
A note to LangChain.js contributors: if you want to run the tests associated with this module you will need to put the path to your local model in the environment variable `LLAMA_PATH`.
@@ -38,7 +38,7 @@ A note to LangChain.js contributors: if you want to run the tests associated wit
### Basic use
-We need to provide a path to our local Llama2 model, also the `embeddings` property is always set to `true` in this module.
+We need to provide a path to our local Llama3 model; also note that the `embeddings` property is always set to `true` in this module.
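+
+For orientation, a minimal sketch of embedding a single query and a small batch of documents (the model path is a placeholder; see the examples below for complete versions):
+
+```typescript
+import { LlamaCppEmbeddings } from "@langchain/community/embeddings/llama_cpp";
+
+// Placeholder path: replace with the location of your own GGUF model file.
+const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";
+
+const embeddings = await LlamaCppEmbeddings.initialize({ modelPath: llamaPath });
+
+// Embed a single piece of text.
+const queryVector = await embeddings.embedQuery("Hello Llama!");
+
+// Embed several documents at once.
+const documentVectors = await embeddings.embedDocuments(["Hello World!", "Bye Bye!"]);
+
+console.log(queryVector.length, documentVectors.length);
+```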
import CodeBlock from "@theme/CodeBlock";
import BasicExample from "@examples/embeddings/llama_cpp_basic.ts";
diff --git a/examples/src/embeddings/llama_cpp_basic.ts b/examples/src/embeddings/llama_cpp_basic.ts
index cf89ffd4262f..a26e6877d0d5 100644
--- a/examples/src/embeddings/llama_cpp_basic.ts
+++ b/examples/src/embeddings/llama_cpp_basic.ts
@@ -1,6 +1,6 @@
import { LlamaCppEmbeddings } from "@langchain/community/embeddings/llama_cpp";
-const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
+const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";
const embeddings = await LlamaCppEmbeddings.initialize({
modelPath: llamaPath,
diff --git a/examples/src/embeddings/llama_cpp_docs.ts b/examples/src/embeddings/llama_cpp_docs.ts
index 19e9ee404abe..b416c4769857 100644
--- a/examples/src/embeddings/llama_cpp_docs.ts
+++ b/examples/src/embeddings/llama_cpp_docs.ts
@@ -1,6 +1,6 @@
import { LlamaCppEmbeddings } from "@langchain/community/embeddings/llama_cpp";
-const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
+const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";
const documents = ["Hello World!", "Bye Bye!"];
diff --git a/examples/src/models/chat/integration_llama_cpp.ts b/examples/src/models/chat/integration_llama_cpp.ts
index bdd2f7818c3c..18a112fa17e9 100644
--- a/examples/src/models/chat/integration_llama_cpp.ts
+++ b/examples/src/models/chat/integration_llama_cpp.ts
@@ -1,7 +1,7 @@
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { HumanMessage } from "@langchain/core/messages";
-const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
+const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";
const model = await ChatLlamaCpp.initialize({ modelPath: llamaPath });
diff --git a/examples/src/models/chat/integration_llama_cpp_chain.ts b/examples/src/models/chat/integration_llama_cpp_chain.ts
index 3499929b7ef7..1a016ded6da6 100644
--- a/examples/src/models/chat/integration_llama_cpp_chain.ts
+++ b/examples/src/models/chat/integration_llama_cpp_chain.ts
@@ -2,7 +2,7 @@ import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { LLMChain } from "langchain/chains";
import { PromptTemplate } from "@langchain/core/prompts";
-const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
+const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";
const model = await ChatLlamaCpp.initialize({
modelPath: llamaPath,
diff --git a/examples/src/models/chat/integration_llama_cpp_stream.ts b/examples/src/models/chat/integration_llama_cpp_stream.ts
index 33697fedd876..addd2dbf2cac 100644
--- a/examples/src/models/chat/integration_llama_cpp_stream.ts
+++ b/examples/src/models/chat/integration_llama_cpp_stream.ts
@@ -1,6 +1,6 @@
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
-const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
+const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";
const model = await ChatLlamaCpp.initialize({
modelPath: llamaPath,
diff --git a/examples/src/models/chat/integration_llama_cpp_stream_invoke.ts b/examples/src/models/chat/integration_llama_cpp_stream_invoke.ts
index f452b9764fd8..3a7d7217f7dd 100644
--- a/examples/src/models/chat/integration_llama_cpp_stream_invoke.ts
+++ b/examples/src/models/chat/integration_llama_cpp_stream_invoke.ts
@@ -1,7 +1,7 @@
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { SystemMessage, HumanMessage } from "@langchain/core/messages";
-const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
+const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";
const model = await ChatLlamaCpp.initialize({
modelPath: llamaPath,
diff --git a/examples/src/models/chat/integration_llama_cpp_stream_multi.ts b/examples/src/models/chat/integration_llama_cpp_stream_multi.ts
index 9d2d337d0284..d2fce6da9919 100644
--- a/examples/src/models/chat/integration_llama_cpp_stream_multi.ts
+++ b/examples/src/models/chat/integration_llama_cpp_stream_multi.ts
@@ -1,7 +1,7 @@
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { SystemMessage, HumanMessage } from "@langchain/core/messages";
-const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
+const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";
const llamaCpp = await ChatLlamaCpp.initialize({
modelPath: llamaPath,
diff --git a/examples/src/models/chat/integration_llama_cpp_system.ts b/examples/src/models/chat/integration_llama_cpp_system.ts
index ec53a8aac4b7..a97174941d95 100644
--- a/examples/src/models/chat/integration_llama_cpp_system.ts
+++ b/examples/src/models/chat/integration_llama_cpp_system.ts
@@ -1,7 +1,7 @@
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { SystemMessage, HumanMessage } from "@langchain/core/messages";
-const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
+const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";
const model = await ChatLlamaCpp.initialize({ modelPath: llamaPath });
diff --git a/examples/src/models/llm/llama_cpp.ts b/examples/src/models/llm/llama_cpp.ts
index da7d8b487930..f7a7f696ff66 100644
--- a/examples/src/models/llm/llama_cpp.ts
+++ b/examples/src/models/llm/llama_cpp.ts
@@ -1,6 +1,6 @@
import { LlamaCpp } from "@langchain/community/llms/llama_cpp";
-const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
+const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";
const question = "Where do Llamas come from?";
const model = await LlamaCpp.initialize({ modelPath: llamaPath });
diff --git a/examples/src/models/llm/llama_cpp_stream.ts b/examples/src/models/llm/llama_cpp_stream.ts
index 022da280ff5d..c5465d3fd76d 100644
--- a/examples/src/models/llm/llama_cpp_stream.ts
+++ b/examples/src/models/llm/llama_cpp_stream.ts
@@ -1,6 +1,6 @@
import { LlamaCpp } from "@langchain/community/llms/llama_cpp";
-const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
+const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";
const model = await LlamaCpp.initialize({
modelPath: llamaPath,
diff --git a/libs/langchain-community/src/chat_models/llama_cpp.ts b/libs/langchain-community/src/chat_models/llama_cpp.ts
index 960228c1bb29..1752652dadff 100644
--- a/libs/langchain-community/src/chat_models/llama_cpp.ts
+++ b/libs/langchain-community/src/chat_models/llama_cpp.ts
@@ -47,12 +47,12 @@ export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
* To use this model you need to have the `node-llama-cpp` module installed.
* This can be installed using `npm install -S node-llama-cpp` and the minimum
* version supported in version 2.0.0.
- * This also requires that have a locally built version of Llama2 installed.
+ * This also requires that you have a locally built version of Llama3 installed.
* @example
* ```typescript
* // Initialize the ChatLlamaCpp model with the path to the model binary file.
* const model = await ChatLlamaCpp.initialize({
- * modelPath: "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin",
+ * modelPath: "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin",
* temperature: 0.5,
* });
*
diff --git a/libs/langchain-community/src/llms/llama_cpp.ts b/libs/langchain-community/src/llms/llama_cpp.ts
index 24fcc529a864..0d344c605334 100644
--- a/libs/langchain-community/src/llms/llama_cpp.ts
+++ b/libs/langchain-community/src/llms/llama_cpp.ts
@@ -42,7 +42,7 @@ export interface LlamaCppCallOptions extends BaseLLMCallOptions {
* To use this model you need to have the `node-llama-cpp` module installed.
* This can be installed using `npm install -S node-llama-cpp` and the minimum
* version supported in version 2.0.0.
- * This also requires that have a locally built version of Llama2 installed.
+ * This also requires that you have a locally built version of Llama3 installed.
*/
export class LlamaCpp extends LLM {
lc_serializable = true;