diff --git a/scripts/configuration.py b/scripts/configuration.py
index bd1410d..8360281 100644
--- a/scripts/configuration.py
+++ b/scripts/configuration.py
@@ -34,3 +34,6 @@
 # Parameters for the scraper
 CHUNK_SIZE = 1_500
 CHUNK_OVERLAP = 10
+
+# Sentence transformer model
+SENTENCE_TRANSFORMER_MODEL = "thenlper/gte-large"
diff --git a/scripts/train_retrievers.py b/scripts/train_retrievers.py
index a9ee051..1dad25e 100644
--- a/scripts/train_retrievers.py
+++ b/scripts/train_retrievers.py
@@ -36,7 +36,7 @@
 from ragger_duck.scraping import APINumPyDocExtractor
 
 embedding = SentenceTransformer(
-    model_name_or_path="thenlper/gte-large",
+    model_name_or_path=config.SENTENCE_TRANSFORMER_MODEL,
     cache_folder=config.CACHE_PATH,
     device=DEVICE,
 )
@@ -91,7 +91,7 @@
 from ragger_duck.scraping import UserGuideDocExtractor
 
 embedding = SentenceTransformer(
-    model_name_or_path="thenlper/gte-large",
+    model_name_or_path=config.SENTENCE_TRANSFORMER_MODEL,
     cache_folder=config.CACHE_PATH,
     device=DEVICE,
 )
@@ -148,7 +148,7 @@
 from ragger_duck.scraping import GalleryExampleExtractor
 
 embedding = SentenceTransformer(
-    model_name_or_path="thenlper/gte-large",
+    model_name_or_path=config.SENTENCE_TRANSFORMER_MODEL,
     cache_folder=config.CACHE_PATH,
     device=DEVICE,
 )