diff --git a/model2vec/distill/distillation.py b/model2vec/distill/distillation.py
index 2d8001f..35a98e3 100644
--- a/model2vec/distill/distillation.py
+++ b/model2vec/distill/distillation.py
@@ -3,7 +3,6 @@
 
 import numpy as np
 from huggingface_hub import model_info
-from huggingface_hub.utils._errors import RepositoryNotFoundError
 from sklearn.decomposition import PCA
 from tokenizers.models import BPE, Unigram
 from transformers import AutoModel, AutoTokenizer, PreTrainedModel, PreTrainedTokenizerFast
@@ -16,6 +15,14 @@
 from model2vec.distill.utils import select_optimal_device
 from model2vec.model import StaticModel
 
+try:
+    # For huggingface_hub>=0.25.0
+    from huggingface_hub.errors import RepositoryNotFoundError
+except ImportError:
+    # For huggingface_hub<0.25.0
+    from huggingface_hub.utils._errors import RepositoryNotFoundError
+
+
 logger = logging.getLogger(__name__)
 
 
diff --git a/model2vec/distill/inference.py b/model2vec/distill/inference.py
index 6459336..e480cbf 100644
--- a/model2vec/distill/inference.py
+++ b/model2vec/distill/inference.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import inspect
 import logging
 from pathlib import Path
 from typing import Protocol
@@ -127,20 +128,27 @@ def create_output_embeddings_from_model_name(
     for batch_idx in tqdm(range(0, len(stacked), _DEFAULT_BATCH_SIZE)):
         batch = stacked[batch_idx : batch_idx + _DEFAULT_BATCH_SIZE].to(model.device)
         with torch.no_grad():
-            # NOTE: we create these masks because nomic embed requires them.
-            # Normally, we could set them to None
-            token_type_ids = torch.zeros_like(batch)
             attention_mask = torch.ones_like(batch)
-            encoded: BaseModelOutputWithPoolingAndCrossAttentions = model(
-                input_ids=batch.to(device), attention_mask=attention_mask, token_type_ids=token_type_ids
-            )
-            out: torch.Tensor = encoded.last_hidden_state
+            # Prepare model inputs
+            model_inputs = {"input_ids": batch.to(device), "attention_mask": attention_mask}
+
+            # Add token_type_ids only if the model supports it
+            if "token_type_ids" in inspect.getfullargspec(model.forward).args:
+                model_inputs["token_type_ids"] = torch.zeros_like(batch)
+
+            # Perform the forward pass
+            encoded_output: BaseModelOutputWithPoolingAndCrossAttentions = model(**model_inputs)
+            out: torch.Tensor = encoded_output.last_hidden_state
             # NOTE: If the dtype is bfloat 16, we convert to float32,
             # because numpy does not suport bfloat16
             # See here: https://github.com/numpy/numpy/issues/19808
             if out.dtype == torch.bfloat16:
                 out = out.float()
-            intermediate_weights.append(out[:, 1].cpu().numpy())
+
+            # Add the output to the intermediate weights
+            intermediate_weights.append(out[:, 1].detach().cpu().numpy())
+
+    # Concatenate the intermediate weights
     out_weights = np.concatenate(intermediate_weights)
     return tokenizer.convert_ids_to_tokens(ids), out_weights
 
diff --git a/pyproject.toml b/pyproject.toml
index 61ef3a5..ae7af19 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@ name = "model2vec"
 description = "Distill a Small Fast Model from any Sentence Transformer"
 readme = { file = "README.md", content-type = "text/markdown" }
 license = { file = "LICENSE" }
-version = "0.2.3"
+version = "0.2.4"
 requires-python = ">=3.10"
 authors = [{ name = "Stéphan Tulkens", email = "stephantul@gmail.com"}, {name = "Thomas van Dongen", email = "thomas123@live.nl"}]
 
diff --git a/tests/conftest.py b/tests/conftest.py
index ae83008..f7d2222 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -44,7 +44,7 @@ def to(self, device: str) -> MockPreTrainedModel:
             self.device = device
             return self
 
-        def __call__(self, *args: Any, **kwargs: Any) -> Any:
+        def forward(self, *args: Any, **kwargs: Any) -> Any:
             # Simulate a last_hidden_state output for a transformer model
             batch_size, seq_length = kwargs["input_ids"].shape
             # Return a tensor of shape (batch_size, seq_length, 768)
@@ -56,6 +56,10 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any:
                 },
             )
 
+        def __call__(self, *args: Any, **kwargs: Any) -> Any:
+            # Simply call the forward method to simulate the same behavior as transformers models
+            return self.forward(*args, **kwargs)
+
     return MockPreTrainedModel()
 
 
diff --git a/tests/test_distillation.py b/tests/test_distillation.py
index b4abac2..36da266 100644
--- a/tests/test_distillation.py
+++ b/tests/test_distillation.py
@@ -4,13 +4,19 @@
 
 import numpy as np
 import pytest
-from huggingface_hub.utils._errors import RepositoryNotFoundError
 from pytest import LogCaptureFixture
 from transformers import AutoModel, BertTokenizerFast
 
 from model2vec.distill.distillation import _clean_vocabulary, _post_process_embeddings, distill, distill_from_model
 from model2vec.model import StaticModel
 
+try:
+    # For huggingface_hub>=0.25.0
+    from huggingface_hub.errors import RepositoryNotFoundError
+except ImportError:
+    # For huggingface_hub<0.25.0
+    from huggingface_hub.utils._errors import RepositoryNotFoundError
+
 rng = np.random.default_rng()
 
 
diff --git a/uv.lock b/uv.lock
index 73c7d3b..30ee1f4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -433,7 +433,7 @@ wheels = [
 
 [[package]]
 name = "model2vec"
-version = "0.2.2"
+version = "0.2.4"
 source = { editable = "." }
 dependencies = [
     { name = "click" },
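
Note: the inference.py hunk above stops passing token_type_ids unconditionally and instead inspects the model's forward signature before adding it to the inputs. Below is a minimal standalone sketch of that detection pattern, assuming a BERT-style checkpoint; the checkpoint name and example sentence are illustrative only and are not part of this change.

import inspect

import torch
from transformers import AutoModel, AutoTokenizer

model_name = "bert-base-uncased"  # illustrative checkpoint; any Hub encoder works the same way
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

batch = tokenizer(["hello world"], return_tensors="pt")
model_inputs = {"input_ids": batch["input_ids"], "attention_mask": batch["attention_mask"]}

# Pass token_type_ids only when the model's forward signature accepts it;
# models without token type embeddings would raise a TypeError otherwise.
if "token_type_ids" in inspect.getfullargspec(model.forward).args:
    model_inputs["token_type_ids"] = torch.zeros_like(model_inputs["input_ids"])

with torch.no_grad():
    last_hidden = model(**model_inputs).last_hidden_state
print(last_hidden.shape)  # (1, sequence_length, hidden_size)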