Fix GenerationMixin warning for AdapterModel classes (#787)
Fixes the following warning for all AdapterModels:
```
LlamaAdapterModel has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
```
calpt authored Jan 27, 2025
1 parent 1dcac5c commit ea2b639
Showing 17 changed files with 62 additions and 17 deletions.
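
Every diff below applies the same two-line pattern: import `GenerationMixin` from `transformers.generation` and list it as an explicit base class of the `*AdapterModel`. As a self-contained illustration of why that silences the warning, here is a minimal sketch using made-up stand-in classes rather than the actual adapters code:

```python
from transformers import PretrainedConfig, PreTrainedModel
from transformers.generation import GenerationMixin


class TinyConfig(PretrainedConfig):
    model_type = "tiny-sketch"


class TinyAdapterStyleModel(PreTrainedModel):
    """Stands in for an AdapterModel: it customizes prepare_inputs_for_generation."""

    config_class = TinyConfig

    def prepare_inputs_for_generation(self, input_ids, **kwargs):
        # Overriding this hook is what makes transformers expect GenerationMixin
        # to be an explicit base class.
        return {"input_ids": input_ids}


class TinyAdapterStyleModelFixed(TinyAdapterStyleModel, GenerationMixin):
    """Same model with GenerationMixin added as a base, mirroring this commit."""


# The fixed class keeps generate() support even once PreTrainedModel stops
# inheriting from GenerationMixin (transformers v4.50+).
print(issubclass(TinyAdapterStyleModelFixed, GenerationMixin))  # True
```

Listing the mixin last keeps the resolution order of the existing adapter mixins unchanged while restoring `generate()` once `PreTrainedModel` drops it in v4.50.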
5 changes: 4 additions & 1 deletion src/adapters/models/bart/adapter_model.py
@@ -1,5 +1,6 @@
import torch

from transformers.generation import GenerationMixin
from transformers.models.bart.modeling_bart import (
BART_INPUTS_DOCSTRING,
BART_START_DOCSTRING,
@@ -18,7 +19,9 @@
@add_start_docstrings(
"BART Model with the option to add multiple flexible prediction heads on top.", BART_START_DOCSTRING
)
class BartAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, BartPreTrainedModel):
class BartAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, BartPreTrainedModel, GenerationMixin
):
_tied_weights_keys = [
"encoder.embed_tokens.weight",
"decoder.embed_tokens.weight",
5 changes: 4 additions & 1 deletion src/adapters/models/bert/adapter_model.py
@@ -1,3 +1,4 @@
from transformers.generation import GenerationMixin
from transformers.models.bert.modeling_bert import (
BERT_INPUTS_DOCSTRING,
BERT_START_DOCSTRING,
@@ -16,7 +17,9 @@
"""Bert Model transformer with the option to add multiple flexible heads on top.""",
BERT_START_DOCSTRING,
)
class BertAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, BertPreTrainedModel):
class BertAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, BertPreTrainedModel, GenerationMixin
):

head_types = [
"classification",
3 changes: 2 additions & 1 deletion src/adapters/models/bert_generation/adapter_model.py
@@ -1,3 +1,4 @@
from transformers.generation import GenerationMixin
from transformers.models.bert_generation.modeling_bert_generation import (
BERT_GENERATION_INPUTS_DOCSTRING,
BERT_GENERATION_START_DOCSTRING,
@@ -17,7 +18,7 @@
BERT_GENERATION_START_DOCSTRING,
)
class BertGenerationAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, BertGenerationPreTrainedModel
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, BertGenerationPreTrainedModel, GenerationMixin
):
_keys_to_ignore_on_load_unexpected = [r"lm_head.bias"]

3 changes: 2 additions & 1 deletion src/adapters/models/distilbert/adapter_model.py
@@ -1,5 +1,6 @@
import torch.nn as nn

from transformers.generation import GenerationMixin
from transformers.models.distilbert.modeling_distilbert import (
DISTILBERT_INPUTS_DOCSTRING,
DISTILBERT_START_DOCSTRING,
@@ -18,7 +19,7 @@
DISTILBERT_START_DOCSTRING,
)
class DistilBertAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, DistilBertPreTrainedModel
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, DistilBertPreTrainedModel, GenerationMixin
):
head_types = [
"classification",
5 changes: 4 additions & 1 deletion src/adapters/models/electra/adapter_model.py
@@ -1,3 +1,4 @@
from transformers.generation import GenerationMixin
from transformers.models.electra.modeling_electra import (
ELECTRA_INPUTS_DOCSTRING,
ELECTRA_START_DOCSTRING,
@@ -16,7 +17,9 @@
"""Electra Model transformer with the option to add multiple flexible heads on top.""",
ELECTRA_START_DOCSTRING,
)
class ElectraAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, ElectraPreTrainedModel):
class ElectraAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, ElectraPreTrainedModel, GenerationMixin
):

head_types = [
"classification",
5 changes: 4 additions & 1 deletion src/adapters/models/gpt2/adapter_model.py
@@ -2,6 +2,7 @@

import torch

from transformers.generation import GenerationMixin
from transformers.models.gpt2.modeling_gpt2 import GPT2_START_DOCSTRING, GPT2Model, GPT2PreTrainedModel
from transformers.utils import add_start_docstrings

@@ -25,7 +26,9 @@
""",
GPT2_START_DOCSTRING,
)
class GPT2AdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, GPT2PreTrainedModel):
class GPT2AdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, GPT2PreTrainedModel, GenerationMixin
):
head_types = [
"classification",
"multilabel_classification",
5 changes: 4 additions & 1 deletion src/adapters/models/gptj/adapter_model.py
@@ -2,6 +2,7 @@

import torch

from transformers.generation import GenerationMixin
from transformers.models.gptj.modeling_gptj import GPTJ_START_DOCSTRING, GPTJModel, GPTJPreTrainedModel
from transformers.utils import add_start_docstrings

@@ -25,7 +26,9 @@
""",
GPTJ_START_DOCSTRING,
)
class GPTJAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, GPTJPreTrainedModel):
class GPTJAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, GPTJPreTrainedModel, GenerationMixin
):
head_types = [
"classification",
"multilabel_classification",
5 changes: 4 additions & 1 deletion src/adapters/models/llama/adapter_model.py
@@ -3,6 +3,7 @@

import torch

from transformers.generation import GenerationMixin
from transformers.models.llama.modeling_llama import LLAMA_START_DOCSTRING, LlamaModel, LlamaPreTrainedModel
from transformers.utils import add_start_docstrings

@@ -26,7 +27,9 @@
""",
LLAMA_START_DOCSTRING,
)
class LlamaAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, LlamaPreTrainedModel):
class LlamaAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, LlamaPreTrainedModel, GenerationMixin
):
head_types = [
"classification",
"multilabel_classification",
5 changes: 4 additions & 1 deletion src/adapters/models/mbart/adapter_model.py
@@ -1,5 +1,6 @@
import torch

from transformers.generation import GenerationMixin
from transformers.models.mbart.modeling_mbart import (
MBART_INPUTS_DOCSTRING,
MBART_START_DOCSTRING,
@@ -19,7 +20,9 @@
@add_start_docstrings(
"MBART Model with the option to add multiple flexible prediction heads on top.", MBART_START_DOCSTRING
)
class MBartAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MBartPreTrainedModel):
class MBartAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MBartPreTrainedModel, GenerationMixin
):
_tied_weights_keys = [
"encoder.embed_tokens.weight",
"decoder.embed_tokens.weight",
5 changes: 4 additions & 1 deletion src/adapters/models/mistral/adapter_model.py
@@ -2,6 +2,7 @@

import torch

from transformers.generation import GenerationMixin
from transformers.models.mistral.modeling_mistral import MISTRAL_START_DOCSTRING, MistralModel, MistralPreTrainedModel
from transformers.utils import add_start_docstrings

@@ -25,7 +26,9 @@
""",
MISTRAL_START_DOCSTRING,
)
class MistralAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MistralPreTrainedModel):
class MistralAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MistralPreTrainedModel, GenerationMixin
):
head_types = [
"classification",
"multilabel_classification",
5 changes: 4 additions & 1 deletion src/adapters/models/mt5/adapter_model.py
@@ -2,6 +2,7 @@

import torch

from transformers.generation import GenerationMixin
from transformers.models.mt5.modeling_mt5 import (
MT5_INPUTS_DOCSTRING,
MT5_START_DOCSTRING,
@@ -22,7 +23,9 @@
@add_start_docstrings(
"MT5 Model with the option to add multiple flexible prediction heads on top.", MT5_START_DOCSTRING
)
class MT5AdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MT5PreTrainedModel):
class MT5AdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MT5PreTrainedModel, GenerationMixin
):
_tied_weights_keys = [
"encoder.embed_tokens.weight",
"decoder.embed_tokens.weight",
5 changes: 4 additions & 1 deletion src/adapters/models/plbart/adapter_model.py
@@ -1,5 +1,6 @@
import torch

from transformers.generation import GenerationMixin
from transformers.models.plbart.modeling_plbart import (
PLBART_INPUTS_DOCSTRING,
PLBART_START_DOCSTRING,
@@ -18,7 +19,9 @@
@add_start_docstrings(
"PLBART Model with the option to add multiple flexible prediction heads on top.", PLBART_START_DOCSTRING
)
class PLBartAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, PLBartPreTrainedModel):
class PLBartAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, PLBartPreTrainedModel, GenerationMixin
):
_tied_weights_keys = [
"encoder.embed_tokens.weight",
"decoder.embed_tokens.weight",
5 changes: 4 additions & 1 deletion src/adapters/models/roberta/adapter_model.py
@@ -1,3 +1,4 @@
from transformers.generation import GenerationMixin
from transformers.models.roberta.modeling_roberta import (
ROBERTA_INPUTS_DOCSTRING,
ROBERTA_START_DOCSTRING,
@@ -16,7 +17,9 @@
"""Roberta Model transformer with the option to add multiple flexible heads on top.""",
ROBERTA_START_DOCSTRING,
)
class RobertaAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, RobertaPreTrainedModel):
class RobertaAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, RobertaPreTrainedModel, GenerationMixin
):
head_types = [
"classification",
"multilabel_classification",
5 changes: 4 additions & 1 deletion src/adapters/models/t5/adapter_model.py
@@ -2,6 +2,7 @@

import torch

from transformers.generation import GenerationMixin
from transformers.models.t5.modeling_t5 import T5_INPUTS_DOCSTRING, T5_START_DOCSTRING, T5Model, T5PreTrainedModel
from transformers.utils import add_start_docstrings, add_start_docstrings_to_model_forward

@@ -15,7 +16,9 @@


@add_start_docstrings("T5 Model with the option to add multiple flexible prediction heads on top.", T5_START_DOCSTRING)
class T5AdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, T5PreTrainedModel):
class T5AdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, T5PreTrainedModel, GenerationMixin
):
_tied_weights_keys = [
"encoder.embed_tokens.weight",
"decoder.embed_tokens.weight",
5 changes: 4 additions & 1 deletion src/adapters/models/whisper/adapter_model.py
@@ -1,6 +1,7 @@
import torch

from transformers import EncoderDecoderCache, StaticCache
from transformers.generation import GenerationMixin
from transformers.models.whisper.modeling_whisper import (
WHISPER_INPUTS_DOCSTRING,
WHISPER_START_DOCSTRING,
@@ -19,7 +20,9 @@
@add_start_docstrings(
"WHISPER Model with the option to add multiple flexible prediction heads on top.", WHISPER_START_DOCSTRING
)
class WhisperAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, WhisperPreTrainedModel):
class WhisperAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, WhisperPreTrainedModel, GenerationMixin
):
_tied_weights_keys = []
head_types = ["seq2seq_lm"]

3 changes: 2 additions & 1 deletion src/adapters/models/xlm_roberta/adapter_model.py
@@ -1,3 +1,4 @@
from transformers.generation import GenerationMixin
from transformers.models.xlm_roberta.modeling_xlm_roberta import (
XLM_ROBERTA_INPUTS_DOCSTRING,
XLM_ROBERTA_START_DOCSTRING,
@@ -17,7 +18,7 @@
XLM_ROBERTA_START_DOCSTRING,
)
class XLMRobertaAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, XLMRobertaPreTrainedModel
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, XLMRobertaPreTrainedModel, GenerationMixin
):

head_types = [
5 changes: 4 additions & 1 deletion src/adapters/models/xmod/adapter_model.py
@@ -2,6 +2,7 @@

import torch

from transformers.generation import GenerationMixin
from transformers.models.xmod.modeling_xmod import (
XMOD_INPUTS_DOCSTRING,
XMOD_START_DOCSTRING,
@@ -20,7 +21,9 @@
"""X-MOD Model transformer with the option to add multiple flexible heads on top.""",
XMOD_START_DOCSTRING,
)
class XmodAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, XmodPreTrainedModel):
class XmodAdapterModel(
EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, XmodPreTrainedModel, GenerationMixin
):

head_types = [
"classification",
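
For reference, a hedged usage sketch of what this commit preserves. `AutoAdapterModel` and `add_causal_lm_head` follow the usual adapters head API, and the `gpt2` checkpoint is an arbitrary choice; none of this is part of the diff above:

```python
from transformers import AutoTokenizer

from adapters import AutoAdapterModel

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoAdapterModel.from_pretrained("gpt2")
# Assumed head API: add a causal LM prediction head so logits are available for decoding.
model.add_causal_lm_head("lm")

inputs = tokenizer("Adapters make fine-tuning", return_tensors="pt")
# generate() comes from GenerationMixin, now an explicit base of every AdapterModel class,
# so this keeps working on transformers >= 4.50.
output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```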
