
Added custom model inference. #437

Open · wants to merge 34 commits into main from add-custom-model

Commits (34 total; the diff below shows the changes from 4 of them):
5909d4a  Added first version of custom model. (JoelNiklaus, Dec 11, 2024)
a2d6b63  Merge branch 'main' into add-custom-model (JoelNiklaus, Dec 11, 2024)
2283c89  Merge branch 'main' into add-custom-model (JoelNiklaus, Dec 11, 2024)
9563fab  Merge branch 'main' into add-custom-model (JoelNiklaus, Dec 12, 2024)
319d482  Merge branch 'main' into add-custom-model (clefourrier, Dec 12, 2024)
464edfe  Merge branch 'main' into add-custom-model (clefourrier, Dec 12, 2024)
6096042  Moved custom model config. (JoelNiklaus, Dec 12, 2024)
a7e1fe5  Added warning. (JoelNiklaus, Dec 12, 2024)
24b8bd3  Added custom model example for google translate. (JoelNiklaus, Dec 12, 2024)
c177a8e  Added documentation for custom model config. (JoelNiklaus, Dec 12, 2024)
d712cdb  Added docs. (JoelNiklaus, Dec 12, 2024)
7553147  Merge branch 'main' into add-custom-model (JoelNiklaus, Dec 12, 2024)
b41949c  Fixed path error. (JoelNiklaus, Dec 12, 2024)
aaaadb0  Fixed doc error. (JoelNiklaus, Dec 12, 2024)
c85065f  Added requirements file for google translate. (JoelNiklaus, Dec 12, 2024)
f1103da  Moved model loading function to reduce merge conflicts with litellm i… (JoelNiklaus, Dec 12, 2024)
71f871e  Added diskcache and get source and target language from the task name. (JoelNiklaus, Dec 12, 2024)
d1af518  Fixed problem with removing languages in the context. (JoelNiklaus, Dec 12, 2024)
2511158  Added retry logic. (JoelNiklaus, Dec 13, 2024)
7d5f76d  Merge branch 'main' into add-custom-model (JoelNiklaus, Dec 16, 2024)
743a284  Update google-translate requirements. (JoelNiklaus, Dec 16, 2024)
1a37f71  Added another example for a custom model. (JoelNiklaus, Dec 17, 2024)
2f27645  Made local mt model example more general to support madlad400 as well. (JoelNiklaus, Dec 17, 2024)
a4d4fee  Merge branch 'main' into add-custom-model (JoelNiklaus, Dec 17, 2024)
bd08781  Merge branch 'main' into add-custom-model (clefourrier, Dec 18, 2024)
b7106e4  Make sure generation can happen on the GPU. (JoelNiklaus, Dec 18, 2024)
a7d176c  Fixed issue with src and tgt lang for seamless model. (JoelNiklaus, Dec 19, 2024)
f1ba65c  Added cleanup to free the GPU memory again. (JoelNiklaus, Dec 19, 2024)
ace6e59  Fix dependency issues by switching to deep-translator. (JoelNiklaus, Dec 22, 2024)
cfd7254  Made inference code more robust against empty responses. (JoelNiklaus, Dec 22, 2024)
3ddc104  Merge branch 'main' into add-custom-model (JoelNiklaus, Dec 23, 2024)
f6df2a3  Merge branch 'main' into add-custom-model (clefourrier, Jan 2, 2025)
348e427  Merge branch 'main' into add-custom-model (JoelNiklaus, Jan 7, 2025)
a63f4b3  Merge branch 'main' into add-custom-model (JoelNiklaus, Jan 11, 2025)
2 changes: 2 additions & 0 deletions src/lighteval/__main__.py
@@ -27,6 +27,7 @@

import lighteval.main_accelerate
import lighteval.main_baseline
import lighteval.main_custom
import lighteval.main_endpoint
import lighteval.main_nanotron
import lighteval.main_tasks
@@ -63,6 +64,7 @@
app.command(rich_help_panel="Evaluation Utils")(lighteval.main_baseline.baseline)
app.command(rich_help_panel="Evaluation Backends")(lighteval.main_nanotron.nanotron)
app.command(rich_help_panel="Evaluation Backends")(lighteval.main_vllm.vllm)
app.command(rich_help_panel="Evaluation Backends")(lighteval.main_custom.custom)
app.add_typer(
    lighteval.main_endpoint.app,
    name="endpoint",
150 changes: 150 additions & 0 deletions src/lighteval/main_custom.py
@@ -0,0 +1,150 @@
# MIT License

# Copyright (c) 2024 The HuggingFace Team

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import os
from dataclasses import dataclass
from typing import Optional

import typer
from typer import Argument, Option
from typing_extensions import Annotated


app = typer.Typer()


TOKEN = os.getenv("HF_TOKEN")
CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")

HELP_PANEL_NAME_1 = "Common Parameters"
HELP_PANEL_NAME_2 = "Logging Parameters"
HELP_PANEL_NAME_3 = "Debug Parameters"
HELP_PANEL_NAME_4 = "Modeling Parameters"


@dataclass
class CustomModelConfig:
    model: str
    model_definition_file_path: str


@app.command(rich_help_panel="Evaluation Backends")
def custom(
    # === general ===
    model_name: Annotated[str, Argument(help="The model name to evaluate")],
    model_definition_file_path: Annotated[
        str, Argument(help="Path to the Python file defining the model to evaluate")
    ],
    tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
    # === Common parameters ===
    use_chat_template: Annotated[
        bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
    ] = False,
    system_prompt: Annotated[
        Optional[str], Option(help="System prompt to use for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
    ] = None,
    dataset_loading_processes: Annotated[
        int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1)
    ] = 1,
    custom_tasks: Annotated[
        Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1)
    ] = None,
    cache_dir: Annotated[
        str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANEL_NAME_1)
    ] = CACHE_DIR,
    num_fewshot_seeds: Annotated[
        int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
    ] = 1,
    # === saving ===
    output_dir: Annotated[
        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
    ] = "results",
    push_to_hub: Annotated[
        bool, Option(help="Push results to the Hugging Face Hub.", rich_help_panel=HELP_PANEL_NAME_2)
    ] = False,
    push_to_tensorboard: Annotated[
        bool, Option(help="Push results to TensorBoard.", rich_help_panel=HELP_PANEL_NAME_2)
    ] = False,
    public_run: Annotated[
        bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2)
    ] = False,
    results_org: Annotated[
        Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2)
    ] = None,
    save_details: Annotated[
        bool, Option(help="Save detailed, sample-per-sample results.", rich_help_panel=HELP_PANEL_NAME_2)
    ] = False,
    # === debug ===
    max_samples: Annotated[
        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3)
    ] = None,
    override_batch_size: Annotated[
        Optional[int], Option(help="Override batch size for evaluation.", rich_help_panel=HELP_PANEL_NAME_3)
    ] = None,
    job_id: Annotated[
        int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3)
    ] = 0,
):
    """
    Evaluate custom models (the model definition file must contain a LightevalModel subclass).
    """
    from lighteval.logging.evaluation_tracker import EvaluationTracker
    from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters

    env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir)
    evaluation_tracker = EvaluationTracker(
        output_dir=output_dir,
        save_details=save_details,
        push_to_hub=push_to_hub,
        push_to_tensorboard=push_to_tensorboard,
        public=public_run,
        hub_results_org=results_org,
    )

    parallelism_manager = ParallelismManager.CUSTOM
    model_config = CustomModelConfig(model=model_name, model_definition_file_path=model_definition_file_path)

    pipeline_params = PipelineParameters(
        launcher_type=parallelism_manager,
        env_config=env_config,
        job_id=job_id,
        dataset_loading_processes=dataset_loading_processes,
        custom_tasks_directory=custom_tasks,
        override_batch_size=override_batch_size,
        num_fewshot_seeds=num_fewshot_seeds,
        max_samples=max_samples,
        use_chat_template=use_chat_template,
        system_prompt=system_prompt,
    )
    pipeline = Pipeline(
        tasks=tasks,
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )

    pipeline.evaluate()
    pipeline.show_results()
    results = pipeline.get_results()
    pipeline.save_and_push_results()

    return results
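Together with the registration in __main__.py above, this adds a custom backend to the lighteval CLI. A minimal invocation sketch (the model name, definition file path, and task spec below are hypothetical placeholders, not values taken from this PR):

    lighteval custom "my-translator" path/to/my_model.py "community|my_task|0|0" --output-dir results --max-samples 10

The positional arguments follow the signature above (model name, model definition file path, comma-separated task list); Typer derives the option names from the parameters, e.g. --output-dir from output_dir.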
33 changes: 33 additions & 0 deletions src/lighteval/models/model_loader.py
@@ -23,6 +23,8 @@
import logging
from typing import Union

from lighteval.main_custom import CustomModelConfig
from lighteval.models.abstract_model import LightevalModel
from lighteval.models.dummy.dummy_model import DummyModel, DummyModelConfig
from lighteval.models.endpoints.endpoint_model import (
    InferenceEndpointModel,
@@ -57,6 +59,7 @@ def load_model( # noqa: C901
        InferenceEndpointModelConfig,
        DummyModelConfig,
        VLLMModelConfig,
        CustomModelConfig,
        OpenAIModelConfig,
    ],
    env_config: EnvConfig,
@@ -92,6 +95,9 @@ def load_model( # noqa: C901
    if isinstance(config, VLLMModelConfig):
        return load_model_with_accelerate_or_default(config=config, env_config=env_config)

    if isinstance(config, CustomModelConfig):
        return load_custom_model(config=config, env_config=env_config)

    if isinstance(config, OpenAIModelConfig):
        return load_openai_model(config=config, env_config=env_config)
Expand All @@ -107,6 +113,33 @@ def load_model_with_tgi(config: TGIModelConfig):
return model


def load_custom_model(config: CustomModelConfig, env_config: EnvConfig):
    import importlib.util

    # Load the Python file
    spec = importlib.util.spec_from_file_location("custom_model_module", config.model_definition_file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Could not load file: {config.model_definition_file_path}")

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    # Find the first class that inherits from LightevalModel
    model_class = None
    for attr_name in dir(module):
        attr = getattr(module, attr_name)
        if isinstance(attr, type) and issubclass(attr, LightevalModel) and attr != LightevalModel:
            model_class = attr
            break

    if model_class is None:
        raise ValueError(f"No class inheriting from LightevalModel found in {config.model_definition_file_path}")

    model = model_class(config, env_config)

    return model


def load_openai_model(config: OpenAIModelConfig, env_config: EnvConfig):
    if not is_openai_available():
        raise ImportError()
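load_custom_model imports the given Python file and instantiates the first class it finds that subclasses LightevalModel. As a sketch, a definition file could look like the following (the file name, class name, and method shown are illustrative assumptions; LightevalModel is abstract, so the exact set of methods to implement depends on the lighteval version):

    # my_model.py - hypothetical definition file passed to `lighteval custom`
    from lighteval.models.abstract_model import LightevalModel


    class MyModel(LightevalModel):
        def __init__(self, config, env_config):
            # `config` is the CustomModelConfig built by the CLI;
            # config.model holds the model name from the command line.
            self.config = config
            self.model_name = config.model

        def greedy_until(self, requests, override_bs=None):
            # A real implementation would run inference here and return one
            # response per request; left unimplemented in this sketch.
            raise NotImplementedError

Note that the loader walks dir(module), which returns names in alphabetical order, and stops at the first match, so keeping exactly one LightevalModel subclass per definition file avoids instantiating an unintended class.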
1 change: 1 addition & 0 deletions src/lighteval/pipeline.py
@@ -76,6 +76,7 @@ class ParallelismManager(Enum):
    TGI = auto()
    OPENAI = auto()
    VLLM = auto()
    CUSTOM = auto()
    NONE = auto()

