Skip to content

Commit

Permalink
Add llama benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
gabriel-piles committed Nov 28, 2024
1 parent 3102084 commit 5daf071
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 3 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,14 @@ Performance 100 samples
| Model | Prompt | Arabic-English | English-Spanish | English-French | English-Russian |
|--------------|----------|----------------|-----------------|----------------|-----------------|
| DeepL | | 33.11 | - | 36.05 | 24.64 |
| llama3.1:70B | Prompt 3 | - | - | 32.65 | - |
| aya-35b | Prompt 2 | 30.75 | - | 31.48 | 20.06 |
| glm4:9b | Prompt 2 | 19.62 | - | 30.21 | 16.12 |
| glm-BF16-64 | Prompt 2 | 18.75 | - | 28.84 | 17.20 |
| glm-BF16-128 | Prompt 2 | 20.05 | - | 30.09 | 17.82 |
| llama3.1-8B | Prompt 2 | 10.52 | 25.37 | 27.53 | 14.04 |
- | llama3.2-3B | Prompt 3 | - | - | 15.88 | - |
+ | llama3.1-8B | Prompt 3 | - | - | 26.57 | - |
+ | llama3.2-3B | Prompt 3 | - | - | 19.70 | - |



Expand Down
File renamed without changes.
5 changes: 3 additions & 2 deletions src/benchmark_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from tqdm import tqdm
from huggingface_hub import hf_hub_download

from configuration import ROOT_PATH
from data_model.TranslationTask import TranslationTask
from fast_bleu import BLEU
from translate import get_content
Expand Down Expand Up @@ -95,7 +96,7 @@ def get_performance(samples: list[tuple[str, str]], path: Path):
predictions += json.loads(Path(join(path, file)).read_text())
average_performance = 0
for i, (text_from, text_to) in tqdm(enumerate(samples)):
- prediction = predictions[i]
+ prediction = predictions[i].replace("```", "")
average_performance += get_bleu_score(text_to, prediction)

print(f"Average performance: {100 * average_performance / len(samples)}")
Expand Down Expand Up @@ -129,5 +130,5 @@ def get_characters_to_translate():

# benchmark("aya:35b", "ar-en", 100)
# benchmark("glm4:9b", "en-fr", 100)
- benchmark("llama3.1", "en-fr")
+ benchmark("llama3.1:70b", "en-fr", 100)
print("time", round(time() - start, 2), "s")

0 comments on commit 5daf071

Please sign in to comment.