Commit

Merge branch 'main' into prmtrainer
gaetanlop authored Dec 13, 2024
2 parents 83e174e + e3e171a commit 0d48cfa
Showing 10 changed files with 420 additions and 175 deletions.
45 changes: 45 additions & 0 deletions .github/workflows/tests_latest.yml
@@ -0,0 +1,45 @@
name: Tests latest TRL release with dev dependencies

on:
  schedule:
    - cron: '0 0 * * *'  # Runs daily at midnight UTC

  workflow_dispatch:

env:
  TQDM_DISABLE: 1
  CI_SLACK_CHANNEL: ${{ secrets.CI_PUSH_MAIN_CHANNEL }}

jobs:
  tests:
    name: Tests latest TRL release with dev dependencies
    runs-on: 'ubuntu-latest'
    steps:
      - name: Git checkout
        uses: actions/checkout@v4
        with: { ref: v0.12-release }
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: "pip"
          cache-dependency-path: |
            setup.py
            requirements.txt
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install -U git+https://github.com/huggingface/accelerate.git
          python -m pip install -U git+https://github.com/huggingface/datasets.git
          python -m pip install -U git+https://github.com/huggingface/transformers.git
          python -m pip install ".[dev]"
      - name: Test with pytest
        run: |
          make test
      - name: Post to Slack
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ env.CI_SLACK_CHANNEL }}
          title: Results of latest TRL with Python 3.12 on ubuntu-latest with dev dependencies
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
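For reference, the `schedule` trigger in the workflow above uses a standard five-field cron expression (minute, hour, day-of-month, month, day-of-week). A minimal, illustrative matcher, supporting only `*` and plain integers and not part of any real scheduler, shows that `'0 0 * * *'` fires exactly at midnight UTC:

```python
from datetime import datetime, timezone

def matches_cron(expr: str, dt: datetime) -> bool:
    """Check whether a datetime matches a five-field cron expression.

    Illustrative only: supports just '*' and plain integers, which is
    enough for the workflow's '0 0 * * *'. Fields are minute, hour,
    day-of-month, month, day-of-week (0 = Sunday).
    """
    fields = expr.split()
    values = [dt.minute, dt.hour, dt.day, dt.month, dt.isoweekday() % 7]
    return all(f == "*" or int(f) == v for f, v in zip(fields, values))

# The workflow's schedule fires once per day at 00:00 UTC.
midnight = datetime(2024, 12, 13, 0, 0, tzinfo=timezone.utc)
noon = datetime(2024, 12, 13, 12, 0, tzinfo=timezone.utc)
print(matches_cron("0 0 * * *", midnight))  # True
print(matches_cron("0 0 * * *", noon))      # False
```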
2 changes: 2 additions & 0 deletions docs/source/community_tutorials.md
@@ -11,6 +11,8 @@ Community tutorials are made by active members of the Hugging Face community that
| Preference Optimization | [`DPOTrainer`] | Align Mistral-7b using Direct Preference Optimization for human preference alignment | [Maxime Labonne](https://huggingface.co/mlabonne) | [Link](https://mlabonne.github.io/blog/posts/Fine_tune_Mistral_7b_with_DPO.html) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mlabonne/llm-course/blob/main/Fine_tune_a_Mistral_7b_model_with_DPO.ipynb) |
| Preference Optimization | [`ORPOTrainer`] | Fine-tuning Llama 3 with ORPO combining instruction tuning and preference alignment | [Maxime Labonne](https://huggingface.co/mlabonne) | [Link](https://mlabonne.github.io/blog/posts/2024-04-19_Fine_tune_Llama_3_with_ORPO.html) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eHNWg9gnaXErdAa8_mcvjMupbSS6rDvi) |

<Youtube id="cnGyyM0vOes" />

# Vision Language Models

| Task | Class | Description | Author | Tutorial | Colab |
16 changes: 15 additions & 1 deletion docs/source/data_utils.mdx
@@ -1,15 +1,29 @@
-## Data Utilities
+# Data Utilities

## is_conversational

[[autodoc]] is_conversational

## apply_chat_template

[[autodoc]] apply_chat_template

## maybe_apply_chat_template

[[autodoc]] maybe_apply_chat_template

## extract_prompt

[[autodoc]] extract_prompt

## maybe_extract_prompt

[[autodoc]] maybe_extract_prompt

## unpair_preference_dataset

[[autodoc]] unpair_preference_dataset

## maybe_unpair_preference_dataset

[[autodoc]] maybe_unpair_preference_dataset
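The unpairing utilities documented above convert a paired preference dataset (one row holding both a chosen and a rejected completion) into an unpaired one (two rows with a boolean label). A plain-Python sketch of that transformation, assuming the prompt/chosen/rejected column convention used in TRL's dataset docs (the `unpair` helper below is illustrative, not TRL's API):

```python
def unpair(rows):
    """Turn paired preference rows ({'prompt', 'chosen', 'rejected'})
    into unpaired rows ({'prompt', 'completion', 'label'}), where
    label=True marks the preferred completion."""
    out = []
    for row in rows:
        out.append({"prompt": row["prompt"], "completion": row["chosen"], "label": True})
        out.append({"prompt": row["prompt"], "completion": row["rejected"], "label": False})
    return out

paired = [{"prompt": "2+2=?", "chosen": "4", "rejected": "5"}]
for row in unpair(paired):
    print(row)
```

The real `unpair_preference_dataset` operates on a `datasets.Dataset` rather than a list of dicts, but the row-level mapping is the same idea.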
33 changes: 32 additions & 1 deletion tests/test_data_utils.py
@@ -17,7 +17,7 @@

from datasets import Dataset, DatasetDict
from parameterized import parameterized
-from transformers import AutoTokenizer
+from transformers import AutoProcessor, AutoTokenizer

from trl.data_utils import (
apply_chat_template,
@@ -196,6 +196,37 @@ def test_maybe_apply_chat_template(self, tokenizer_id, example):
self.assertIsInstance(result["label"], bool)
self.assertEqual(result["label"], example["label"])

    def test_apply_chat_template_with_tools(self):
        tokenizer = AutoProcessor.from_pretrained("trl-internal-testing/tiny-LlamaForCausalLM-3.2")

        # Define dummy test tools
        def get_current_temperature(location: str):
            """
            Gets the temperature at a given location.

            Args:
                location: The location to get the temperature for
            """
            return 22.0

        # Define test case
        test_case = {
            "prompt": [
                {"content": "Whats the temperature in London?", "role": "user"},
            ]
        }

        # Test with tools
        result_with_tools = apply_chat_template(test_case, tokenizer, tools=[get_current_temperature])

        # Verify tools are included in the output
        self.assertIn("get_current_temperature", result_with_tools["prompt"])

        # Test without tools
        result_without_tools = apply_chat_template(test_case, tokenizer, tools=None)

        # Verify tools are not included in the output
        self.assertNotIn("get_current_temperature", result_without_tools["prompt"])


class UnpairPreferenceDatasetTester(unittest.TestCase):
paired_dataset = Dataset.from_dict(
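The tool-calling test above only asserts that the tool function's name appears in the rendered prompt. A chat template can do that because metadata is extracted from the Python callable itself; `transformers` converts tools to a JSON schema internally, but a minimal sketch of the idea (the `tool_stub` helper is hypothetical, not a transformers API) looks like this:

```python
import inspect

def get_current_temperature(location: str):
    """Gets the temperature at a given location."""
    return 22.0

def tool_stub(fn):
    """Collect the name, description, and parameter names that a chat
    template would need in order to advertise this tool to the model.
    Illustrative stand-in for transformers' JSON-schema conversion."""
    return {
        "name": fn.__name__,
        "description": inspect.getdoc(fn),
        "parameters": list(inspect.signature(fn).parameters),
    }

stub = tool_stub(get_current_temperature)
print(stub["name"])        # get_current_temperature
print(stub["parameters"])  # ['location']
```

Because the stub carries `fn.__name__`, rendering it into the prompt is exactly what makes `assertIn("get_current_temperature", ...)` pass in the test.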
