From 02b628d2e412f769cbd59bb148f23e1468ab96cd Mon Sep 17 00:00:00 2001 From: Shashank S <126173294+Shashank1202@users.noreply.github.com> Date: Sat, 20 Jul 2024 11:29:06 +0530 Subject: [PATCH] Delete research/Text_Summarization.ipynb --- research/Text_Summarization.ipynb | 1834 ----------------------------- 1 file changed, 1834 deletions(-) delete mode 100644 research/Text_Summarization.ipynb diff --git a/research/Text_Summarization.ipynb b/research/Text_Summarization.ipynb deleted file mode 100644 index ae2d4b5..0000000 --- a/research/Text_Summarization.ipynb +++ /dev/null @@ -1,1834 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "gpuType": "T4" - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU", - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "90092cacd3e44f53bef246cc3d624584": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ec611a252f944c6a89b60a37b15c2a82", - "IPY_MODEL_197b3e495baf45e1ade8f8176dde4b22", - "IPY_MODEL_3bcdeb09d0aa4c7e83a297a3d41fdd94" - ], - "layout": "IPY_MODEL_d1759df44e904a5cbc4314be82cb3bbf" - } - }, - "ec611a252f944c6a89b60a37b15c2a82": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_37a5bef2ab23454ca68fc76c0d2a41d1", - "placeholder": "​", - "style": "IPY_MODEL_6a0341d915ce4e0082aa584828e5844c", - "value": "Map: 100%" - } - }, - "197b3e495baf45e1ade8f8176dde4b22": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_26f759d6e786439babe95c8a2b25f012", - "max": 819, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_2114a60d4f3e40b5ba98d6bb04148a52", - "value": 819 - } - }, - "3bcdeb09d0aa4c7e83a297a3d41fdd94": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fdf66024e9d84e21b30139c7d8eaff84", - "placeholder": "​", - "style": "IPY_MODEL_e3997e17aa054822bd1900d6e8d95ada", - "value": " 819/819 [00:00<00:00, 1779.14 examples/s]" - } - }, - "d1759df44e904a5cbc4314be82cb3bbf": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "37a5bef2ab23454ca68fc76c0d2a41d1": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6a0341d915ce4e0082aa584828e5844c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "26f759d6e786439babe95c8a2b25f012": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2114a60d4f3e40b5ba98d6bb04148a52": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "fdf66024e9d84e21b30139c7d8eaff84": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e3997e17aa054822bd1900d6e8d95ada": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "25eed2eee8da40969612e35ccd390ce7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_4ad4c9dd67bc4e1288960d79346be440", - "IPY_MODEL_bb210d3fea6849d1a6a366448d754d8f", - "IPY_MODEL_20b2e6740e7d417f81cf1da86e08c868" - ], - "layout": "IPY_MODEL_0468969085b24a869c57ce7f2e02a39b" - } - }, - "4ad4c9dd67bc4e1288960d79346be440": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_863c2d9dc31944feb0fdb7bb97c42b1e", - "placeholder": "​", - "style": "IPY_MODEL_284de4f08a6c4ba88a45094262e2a9aa", - "value": "Downloading builder script: " - } - }, - "bb210d3fea6849d1a6a366448d754d8f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c9e993173428474ab55710d5f826a442", - "max": 2169, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_fc916a11361b43778cae578dfe07d8cd", - "value": 2169 - } - }, - "20b2e6740e7d417f81cf1da86e08c868": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_833821ed8b7c4ddc97fd28a69a3270e9", - "placeholder": "​", - "style": "IPY_MODEL_52e614fc54de4203a02c1ecf27350cb8", - "value": " 5.65k/? [00:00<00:00, 118kB/s]" - } - }, - "0468969085b24a869c57ce7f2e02a39b": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "863c2d9dc31944feb0fdb7bb97c42b1e": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "284de4f08a6c4ba88a45094262e2a9aa": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "c9e993173428474ab55710d5f826a442": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fc916a11361b43778cae578dfe07d8cd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "833821ed8b7c4ddc97fd28a69a3270e9": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "52e614fc54de4203a02c1ecf27350cb8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - } - } - } - }, - "cells": [ - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ZNMtcxS_XGIC", - "outputId": "8a76b0cd-5aa4-4b5f-fb39-f46a937afce1" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Sat Jun 29 06:07:48 2024 \n", - "+---------------------------------------------------------------------------------------+\n", - "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n", - "|-----------------------------------------+----------------------+----------------------+\n", - "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", - "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", - "| | | MIG M. |\n", - "|=========================================+======================+======================|\n", - "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", - "| N/A 40C P8 11W / 70W | 3MiB / 15360MiB | 0% Default |\n", - "| | | N/A |\n", - "+-----------------------------------------+----------------------+----------------------+\n", - " \n", - "+---------------------------------------------------------------------------------------+\n", - "| Processes: |\n", - "| GPU GI CI PID Type Process name GPU Memory |\n", - "| ID ID Usage |\n", - "|=======================================================================================|\n", - "| No running processes found |\n", - "+---------------------------------------------------------------------------------------+\n" - ] - } - ], - "source": [ - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "source": [ - "!pip install transformers[sentencepiece] datasets sacrebleu rouge_score py7zr -q" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "v6UDAlmRXPEA", - "outputId": "d17685cc-538f-4e76-8e4d-bea25b53e51f" - }, - "execution_count": 5, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.7/106.7 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m45.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m413.8/413.8 kB\u001b[0m \u001b[31m34.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m138.9/138.9 kB\u001b[0m \u001b[31m18.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.7/49.7 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m93.1/93.1 kB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m63.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Building wheel for rouge_score (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "cudf-cu12 24.4.1 requires pandas<2.2.2dev0,>=2.0, but you have pandas 2.2.2 which is incompatible.\n", - "cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 16.1.0 which is incompatible.\n", - "gcsfs 2023.6.0 requires fsspec==2023.6.0, but you have fsspec 2024.5.0 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0m" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "!pip install --upgrade accelerate\n", - "!pip uninstall -y transformers accelerate\n", - "!pip install transformers accelerate" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "cYL75Mz7Xfqy", - "outputId": "2c619801-15e2-44d9-cc31-340180d56555" - }, - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (0.31.0)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate) (1.26.4)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (24.1)\n", - "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate) (6.0.1)\n", - "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.3.0+cu121)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.23.4)\n", - "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.4.3)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.15.4)\n", - "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (4.12.2)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.12.1)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.4)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2024.5.0)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (12.1.105)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (8.9.2.26)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (12.1.3.1)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (11.0.2.54)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (10.3.2.106)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (11.4.5.107)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (12.1.0.106)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.20.5)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (12.1.105)\n", - "Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.3.0)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.10.0->accelerate) (12.5.40)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (4.66.4)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.5)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (3.7)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (2.2.2)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (2024.6.2)\n", - "Requirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n", - "Found existing installation: transformers 4.42.3\n", - "Uninstalling transformers-4.42.3:\n", - " Successfully uninstalled transformers-4.42.3\n", - "Found existing installation: accelerate 0.31.0\n", - "Uninstalling accelerate-0.31.0:\n", - " Successfully uninstalled accelerate-0.31.0\n", - "Collecting transformers\n", - " Using cached transformers-4.42.3-py3-none-any.whl (9.3 MB)\n", - "Collecting accelerate\n", - " Using cached accelerate-0.31.0-py3-none-any.whl (309 kB)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.15.4)\n", - "Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.23.4)\n", - "Requirement already satisfied: numpy<2.0,>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.26.4)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.5.15)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.32.3)\n", - "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.3)\n", - "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n", - "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.4)\n", - "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n", - "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.3.0+cu121)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.2->transformers) (2024.5.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.2->transformers) (4.12.2)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.12.1)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.4)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (12.1.105)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (8.9.2.26)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (12.1.3.1)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (11.0.2.54)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (10.3.2.106)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (11.4.5.107)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (12.1.0.106)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.20.5)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (12.1.105)\n", - "Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.3.0)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.10.0->accelerate) (12.5.40)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.7)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.2.2)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.6.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.5)\n", - "Requirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n", - "Installing collected packages: transformers, accelerate\n", - "Successfully installed accelerate-0.31.0 transformers-4.42.3\n" - ] - }, - { - "output_type": "display_data", - "data": { - "application/vnd.colab-display-data+json": { - "pip_warning": { - "packages": [ - "accelerate", - "transformers" - ] - }, - "id": "2f77079ae2b14b99ac91264f19b80275" - } - }, - "metadata": {} - } - ] - }, - { - "cell_type": "code", - "source": [ - "from transformers import pipeline, set_seed\n", - "import pyarrow\n", - "from datasets import load_dataset, load_metric\n", - "import matplotlib.pyplot as plt\n", - "import pandas as pd\n", - "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n", - "import nltk\n", - "from nltk.tokenize import sent_tokenize\n", - "from tqdm import tqdm\n", - "import torch\n", - "nltk.download(\"punkt\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7zmHL6crXnn3", - "outputId": "ba79ec9f-9a85-47e8-95d9-83d73f791edf" - }, - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "[nltk_data] Downloading package punkt to /root/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "True" - ] - }, - "metadata": {}, - "execution_count": 1 - } - ] - }, - { - "cell_type": "code", - "source": [ - "device =\"cuda\" if torch.cuda.is_available() else \"cpu\"\n", - "device" - ], - "metadata": { - "id": "GHHS3pujecNt", - "outputId": "7cc18648-246c-4197-f436-19f9489fd23f", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 36 - } - }, - "execution_count": 2, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "'cuda'" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" - } - }, - "metadata": {}, - "execution_count": 2 - } - ] - }, - { - "cell_type": "code", - "source": [ - "model_ckpt= \"google/pegasus-cnn_dailymail\"\n", - "tokenizer= AutoTokenizer.from_pretrained(model_ckpt)\n", - "model_pegasus= AutoModelForSeq2SeqLM.from_pretrained(model_ckpt).to(device)" - ], - "metadata": { - "id": "cGSRyJ2ue0Rg", - "outputId": "2652150a-94e4-4e0e-c87a-360a3448c5fa", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "!wget https://github.com/entbappy/Branching-tutorial/raw/master/summarizer-data.zip\n", - "!unzip summarizer-data.zip" - ], - "metadata": { - "id": "7PwtMfmhfNaT", - "outputId": "3b875a44-661f-4a02-ced4-daabf36ae6b0", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 4, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2024-06-29 06:09:32-- https://github.com/entbappy/Branching-tutorial/raw/master/summarizer-data.zip\n", - "Resolving github.com (github.com)... 20.205.243.166\n", - "Connecting to github.com (github.com)|20.205.243.166|:443... connected.\n", - "HTTP request sent, awaiting response... 302 Found\n", - "Location: https://raw.githubusercontent.com/entbappy/Branching-tutorial/master/summarizer-data.zip [following]\n", - "--2024-06-29 06:09:32-- https://raw.githubusercontent.com/entbappy/Branching-tutorial/master/summarizer-data.zip\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.111.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 7903594 (7.5M) [application/zip]\n", - "Saving to: ‘summarizer-data.zip.3’\n", - "\n", - "summarizer-data.zip 100%[===================>] 7.54M --.-KB/s in 0.03s \n", - "\n", - "2024-06-29 06:09:33 (271 MB/s) - ‘summarizer-data.zip.3’ saved [7903594/7903594]\n", - "\n", - "Archive: summarizer-data.zip\n", - "replace samsum-test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: " - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "from datasets import load_from_disk\n", - "dataset_samsum= load_from_disk('samsum_dataset')\n", - "dataset_samsum" - ], - "metadata": { - "id": "dN2EmD7gfxbf", - "outputId": "7e405789-4fa2-4a59-d7c8-cb8261adbceb", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 5, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['id', 'dialogue', 'summary'],\n", - " num_rows: 14732\n", - " })\n", - " test: Dataset({\n", - " features: ['id', 'dialogue', 'summary'],\n", - " num_rows: 819\n", - " })\n", - " validation: Dataset({\n", - " features: ['id', 'dialogue', 'summary'],\n", - " num_rows: 818\n", - " })\n", - "})" - ] - }, - "metadata": {}, - "execution_count": 5 - } - ] - }, - { - "cell_type": "code", - "source": [ - "split_lengths= [len(dataset_samsum[split]) for split in dataset_samsum]\n", - "\n", - "print(f\"Split lenghts: {split_lengths}\")\n", - "print(f\"Features: {dataset_samsum['train'].column_names}\")\n", - "print(\"\\n Dailogue: \")\n", - "print(dataset_samsum[\"test\"][1][\"dialogue\"])\n", - "print(\"\\n Summary:\")\n", - "print(dataset_samsum[\"test\"][1][\"summary\"])" - ], - "metadata": { - "id": "MDXNODWigor7", - "outputId": "57f19925-500b-43cd-ea4c-5c96cebe114e", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Split lenghts: [14732, 819, 818]\n", - "Features: ['id', 'dialogue', 'summary']\n", - "\n", - " Dailogue: \n", - "Eric: MACHINE!\r\n", - "Rob: That's so gr8!\r\n", - "Eric: I know! And shows how Americans see Russian ;)\r\n", - "Rob: And it's really funny!\r\n", - "Eric: I know! I especially like the train part!\r\n", - "Rob: Hahaha! No one talks to the machine like that!\r\n", - "Eric: Is this his only stand-up?\r\n", - "Rob: Idk. I'll check.\r\n", - "Eric: Sure.\r\n", - "Rob: Turns out no! There are some of his stand-ups on youtube.\r\n", - "Eric: Gr8! I'll watch them now!\r\n", - "Rob: Me too!\r\n", - "Eric: MACHINE!\r\n", - "Rob: MACHINE!\r\n", - "Eric: TTYL?\r\n", - "Rob: Sure :)\n", - "\n", - " Summary:\n", - "Eric and Rob are going to watch a stand-up on youtube.\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "def convert_examples_to_features(example_batch):\n", - " input_encodings= tokenizer(example_batch['dialogue'], max_length= 1024, truncation= True)\n", - "\n", - " with tokenizer.as_target_tokenizer():\n", - " target_encodings= tokenizer(example_batch['summary'], max_length= 128, truncation= True)\n", - "\n", - " return{\n", - " 'input_ids': input_encodings['input_ids'],\n", - " 'attention_mask': input_encodings['attention_mask'],\n", - " 'labels':target_encodings['input_ids']\n", - " }" - ], - "metadata": { - "id": "0jcSDghRhi0s" - }, - "execution_count": 7, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "dataset_samsum_pt= dataset_samsum.map(convert_examples_to_features, batched= True)" - ], - "metadata": { - "id": "bKqcmeL6jDGC", - "outputId": "adfbf727-9cc5-4be2-91bc-24d2e4491611", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 104, - "referenced_widgets": [ - "90092cacd3e44f53bef246cc3d624584", - "ec611a252f944c6a89b60a37b15c2a82", - "197b3e495baf45e1ade8f8176dde4b22", - "3bcdeb09d0aa4c7e83a297a3d41fdd94", - "d1759df44e904a5cbc4314be82cb3bbf", - "37a5bef2ab23454ca68fc76c0d2a41d1", - "6a0341d915ce4e0082aa584828e5844c", - "26f759d6e786439babe95c8a2b25f012", - "2114a60d4f3e40b5ba98d6bb04148a52", - "fdf66024e9d84e21b30139c7d8eaff84", - "e3997e17aa054822bd1900d6e8d95ada" - ] - } - }, - "execution_count": 8, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Map: 0%| | 0/819 [00:00" - ], - "text/html": [ - "\n", - "
\n", - " \n", - " \n", - " [51/51 04:03, Epoch 0/1]\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
StepTraining LossValidation Loss

" - ] - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", - "Non-default generation parameters: {'max_length': 128, 'min_length': 32, 'num_beams': 8, 'length_penalty': 0.8, 'forced_eos_token_id': 1}\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "TrainOutput(global_step=51, training_loss=3.0003189713347194, metrics={'train_runtime': 246.211, 'train_samples_per_second': 3.326, 'train_steps_per_second': 0.207, 'total_flos': 321588359725056.0, 'train_loss': 3.0003189713347194, 'epoch': 0.9963369963369964})" - ] - }, - "metadata": {}, - "execution_count": 16 - } - ] - }, - { - "cell_type": "code", - "source": [ - "#Evaluation\n", - "\n", - "def generate_batch_sized_chunks(list_of_elements, batch_size):\n", - " for i in range(0, len(list_of_elements), batch_size):\n", - " yield list_of_elements[i : i+batch_size]\n", - "\n", - "def calculate_metric_on_test_ds(dataset, metric, model, tokenizer,\n", - " batch_size=16,\n", - " device= device,\n", - " column_text= \"article\",\n", - " column_summary=\"highlights\"):\n", - " article_batches= list(generate_batch_sized_chunks(dataset[column_text], batch_size))\n", - " target_batches= list(generate_batch_sized_chunks(dataset[column_summary], batch_size))\n", - "\n", - " for article_batch, target_batch in tqdm(\n", - " zip(article_batches, target_batches), total=len(article_batches)):\n", - "\n", - " inputs= tokenizer(article_batch, max_length= 1024, truncation= True,\n", - " padding= \"max_length\", return_tensors= \"pt\")\n", - "\n", - " summaries= model.generate(input_ids= inputs[\"input_ids\"].to(device),\n", - " attention_mask= inputs[\"attention_mask\"].to(device),\n", - " length_penalty= 0.8, num_beams= 8, max_length= 128)\n", - "\n", - "\n", - " decoded_summaries= [tokenizer.decode(s, skip_special_tokens= True,\n", - " clean_up_tokenization_space= True)\n", - " for s in summaries]\n", - "\n", - " decoded_summaries=[d.replace(\"\", \" \")for d in decoded_summaries]\n", - "\n", - " metric.add_batch(predictions= decoded_summaries, references= target_batch)\n", - "\n", - " score= metric.compute()\n", - " return score" - ], - "metadata": { - "id": "Fm3iWhx1n9Pt" - }, - "execution_count": 17, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "rouge_name= [\"rouge1\", \"rouge2\", \"rougeL\", \"rougeLsum\"]\n", - "rouge_metric= load_metric('rouge')" - ], - "metadata": { - "id": "vMXpmyw3q4Xi", - "outputId": "ef1333b3-3d2b-4330-f968-d665074ff150", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 173, - "referenced_widgets": [ - "25eed2eee8da40969612e35ccd390ce7", - "4ad4c9dd67bc4e1288960d79346be440", - "bb210d3fea6849d1a6a366448d754d8f", - "20b2e6740e7d417f81cf1da86e08c868", - "0468969085b24a869c57ce7f2e02a39b", - "863c2d9dc31944feb0fdb7bb97c42b1e", - "284de4f08a6c4ba88a45094262e2a9aa", - "c9e993173428474ab55710d5f826a442", - "fc916a11361b43778cae578dfe07d8cd", - "833821ed8b7c4ddc97fd28a69a3270e9", - "52e614fc54de4203a02c1ecf27350cb8" - ] - } - }, - "execution_count": 18, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - ":2: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n", - " rouge_metric= load_metric('rouge')\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Downloading builder script: 0%| | 0.00/2.17k [00:00\n", - "

\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
rouge1rouge2rougeLrougeLsum
pegasus0.0218610.00.0214320.021735
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"pd\",\n \"rows\": 1,\n \"fields\": [\n {\n \"column\": \"rouge1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.021860626292527996,\n \"max\": 0.021860626292527996,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.021860626292527996\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rouge2\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.0,\n \"max\": 0.0,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rougeL\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.021431510382023386,\n \"max\": 0.021431510382023386,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.021431510382023386\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rougeLsum\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.02173530914736061,\n \"max\": 0.02173530914736061,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.02173530914736061\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - "metadata": {}, - "execution_count": 20 - } - ] - }, - { - "cell_type": "code", - "source": [ - "model_pegasus.save_pretrained(\"pegasus-samsum-model\")" - ], - "metadata": { - "id": "BglMMmWUrESZ", - "outputId": "b7f4958d-fd1e-4645-e89d-00420f083706", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 21, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", - "Non-default generation parameters: {'max_length': 128, 'min_length': 32, 'num_beams': 8, 'length_penalty': 0.8, 'forced_eos_token_id': 1}\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "tokenizer.save_pretrained(\"tokenizer\")" - ], - "metadata": { - "id": "PFsZu5xMrEQU", - "outputId": "b08fbda8-9095-4cfc-cb16-2e7ff6f96a96", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 22, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "('tokenizer/tokenizer_config.json',\n", - " 'tokenizer/special_tokens_map.json',\n", - " 'tokenizer/spiece.model',\n", - " 'tokenizer/added_tokens.json',\n", - " 'tokenizer/tokenizer.json')" - ] - }, - "metadata": {}, - "execution_count": 22 - } - ] - }, - { - "cell_type": "code", - "source": [ - "tokenizer= AutoTokenizer.from_pretrained(\"/content/tokenizer\")" - ], - "metadata": { - "id": "JudD86dIrEN6" - }, - "execution_count": 24, - "outputs": [] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "V_tH-Sd8rELT" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "f8RbUvsRrEI8" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "9EP-Z4HqrEGj" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "!pip uninstall transformers accelerate -y\n" - ], - "metadata": { - "id": "k2gtxQlok3Se" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Step 1: Uninstall conflicting packages\n", - "!pip uninstall transformers accelerate -y\n", - "\n", - "# Step 2: Reinstall required packages\n", - "!pip install transformers[torch] accelerate\n" - ], - "metadata": { - "id": "368GM9EXlpDE" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "gen_kwargs= {\"length_penalty\": 0.8, \"num_beams\":8, \"max_length\": 128}\n", - "\n", - "sample_text= dataset_samsum[\"test\"][0][\"dialogue\"]\n", - "reference= dataset_samsum[\"test\"][0][\"summary\"]\n", - "pipe= pipeline(\"summarization\", model= \"pegasus-samsum-model\", tokenizer= tokenizer)\n", - "\n", - "print(\"Dialogue\")\n", - "print(sample_text)\n", - "\n", - "print(\"\\n Reference Summary\")\n", - "print(reference)\n", - "\n", - "print(\"\\n Model Summary\")\n", - "print(pipe(sample_text, **gen_kwargs)[0][\"summary_text\"])" - ], - "metadata": { - "id": "Oy99trCultl8", - "outputId": "9803db3b-d83c-498f-9900-97c32949c99d", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 25, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\n", - "Your max_length is set to 128, but your input_length is only 122. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=61)\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Dialogue\n", - "Hannah: Hey, do you have Betty's number?\n", - "Amanda: Lemme check\n", - "Hannah: \n", - "Amanda: Sorry, can't find it.\n", - "Amanda: Ask Larry\n", - "Amanda: He called her last time we were at the park together\n", - "Hannah: I don't know him well\n", - "Hannah: \n", - "Amanda: Don't be shy, he's very nice\n", - "Hannah: If you say so..\n", - "Hannah: I'd rather you texted him\n", - "Amanda: Just text him 🙂\n", - "Hannah: Urgh.. Alright\n", - "Hannah: Bye\n", - "Amanda: Bye bye\n", - "\n", - " Reference Summary\n", - "Hannah needs Betty's number but Amanda doesn't have it. She needs to contact Larry.\n", - "\n", - " Model Summary\n", - "Amanda: Ask Larry Amanda: He called her last time we were at the park together .Hannah: I'd rather you texted him .Amanda: Just text him .\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "SLWv2AnPub1w" - }, - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file